lang-build.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. #!/usr/bin/env python3
  2. from collections import defaultdict
  3. import codecs
  4. import argparse
  5. import os
  6. import polib
  7. import struct
  8. import sys
  9. import lib.charset as cs
  10. from lib.io import info, warn, fatal, load_map
  11. FW_MAGIC = 0x4bb45aa5
  12. def translation_ref(translation):
  13. cmt = translation.comment
  14. if cmt and cmt.startswith('MSG_'):
  15. return cmt.split(' ', 1)[0]
  16. else:
  17. return repr(translation.msgid)
  18. def main():
  19. ap = argparse.ArgumentParser()
  20. ap.add_argument('--warn-unused', action='store_true',
  21. help='Warn about unused translations')
  22. ap.add_argument('--show-coalesced', action='store_true',
  23. help='List coalesced translations')
  24. ap.add_argument('map', help='Firmware symbol map file')
  25. ap.add_argument('po', help='PO file')
  26. ap.add_argument('out', help='output')
  27. args = ap.parse_args()
  28. # check arguments
  29. for path in [args.map, args.po]:
  30. if not os.path.isfile(path):
  31. fatal("{} does not exist or is not a regular file".format(args.po))
  32. # load the map file
  33. syms = load_map(args.map)
  34. fw_sig_data = None
  35. msgid_data = defaultdict(list)
  36. id_msgid = {}
  37. sym_cnt = 0
  38. for sym in syms:
  39. if sym['name'] == '_PRI_LANG_SIGNATURE':
  40. fw_sig_data = sym['data']
  41. else:
  42. # redo forward text transformation for transparent matching
  43. msgid = cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
  44. msgid_data[msgid].append(sym)
  45. id_msgid[sym['id']] = msgid
  46. # update the max symbol count
  47. if sym_cnt <= sym['id']:
  48. sym_cnt = sym['id'] + 1
  49. if fw_sig_data is None:
  50. fatal('_PRI_LANG_SIGNATURE not found in map')
  51. # open translations
  52. po = polib.pofile(args.po)
  53. lang_code = po.metadata['Language']
  54. if not lang_code.isascii() or len(lang_code) != 2:
  55. fatal(f'unsupported language code {lang_code}')
  56. # build a catalog of all translations
  57. trans_table = {}
  58. for translation in po:
  59. if translation.obsolete:
  60. continue
  61. msgid = translation.msgid
  62. found = msgid in msgid_data
  63. if found:
  64. trans_table[msgid] = (translation, msgid_data[msgid])
  65. elif args.warn_unused:
  66. err = "{}:{}".format(args.po, translation.linenum)
  67. err += ": unused translation "
  68. err += translation_ref(translation)
  69. warn(err)
  70. for msgid, syms in msgid_data.items():
  71. if msgid not in trans_table:
  72. # warn about missing translations
  73. warn("untranslated text: " + repr(msgid))
  74. # write the binary catalog
  75. with open(args.out, "w+b") as fd:
  76. fixed_offset = 16+2*sym_cnt
  77. written_locs = {}
  78. # compute final data tables
  79. offsets = b''
  80. strings = b'\0'
  81. for i in range(sym_cnt):
  82. msgid = id_msgid.get(i)
  83. translation = trans_table.get(msgid)
  84. if translation is None or len(translation[0].msgstr) == 0 or translation[0].msgstr == msgid:
  85. # first slot reserved for untraslated/identical entries
  86. offsets += struct.pack("<H", fixed_offset)
  87. else:
  88. string_bin = cs.unicode_to_source(translation[0].msgstr)
  89. # check for invalid characters
  90. invalid_char = cs.translation_check(string_bin)
  91. if invalid_char is not None:
  92. line = translation[0].linenum
  93. warn(f'{args.po}:{line} contains unhandled character ' + repr(invalid_char))
  94. string_bin = string_bin.encode('raw_unicode_escape', 'ignore')
  95. string_off = written_locs.get(string_bin)
  96. offset = fixed_offset + len(strings)
  97. if string_off is not None:
  98. # coalesce repeated strings
  99. if args.show_coalesced:
  100. info(f'coalescing {offset:04x}:{string_off:04x} {string_bin}')
  101. offset = string_off
  102. else:
  103. # allocate a new string
  104. written_locs[string_bin] = offset
  105. strings += string_bin + b'\0'
  106. offsets += struct.pack("<H", offset)
  107. # header
  108. size = 16 + len(offsets) + len(strings)
  109. header = struct.pack(
  110. "<IHHHHI",
  111. FW_MAGIC,
  112. size,
  113. sym_cnt,
  114. 0, # no checksum yet
  115. (ord(lang_code[0]) << 8) + ord(lang_code[1]),
  116. fw_sig_data)
  117. fd.write(header)
  118. fd.write(offsets)
  119. fd.write(strings)
  120. # calculate and update the checksum
  121. cksum = 0
  122. fd.seek(0)
  123. for i in range(size):
  124. cksum += (ord(fd.read(1)) << (0 if i % 2 else 8))
  125. cksum &= 0xffff
  126. fd.seek(8)
  127. fd.write(struct.pack("<H", cksum))
  128. return 0
  129. if __name__ == '__main__':
  130. exit(main())