lang-build.py

#!/usr/bin/env python3
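"""Build a binary language catalog for the firmware.

Reads the translatable strings from a firmware symbol map and their
translations from a PO file, then writes a compact binary catalog:
a fixed header, an offset table indexed by string id, and a
NUL-separated string pool.

Usage: lang-build.py [--warn-unused] [--show-coalesced] <map> <po> <out>
"""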
from collections import defaultdict
import codecs
import argparse
import os
import polib
import struct
import sys

import lib.charset as cs
from lib.io import info, warn, fatal, load_map

FW_MAGIC = 0x4bb45aa5


def translation_ref(translation):
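    """Return a printable reference for a PO entry: the MSG_* symbol name
    from its extracted comment when available, otherwise the repr of its msgid."""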
    cmt = translation.comment
    if cmt and cmt.startswith('MSG_'):
        return cmt.split(' ', 1)[0]
    else:
        return repr(translation.msgid)


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument('--warn-unused', action='store_true',
                    help='Warn about unused translations')
    ap.add_argument('--show-coalesced', action='store_true',
                    help='List coalesced translations')
    ap.add_argument('map', help='Firmware symbol map file')
    ap.add_argument('po', help='PO file')
    ap.add_argument('out', help='output')
    args = ap.parse_args()

    # check arguments
    for path in [args.map, args.po]:
        if not os.path.isfile(path):
            fatal("{} does not exist or is not a regular file".format(path))

    # load the map file
    syms = load_map(args.map)
    fw_sig_data = None
    msgid_data = defaultdict(list)
    id_msgid = {}
    sym_cnt = 0
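    # Each map entry carries a string id, a symbol name and the raw source
    # bytes; collect translatable strings by decoded msgid and track the
    # highest id so the offset table can be indexed directly by string id.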
    for sym in syms:
        if sym['name'] == '_PRI_LANG_SIGNATURE':
            fw_sig_data = sym['data']
        else:
            # redo forward text transformation for transparent matching
            msgid = cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
            msgid_data[msgid].append(sym)
            id_msgid[sym['id']] = msgid

            # update the max symbol count
            if sym_cnt <= sym['id']:
                sym_cnt = sym['id'] + 1
    if fw_sig_data is None:
        fatal('_PRI_LANG_SIGNATURE not found in map')

    # open translations
    po = polib.pofile(args.po)
    lang_code = po.metadata['Language']
    if not lang_code.isascii() or len(lang_code) != 2:
        fatal(f'unsupported language code {lang_code}')

    # build a catalog of all translations
    trans_table = {}
    for translation in po:
        msgid = translation.msgid
        found = msgid in msgid_data
        if found:
            trans_table[msgid] = (translation, msgid_data[msgid])
        elif args.warn_unused:
            err = "{}:{}".format(args.po, translation.linenum)
            err += ": unused translation "
            err += translation_ref(translation)
            warn(err)
    for msgid, syms in msgid_data.items():
        if msgid not in trans_table:
            # warn about missing translations
            warn("untranslated text: " + repr(msgid))

    # write the binary catalog
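    # Catalog layout: a 16-byte header, then sym_cnt little-endian uint16
    # offsets (one per string id), then the string pool. The pool starts with
    # a single NUL byte at fixed_offset, which is what untranslated or
    # identical entries point to.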
    with open(args.out, "w+b") as fd:
        fixed_offset = 16 + 2 * sym_cnt
        written_locs = {}

        # compute final data tables
        offsets = b''
        strings = b'\0'
        for i in range(sym_cnt):
            msgid = id_msgid.get(i)
            translation = trans_table.get(msgid)
            if translation is None or len(translation[0].msgstr) == 0 or translation[0].msgstr == msgid:
                # first slot reserved for untranslated/identical entries
                offsets += struct.pack("<H", fixed_offset)
            else:
                string_bin = cs.unicode_to_source(translation[0].msgstr)

                # check for invalid characters
                invalid_char = cs.translation_check(string_bin)
                if invalid_char is not None:
                    line = translation[0].linenum
                    warn(f'{args.po}:{line} contains unhandled character ' + repr(invalid_char))
                string_bin = string_bin.encode('raw_unicode_escape', 'ignore')

                string_off = written_locs.get(string_bin)
                offset = fixed_offset + len(strings)
                if string_off is not None:
                    # coalesce repeated strings
                    if args.show_coalesced:
                        info(f'coalescing {offset:04x}:{string_off:04x} {string_bin}')
                    offset = string_off
                else:
                    # allocate a new string
                    written_locs[string_bin] = offset
                    strings += string_bin + b'\0'
                offsets += struct.pack("<H", offset)

        # header
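        # Header fields (<IHHHHI): FW_MAGIC, total catalog size, symbol count,
        # a 16-bit checksum placeholder (patched after writing), the two-letter
        # language code packed into one 16-bit value, and the firmware
        # signature taken from _PRI_LANG_SIGNATURE.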
        size = 16 + len(offsets) + len(strings)
        header = struct.pack(
            "<IHHHHI",
            FW_MAGIC,
            size,
            sym_cnt,
            0,  # no checksum yet
            (ord(lang_code[0]) << 8) + ord(lang_code[1]),
            fw_sig_data)
        fd.write(header)
        fd.write(offsets)
        fd.write(strings)

        # calculate and update the checksum
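        # The checksum is the 16-bit truncated sum of the whole file read as
        # big-endian 16-bit words (bytes at even offsets contribute the high
        # byte); it is computed while the header's checksum field is still
        # zero, then patched in at byte offset 8.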
        cksum = 0
        fd.seek(0)
        for i in range(size):
            cksum += (ord(fd.read(1)) << (0 if i % 2 else 8))
        cksum &= 0xffff
        fd.seek(8)
        fd.write(struct.pack("<H", cksum))

    return 0


if __name__ == '__main__':
    exit(main())