#!/usr/bin/env python3
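"""Build a binary language catalog from a firmware symbol map and a PO file.

Example invocation (script and file names are illustrative only):

    python3 lang-build.py --warn-unused Firmware.map Firmware_cs.po lang_cs.bin
"""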
from collections import defaultdict
import codecs
import argparse
import os
import polib
import struct
import sys

import lib.charset as cs
from lib.io import info, warn, fatal, load_map

FW_MAGIC = 0x4bb45aa5
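
# Binary catalog layout, as emitted by the packing code in main()
# (all multi-byte fields are little-endian, offsets relative to file start):
#   0x00  u32  FW_MAGIC
#   0x04  u16  total catalog size in bytes
#   0x06  u16  number of string slots (sym_cnt)
#   0x08  u16  16-bit checksum, patched in after the data is written
#   0x0a  u16  two-letter language code, first letter in the high byte
#   0x0c  u32  firmware signature (_PRI_LANG_SIGNATURE from the map)
#   0x10  u16  offset table, one entry per slot
#   ....       NUL-terminated string pool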


def translation_ref(translation):
    cmt = translation.comment
    if cmt and cmt.startswith('MSG_'):
        return cmt.split(' ', 1)[0]
    else:
        return repr(translation.msgid)


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument('--warn-unused', action='store_true',
                    help='Warn about unused translations')
    ap.add_argument('--show-coalesced', action='store_true',
                    help='List coalesced translations')
    ap.add_argument('map', help='Firmware symbol map file')
    ap.add_argument('po', help='PO file')
    ap.add_argument('out', help='output')
    args = ap.parse_args()

    # check arguments
    for path in [args.map, args.po]:
        if not os.path.isfile(path):
            fatal("{} does not exist or is not a regular file".format(path))

    # load the map file
    syms = load_map(args.map)
    fw_sig_data = None
    msgid_data = defaultdict(list)
    id_msgid = {}
    sym_cnt = 0
    for sym in syms:
        if sym['name'] == '_PRI_LANG_SIGNATURE':
            fw_sig_data = sym['data']
        else:
            # redo forward text transformation for transparent matching
            msgid = cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
            msgid_data[msgid].append(sym)
            id_msgid[sym['id']] = msgid

            # update the max symbol count
            if sym_cnt <= sym['id']:
                sym_cnt = sym['id'] + 1
    if fw_sig_data is None:
        fatal('_PRI_LANG_SIGNATURE not found in map')
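
    # note: symbol ids from the map may be sparse; any slot without a msgid
    # falls through to the "untranslated" entry when the offset table is built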

    # open translations
    po = polib.pofile(args.po)
    lang_code = po.metadata['Language']
    if not lang_code.isascii() or len(lang_code) != 2:
        fatal(f'unsupported language code {lang_code}')

    # build a catalog of all translations
    trans_table = {}
    for translation in po:
        if translation.obsolete:
            continue
        msgid = translation.msgid
        if msgid in msgid_data:
            trans_table[msgid] = (translation, msgid_data[msgid])
        elif args.warn_unused:
            err = "{}:{}".format(args.po, translation.linenum)
            err += ": unused translation "
            err += translation_ref(translation)
            warn(err)

    # warn about missing translations
    for msgid in msgid_data:
        if msgid not in trans_table:
            warn("untranslated text: " + repr(msgid))

    # write the binary catalog
    with open(args.out, "w+b") as fd:
        fixed_offset = 16 + 2 * sym_cnt
        written_locs = {}

        # compute final data tables
        offsets = b''
        strings = b'\0'
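
        # the string pool starts with a single NUL, so entries that point at
        # fixed_offset resolve to an empty string (presumably the firmware
        # falls back to the built-in primary language text in that case)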
        for i in range(sym_cnt):
            msgid = id_msgid.get(i)
            translation = trans_table.get(msgid)
            if translation is None or len(translation[0].msgstr) == 0 or translation[0].msgstr == msgid:
                # first slot reserved for untranslated/identical entries
                offsets += struct.pack("<H", fixed_offset)
            else:
                string_bin = cs.unicode_to_source(translation[0].msgstr)

                # check for invalid characters
                invalid_char = cs.translation_check(string_bin)
                if invalid_char is not None:
                    line = translation[0].linenum
                    warn(f'{args.po}:{line} contains unhandled character ' + repr(invalid_char))

                # encode the translated string for the catalog
                string_bin = string_bin.encode('raw_unicode_escape', 'ignore')

                string_off = written_locs.get(string_bin)
                offset = fixed_offset + len(strings)
                if string_off is not None:
                    # coalesce repeated strings
                    if args.show_coalesced:
                        info(f'coalescing {offset:04x}:{string_off:04x} {string_bin}')
                    offset = string_off
                else:
                    # allocate a new string
                    written_locs[string_bin] = offset
                    strings += string_bin + b'\0'
                offsets += struct.pack("<H", offset)

        # header
        size = 16 + len(offsets) + len(strings)
        header = struct.pack(
            "<IHHHHI",
            FW_MAGIC,
            size,
            sym_cnt,
            0,  # no checksum yet
            (ord(lang_code[0]) << 8) + ord(lang_code[1]),
            fw_sig_data)
        fd.write(header)
        fd.write(offsets)
        fd.write(strings)
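
        # the checksum sums the file as 16-bit words: bytes at even offsets
        # contribute the high byte, bytes at odd offsets the low byte, modulo
        # 0x10000; the checksum field itself is still zero at this point, so
        # it does not bias the result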

        # calculate and update the checksum
        cksum = 0
        fd.seek(0)
        for i in range(size):
            cksum += ord(fd.read(1)) << (0 if i % 2 else 8)
            cksum &= 0xffff
        fd.seek(8)
        fd.write(struct.pack("<H", cksum))

    return 0


if __name__ == '__main__':
    sys.exit(main())