#!/usr/bin/env python3
"""Build a binary translation catalog from a firmware symbol map and a PO file.

The output consists of a 16-byte header, a table of 16-bit string offsets
(one per symbol id) and a pool of NUL-terminated translated strings.
"""
from collections import defaultdict
import codecs
import argparse
import os
import polib
import struct
import sys

import lib.charset as cs
from lib.io import info, warn, fatal, load_map

FW_MAGIC = 0x4bb45aa5


def translation_ref(translation):
    """Return a short reference to *translation* for use in diagnostics.

    When the entry's comment starts with ``MSG_`` the first space-delimited
    token of the comment is returned; otherwise the ``repr`` of the msgid.
    """
    cmt = translation.comment
    if cmt and cmt.startswith('MSG_'):
        return cmt.split(' ', 1)[0]
    return repr(translation.msgid)


def _checksum(data):
    """Return the 16-bit checksum of the bytes-like object *data*.

    Bytes at even offsets are added shifted left by 8, bytes at odd offsets
    are added unshifted, and the total is truncated to 16 bits.
    """
    return ((sum(data[0::2]) << 8) + sum(data[1::2])) & 0xffff


def main():
    """Parse the command line, build the catalog and write it to ``out``.

    Returns:
        0 on success (used as the process exit status).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--warn-unused', action='store_true',
                    help='Warn about unused translations')
    ap.add_argument('--show-coalesced', action='store_true',
                    help='List coalesced translations')
    ap.add_argument('map', help='Firmware symbol map file')
    ap.add_argument('po', help='PO file')
    ap.add_argument('out', help='output')
    args = ap.parse_args()

    # check arguments; report the path that actually failed the check
    for path in [args.map, args.po]:
        if not os.path.isfile(path):
            fatal("{} does not exist or is not a regular file".format(path))

    # load the map file
    syms = load_map(args.map)
    fw_sig_data = None
    msgid_data = defaultdict(list)
    id_msgid = {}
    sym_cnt = 0
    for sym in syms:
        if sym['name'] == '_PRI_LANG_SIGNATURE':
            fw_sig_data = sym['data']
        else:
            # redo forward text transformation for transparent matching
            msgid = cs.source_to_unicode(
                codecs.decode(sym['data'], 'unicode_escape', 'strict'))
            msgid_data[msgid].append(sym)
            id_msgid[sym['id']] = msgid

            # update the max symbol count
            if sym_cnt <= sym['id']:
                sym_cnt = sym['id'] + 1
    if fw_sig_data is None:
        fatal('_PRI_LANG_SIGNATURE not found in map')

    # open translations
    po = polib.pofile(args.po)
    lang_code = po.metadata['Language']
    if not lang_code.isascii() or len(lang_code) != 2:
        fatal(f'unsupported language code {lang_code}')

    # build a catalog of all translations
    trans_table = {}
    for translation in po:
        msgid = translation.msgid
        if msgid in msgid_data:
            trans_table[msgid] = (translation, msgid_data[msgid])
        elif args.warn_unused:
            err = "{}:{}".format(args.po, translation.linenum)
            err += ": unused translation "
            err += translation_ref(translation)
            warn(err)
    for msgid in msgid_data:
        if msgid not in trans_table:
            # warn about missing translations
            warn("untranslated text: " + repr(msgid))

    # compute final data tables
    # Offsets are relative to the start of the file: the strings pool starts
    # right after the 16-byte header and the 2-byte-per-symbol offset table.
    fixed_offset = 16 + 2 * sym_cnt
    written_locs = {}
    offsets = bytearray()
    strings = bytearray(b'\0')
    for i in range(sym_cnt):
        msgid = id_msgid.get(i)
        translation = trans_table.get(msgid)
        if translation is None or not translation[0].msgstr or translation[0].msgstr == msgid:
            # first slot reserved for untranslated/identical entries
            offsets += struct.pack("<H", fixed_offset)
            continue

        string_bin = cs.unicode_to_source(translation[0].msgstr)

        # check for invalid characters
        invalid_char = cs.translation_check(string_bin)
        if invalid_char is not None:
            line = translation[0].linenum
            warn(f'{args.po}:{line} contains unhandled character ' + repr(invalid_char))

        string_bin = string_bin.encode('raw_unicode_escape', 'ignore')
        string_off = written_locs.get(string_bin)
        offset = fixed_offset + len(strings)
        if string_off is not None:
            # coalesce repeated strings
            if args.show_coalesced:
                info(f'coalescing {offset:04x}:{string_off:04x} {string_bin}')
            offset = string_off
        else:
            # allocate a new string
            written_locs[string_bin] = offset
            strings += string_bin + b'\0'
        offsets += struct.pack("<H", offset)

    # header: magic, total size, symbol count, checksum, language code,
    # firmware signature
    size = 16 + len(offsets) + len(strings)
    header = struct.pack(
        "<IHHHHI",
        FW_MAGIC,
        size,
        sym_cnt,
        0,  # no checksum yet
        (ord(lang_code[0]) << 8) + ord(lang_code[1]),
        fw_sig_data)

    catalog = bytearray(header)
    catalog += offsets
    catalog += strings

    # calculate the checksum over the whole image (checksum field still
    # zero) and patch it into the header at byte offset 8
    struct.pack_into("<H", catalog, 8, _checksum(catalog))

    # write the binary catalog only once it has been fully assembled, so a
    # failure above does not leave a truncated output file behind
    with open(args.out, "wb") as fd:
        fd.write(catalog)

    return 0


if __name__ == '__main__':
    sys.exit(main())