lang-map.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. #!/usr/bin/env python3
  2. from collections import defaultdict
  3. import argparse
  4. import elftools.elf.elffile
  5. import struct
  6. import sys
  7. import zlib
  8. from lib.io import warn
  9. def warn_sym(name, start, size, msg):
  10. warn(f'{name}[{start:x}+{size:x}]: {msg}')
  11. def get_lang_symbols(elf, symtab):
  12. # fetch language markers
  13. pri_start = symtab.get_symbol_by_name("__loc_pri_start")[0].entry.st_value
  14. pri_end = symtab.get_symbol_by_name("__loc_pri_end")[0].entry.st_value
  15. text_data = elf.get_section_by_name('.text').data()
  16. # extract translatable symbols
  17. syms = []
  18. sym_id = 0
  19. for sym in sorted(symtab.iter_symbols(), key=lambda x: x.entry.st_value):
  20. sym_start = sym.entry.st_value
  21. sym_size = sym.entry.st_size
  22. sym_end = sym_start + sym_size
  23. if sym_start >= pri_start and sym_end < pri_end and sym_size > 0:
  24. data = text_data[sym_start:sym_end]
  25. # perform basic checks on the language section
  26. if data[0] != 255 or data[1] != 255:
  27. warn_sym(sym.name, sym_start, sym_size, 'invalid location offset')
  28. if data[-1] != 0:
  29. warn_sym(sym.name, sym_start, sym_size, 'unterminated string')
  30. syms.append({'start': sym_start,
  31. 'size': sym_size,
  32. 'name': sym.name,
  33. 'id': sym_id,
  34. 'data': data[2:-1]})
  35. sym_id += 1
  36. return syms
  37. def fw_signature(syms):
  38. # any id which is stable when the translatable string do not change would do, so build it out of
  39. # the firmware translation symbol table itself
  40. data = b''
  41. for sym in syms:
  42. data += struct.pack("<HHH", sym['start'], sym['size'], sym['id'])
  43. data += sym['name'].encode('ascii') + b'\0'
  44. data += sym['data'] + b'\0'
  45. return zlib.crc32(data)
  46. def get_sig_sym(symtab, syms):
  47. pri_sym = symtab.get_symbol_by_name('_PRI_LANG_SIGNATURE')[0]
  48. pri_sym_data = fw_signature(syms)
  49. pri_sym = {'start': pri_sym.entry.st_value,
  50. 'size': pri_sym.entry.st_size,
  51. 'name': pri_sym.name,
  52. 'id': '',
  53. 'data': pri_sym_data}
  54. return pri_sym
  55. def patch_binary(path, syms, pri_sym):
  56. fw = open(path, "r+b")
  57. # signature
  58. fw.seek(pri_sym['start'])
  59. fw.write(struct.pack("<I", pri_sym['data']))
  60. # string IDs
  61. for sym in syms:
  62. fw.seek(sym['start'])
  63. fw.write(struct.pack("<H", sym['id']))
  64. def check_duplicate_data(syms):
  65. data_syms = defaultdict(list)
  66. for sym in syms:
  67. data_syms[sym['data']].append(sym)
  68. for data, sym_list in data_syms.items():
  69. if len(sym_list) > 1:
  70. sym_names = [x['name'] for x in sym_list]
  71. warn(f'symbols {sym_names} contain the same data: {data}')
  72. def output_map(syms):
  73. print('OFFSET\tSIZE\tNAME\tID\tSTRING')
  74. for sym in syms:
  75. print('{:04x}\t{:04x}\t{}\t{}\t{}'.format(sym['start'], sym['size'], sym['name'], sym['id'], sym['data']))
  76. def main():
  77. ap = argparse.ArgumentParser()
  78. ap.add_argument('elf', help='Firmware ELF file')
  79. ap.add_argument('bin', nargs='?', help='Firmware BIN file')
  80. args = ap.parse_args()
  81. # extract translatable symbols
  82. elf = elftools.elf.elffile.ELFFile(open(args.elf, "rb"))
  83. symtab = elf.get_section_by_name('.symtab')
  84. syms = get_lang_symbols(elf, symtab)
  85. pri_sym = get_sig_sym(symtab, syms)
  86. # do one additional pass to check for symbols containing the same data
  87. check_duplicate_data(syms)
  88. # output the symbol table map
  89. output_map(syms + [pri_sym])
  90. # patch the symbols in the final binary
  91. if args.bin is not None:
  92. patch_binary(args.bin, syms, pri_sym)
  93. return 0
  94. if __name__ == '__main__':
  95. exit(main())