elf_mem_map 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. #!/usr/bin/env python3
  2. import argparse
  3. import elftools.elf.elffile
  4. import elftools.dwarf.descriptions
  5. from collections import namedtuple
  6. from struct import unpack
  7. import sys
  8. import re
  # Symbols located below SRAM_START are treated as registers and are left
  # out of the space-usage (qdirstat) report.
  SRAM_START = 0x200

  # Decoded DWARF addresses are accepted only inside
  # [SRAM_OFFSET, EEPROM_OFFSET); SRAM_OFFSET is then subtracted to obtain
  # the address stored in the map.
  SRAM_OFFSET = 0x800000
  EEPROM_OFFSET = 0x810000

  # Padding byte for address ranges that a decoded dump does not cover.
  FILL_BYTE = b'\0'

  # One reported symbol: name, address, size in bytes, 'file:line' origin.
  Entry = namedtuple('Entry', 'name loc size declpos')
  # One structure member: name, offset inside the structure, size in bytes.
  Member = namedtuple('Member', 'name off size')
  def array_inc(loc, dim, idx=0):
      """Advance the multi-dimensional index `loc` by one step, in place.

      `loc` behaves like an odometer whose digit `idx` (index 0 by default)
      moves fastest: a digit that reaches its bound in `dim` is reset to 0
      and the carry moves on to the next higher index.

      Returns True when the carry runs past the last dimension (every digit
      from `idx` upwards wrapped back to 0), False otherwise.
      """
      for pos in range(idx, len(dim)):
          loc[pos] += 1
          if loc[pos] != dim[pos]:
              return False
          # digit overflowed: reset it and carry into the next one
          loc[pos] = 0
      return True
  def get_type_size(type_DIE):
      """Follow the DW_AT_type chain until a DIE carrying a byte size is found.

      Returns a `(DIE, byte_size)` tuple for the first DIE in the chain
      (starting with `type_DIE` itself) that has DW_AT_byte_size, or None
      when the chain ends without one.
      """
      node = type_DIE
      while 'DW_AT_byte_size' not in node.attributes:
          if 'DW_AT_type' not in node.attributes:
              # end of the chain, no size information available
              return None
          node = node.get_DIE_from_attribute('DW_AT_type')
      return node, node.attributes.get('DW_AT_byte_size').value
  def get_type_arrsize(type_DIE):
      """Return the total storage size of a (possibly array) type in bytes.

      The size reported by get_type_size() is multiplied by every known
      array dimension (DW_AT_upper_bound + 1 of each subrange) found on the
      array type in the DW_AT_type chain. When the sized DIE is a pointer
      type the plain size is returned without looking for dimensions.
      Returns None when no size can be determined.
      """
      resolved = get_type_size(type_DIE)
      if resolved is None:
          return None

      sized_DIE, total = resolved
      if sized_DIE.tag == 'DW_TAG_pointer_type':
          return total

      array_DIE = get_type_def(type_DIE, 'DW_TAG_array_type')
      if array_DIE is None:
          return total

      for child in array_DIE.iter_children():
          if child.tag != 'DW_TAG_subrange_type':
              continue
          if 'DW_AT_upper_bound' not in child.attributes:
              continue
          total *= child.attributes['DW_AT_upper_bound'].value + 1
      return total
  def get_type_def(type_DIE, type_tag):
      """Find the first DIE with tag `type_tag` along the DW_AT_type chain.

      The search starts with `type_DIE` itself. Returns the matching DIE,
      or None when the chain ends before a match is found.
      """
      node = type_DIE
      while node.tag != type_tag:
          if 'DW_AT_type' not in node.attributes:
              return None
          node = node.get_DIE_from_attribute('DW_AT_type')
      return node
  def get_FORM_block1(attr):
      """Decode the operand of a one-operation expression in a block1 attribute.

      Only the first opcode of the block is examined:
        * OP_addr (3): the remaining bytes are read as a little-endian
          integer;
        * OP_plus_uconst (35): the remaining bytes are decoded as ULEB128.

      Returns the decoded integer, or None when the attribute form is not
      DW_FORM_block1, the block is empty, or the opcode is not handled.
      """
      op_addr = 3
      op_plus_uconst = 35

      if attr.form != 'DW_FORM_block1':
          return None

      expr = attr.value
      if not expr:
          # an empty block has no opcode to inspect; indexing it would raise
          return None

      opcode, operand = expr[0], expr[1:]
      if opcode == op_addr:
          return int.from_bytes(operand, 'little')
      if opcode == op_plus_uconst:
          value = 0
          shift = 0
          for byte in operand:
              value |= (byte & 0x7f) << shift
              if byte & 0x80 == 0:
                  # high bit clear marks the last ULEB128 byte
                  break
              shift += 7
          return value
      return None
  def get_array_dims(DIE):
      """Return the list of array dimensions reachable from `DIE`.

      The array type is looked up along the DW_AT_type chain; each of its
      subrange children that has DW_AT_upper_bound contributes
      `upper_bound + 1`, in child order. Returns an empty list when `DIE`
      does not lead to an array type.
      """
      array_DIE = get_type_def(DIE, 'DW_TAG_array_type')
      if array_DIE is None:
          return []
      return [child.attributes['DW_AT_upper_bound'].value + 1
              for child in array_DIE.iter_children()
              if child.tag == 'DW_TAG_subrange_type'
              and 'DW_AT_upper_bound' in child.attributes]
  def get_struct_members(DIE, entry, expand_structs, struct_gaps):
      """List the named members of the structure type reachable from `DIE`.

      Only one level is expanded. Member arrays are flattened the same way
      as top-level arrays: byte arrays are kept as a single 'name[]' member
      (likely strings), other arrays produce one member per element with a
      zero-padded index suffix.

      Parameters:
        DIE            -- DIE whose DW_AT_type chain is searched for a
                          DW_TAG_structure_type
        entry          -- Entry of the owning variable; its `size` bounds
                          the trailing gap
        expand_structs -- accepted for interface compatibility, not used
        struct_gaps    -- when true, add '*UNKNOWN*' members for the bytes
                          not covered by any member and return the list
                          ordered by offset

      Returns a list of Member tuples (empty when no structure is found).
      Members whose offset or size cannot be determined are skipped.
      """
      struct_DIE = get_type_def(DIE, 'DW_TAG_structure_type')
      if struct_DIE is None:
          return []

      members = []
      for member_DIE in struct_DIE.iter_children():
          if member_DIE.tag != 'DW_TAG_member' or \
             'DW_AT_name' not in member_DIE.attributes:
              continue

          # A member that cannot be placed or sized has no slot in the
          # layout; skip it rather than fail on the missing attribute or on
          # a None result further down.
          if 'DW_AT_data_member_location' not in member_DIE.attributes:
              continue
          m_off = get_FORM_block1(member_DIE.attributes['DW_AT_data_member_location'])
          m_size = get_type_size(member_DIE)
          if m_off is None or m_size is None:
              continue

          m_name = member_DIE.attributes['DW_AT_name'].value.decode('ascii')
          m_byte_size = m_size[1]

          # still expand member arrays
          m_array_dim = get_array_dims(member_DIE)
          if m_byte_size == 1 and len(m_array_dim) > 1:
              # likely string, remove one dimension
              m_byte_size *= m_array_dim.pop()

          if len(m_array_dim) == 0 or (len(m_array_dim) == 1 and m_array_dim[0] == 1):
              # plain entry
              members.append(Member(m_name, m_off, m_byte_size))
          elif len(m_array_dim) == 1 and m_byte_size == 1:
              # likely string, avoid expansion
              members.append(Member(m_name + '[]', m_off, m_array_dim[0]))
          else:
              # expand array entries
              m_array_pos = m_off
              m_array_loc = [0] * len(m_array_dim)
              while True:
                  # location index, zero-padded to the width of the largest index
                  sfx = ''.join(
                      '[{}]'.format(str(pos).rjust(len(str(bound - 1)), '0'))
                      for pos, bound in zip(m_array_loc, m_array_dim))
                  members.append(Member(m_name + sfx, m_array_pos, m_byte_size))
                  # advance
                  if array_inc(m_array_loc, m_array_dim):
                      break
                  m_array_pos += m_byte_size

      if struct_gaps and members:
          members.sort(key=lambda x: x.off)
          filled = []
          last_end = 0
          for member in members:
              if member.off > last_end:
                  # fill gap in front of this member
                  filled.append(Member('*UNKNOWN*', last_end, member.off - last_end))
              filled.append(member)
              last_end = member.off + member.size
          if entry.size > last_end:
              # fill gap at the end
              filled.append(Member('*UNKNOWN*', last_end, entry.size - last_end))
          members = filled

      return members
  def get_elf_globals(path, expand_structs, struct_gaps=True):
      """Collect the global variables described by the DWARF data of an ELF file.

      Every DW_TAG_variable DIE is considered whose DW_AT_location decodes
      (through get_FORM_block1) to an address in
      [SRAM_OFFSET, EEPROM_OFFSET). The stored address has SRAM_OFFSET
      subtracted. Arrays are expanded into one entry per element (byte
      arrays are kept whole as 'name[]'), and structures into one entry per
      member when `expand_structs` is true.

      Parameters:
        path           -- path of the ELF file
        expand_structs -- expand structure members (one level only)
        struct_gaps    -- forwarded to get_struct_members()

      Returns a list of Entry tuples in discovery order, or None when the
      file carries no DWARF information.
      """
      grefs = []

      def expand_members(entry, members):
          # emit the entry itself, or one 'entry.member' record per member
          if not members:
              grefs.append(entry)
              return
          for member in members:
              grefs.append(Entry(entry.name + '.' + member.name,
                                 entry.loc + member.off, member.size,
                                 entry.declpos))

      # elffile/dwarfinfo are used until the last DIE has been processed, so
      # the file stays open for the whole walk and is closed on every exit
      with open(path, "rb") as fd:
          elffile = elftools.elf.elffile.ELFFile(fd)
          if not elffile.has_dwarf_info():
              return None

          # probably not needed, since we're decoding expressions manually
          elftools.dwarf.descriptions.set_global_machine_arch(elffile.get_machine_arch())
          dwarfinfo = elffile.get_dwarf_info()

          for CU in dwarfinfo.iter_CUs():
              # a CU without a line program yields None; fall back to an empty
              # table so declaration files are reported by index only
              line_program = dwarfinfo.line_program_for_CU(CU)
              if line_program is None:
                  file_entries = []
              else:
                  file_entries = line_program.header["file_entry"]

              for DIE in CU.iter_DIEs():
                  # handle only variable types
                  if DIE.tag != 'DW_TAG_variable':
                      continue
                  if 'DW_AT_location' not in DIE.attributes:
                      continue
                  if 'DW_AT_name' not in DIE.attributes and \
                     'DW_AT_abstract_origin' not in DIE.attributes:
                      continue

                  # handle locations encoded directly as DW_OP_addr (leaf globals)
                  loc = get_FORM_block1(DIE.attributes['DW_AT_location'])
                  if loc is None or loc < SRAM_OFFSET or loc >= EEPROM_OFFSET:
                      continue
                  loc -= SRAM_OFFSET

                  # variable name/type
                  if 'DW_AT_name' not in DIE.attributes and \
                     'DW_AT_abstract_origin' in DIE.attributes:
                      DIE = DIE.get_DIE_from_attribute('DW_AT_abstract_origin')
                      if 'DW_AT_location' in DIE.attributes:
                          # duplicate reference (handled directly), skip
                          continue
                  if 'DW_AT_name' not in DIE.attributes:
                      continue
                  if 'DW_AT_type' not in DIE.attributes:
                      continue
                  name = DIE.attributes['DW_AT_name'].value.decode('ascii')

                  # get final storage size
                  size = get_type_size(DIE)
                  if size is None:
                      continue
                  byte_size = size[1]

                  # location of main definition
                  declpos = ''
                  if 'DW_AT_decl_file' in DIE.attributes and \
                     'DW_AT_decl_line' in DIE.attributes:
                      line = DIE.attributes['DW_AT_decl_line'].value
                      fname = DIE.attributes['DW_AT_decl_file'].value
                      if fname and fname - 1 < len(file_entries):
                          fname = file_entries[fname-1].name.decode('ascii')
                      declpos = '{}:{}'.format(fname, line)

                  # fetch array dimensions (if known)
                  array_dim = get_array_dims(DIE)

                  # fetch structure members (one level only)
                  entry = Entry(name, loc, byte_size, declpos)
                  if not expand_structs or size[0].tag == 'DW_TAG_pointer_type':
                      members = []
                  else:
                      members = get_struct_members(DIE, entry, expand_structs, struct_gaps)

                  if byte_size == 1 and len(array_dim) > 1:
                      # likely string, remove one dimension
                      byte_size *= array_dim.pop()

                  if len(array_dim) == 0 or (len(array_dim) == 1 and array_dim[0] == 1):
                      # plain entry
                      expand_members(entry, members)
                  elif len(array_dim) == 1 and byte_size == 1:
                      # likely string, avoid expansion
                      grefs.append(Entry(entry.name + '[]', entry.loc,
                                         array_dim[0], entry.declpos))
                  else:
                      # expand array entries
                      array_pos = loc
                      array_loc = [0] * len(array_dim)
                      while True:
                          # location index, zero-padded to the widest index
                          sfx = ''.join(
                              '[{}]'.format(str(pos).rjust(len(str(bound - 1)), '0'))
                              for pos, bound in zip(array_loc, array_dim))
                          expand_members(Entry(entry.name + sfx, array_pos,
                                               byte_size, entry.declpos), members)
                          # advance
                          if array_inc(array_loc, array_dim):
                              break
                          array_pos += byte_size

      return grefs
  def decode_dump(path):
      """Extract the memory dump contained in a text log.

      Lines are ignored until one whose first token is 'D2' or 'D23'. After
      that each line is expected to hold a hexadecimal address followed by
      hexadecimal data bytes, until a line starting with 'ok' ends the dump.
      Lines starting with 'reason:' are ignored; lines that cannot be parsed
      are reported on stderr (with their 0-based line index) and skipped.

      The chunks are merged into one contiguous buffer; ranges not covered
      by any chunk are padded with FILL_BYTE, and later chunks overwrite
      earlier ones where they overlap.

      Returns `(start_address, data)` with `data` as bytes, or
      `(None, None)` when no dump data was found.
      """
      buf_addr = None  # starting address
      buf_data = None  # data
      in_dump = False

      with open(path, 'r') as fd:
          for lineno, raw in enumerate(fd):
              text = raw.rstrip()
              tokens = text.split(maxsplit=1)

              if not in_dump:
                  if tokens and tokens[0] in ('D2', 'D23'):
                      in_dump = True
                  continue

              if not tokens:
                  print('malformed line {}: {}'.format(lineno, text), file=sys.stderr)
                  continue
              if tokens[0] == 'ok':
                  break
              if tokens[0] == 'reason:':
                  # ignored
                  continue

              # both an address and a data part are required, and the whole
              # address token (not just its first character) has to be hex
              try:
                  if len(tokens) != 2 or \
                     not re.fullmatch(r'[0-9a-fA-F]+', tokens[0]):
                      raise ValueError(text)
                  addr = int(tokens[0], 16)
                  data = bytes.fromhex(tokens[1])
              except ValueError:
                  print('malformed line {}: {}'.format(lineno, text), file=sys.stderr)
                  continue

              if buf_data is None:
                  buf_addr = addr
                  buf_data = bytearray(data)
                  continue

              # grow buffer as needed
              if addr < buf_addr:
                  # pad in FRONT of what was collected so far (the existing
                  # data must be kept, not replaced by the padding)
                  buf_data[:0] = FILL_BYTE * (buf_addr - addr)
                  buf_addr = addr
              rep_start = addr - buf_addr
              rep_end = rep_start + len(data)
              if rep_end > len(buf_data):
                  buf_data += FILL_BYTE * (rep_end - len(buf_data))

              # replace new part
              buf_data[rep_start:rep_end] = data

      if buf_data is None:
          return (None, None)
      return (buf_addr, bytes(buf_data))
  def annotate_refs(grefs, addr, data, width, gaps=True, overlaps=True):
      """Print the dump contents of every symbol that lies inside the dump.

      Parameters:
        grefs    -- iterable of entries with `name`, `loc` and `size`
        addr     -- address of the first byte of `data`
        data     -- dump contents
        width    -- width of the name column
        gaps     -- also print the bytes between two consecutive printed
                    entries as '*UNKNOWN*'
        overlaps -- print an '*OVERLAP*' line when an entry starts more
                    than one byte before the end of the previously printed
                    one (the reported size is `pos - last_end`, so it is
                    negative)

      Entries that are not fully contained in the dump are skipped. For
      each printed entry the raw bytes are shown in hex (R:); sizes 1/2/4
      are additionally shown as an unsigned little-endian integer (I:) and
      sizes 4/8 as a little-endian float/double (F:).
      """
      dump_end = addr + len(data)
      last_end = None
      for entry in grefs:
          if entry.loc < addr:
              continue
          if entry.loc + entry.size > dump_end:
              continue

          pos = entry.loc - addr
          end_pos = pos + entry.size
          buf = data[pos:end_pos]

          buf_repr = ''
          if len(buf) in (1, 2, 4):
              # attempt to decode as integers
              buf_repr += ' I:' + str(int.from_bytes(buf, 'little')).rjust(10)
          if len(buf) in (4, 8):
              # attempt to decode as floats; the byte order is given
              # explicitly so the result matches the integer decoding above
              # regardless of the byte order of the host running this tool
              typ = '<f' if len(buf) == 4 else '<d'
              buf_repr += ' F:' + '{:10.3f}'.format(unpack(typ, buf)[0])

          if last_end is not None:
              if gaps and last_end < pos:
                  # decode gaps
                  gap_size = pos - last_end
                  gap_buf = data[last_end:pos]
                  print('{:04x} {} {:4} R:{}'.format(addr+last_end, "*UNKNOWN*".ljust(width),
                                                     gap_size, gap_buf.hex()))
              if overlaps and last_end > pos + 1:
                  gap_size = pos - last_end
                  print('{:04x} {} {:4}'.format(addr+last_end, "*OVERLAP*".ljust(width), gap_size))

          print('{:04x} {} {:4}{} R:{}'.format(entry.loc, entry.name.ljust(width),
                                               entry.size, buf_repr, buf.hex()))
          last_end = end_pos
  def print_map(grefs):
      """Print the symbol map as tab-separated text, one entry per line.

      A header line is printed first; each following line holds the
      address in hexadecimal, the size, the name and the declaration
      position of one entry.
      """
      print('OFFSET\tSIZE\tNAME\tDECLPOS')
      for ref in grefs:
          columns = ('{:x}'.format(ref.loc), ref.size, ref.name, ref.declpos)
          print(*columns, sep='\t')
  def print_qdirstat(grefs):
      """Print the symbol sizes as a qdirstat cache file.

      Entry names are split on '[', ']' and '.'; all components but the
      last become nested directories and the last one becomes a file whose
      size is the entry size. Entries located below SRAM_START (registers)
      are left out.

      Name clashes are resolved without losing any entry:
        * a file whose name is already taken in its directory is stored as
          '<full entry name>_<address in hex>';
        * when a directory is needed under a name already used by a file,
          the file is re-keyed with trailing underscores and the directory
          takes the plain name.
      """
      print('[qdirstat 1.0 cache file]')

      tree = {}
      for entry in grefs:
          # do not output registers when looking at space usage
          if entry.loc < SRAM_START:
              continue

          parts = [part for part in re.split(r'[\[\].]', entry.name) if part]
          if not parts:
              # nothing usable as a file name
              continue

          node = tree
          for part in parts[:-1]:
              child = node.get(part)
              if not isinstance(child, dict):
                  if child is not None:
                      # the name currently holds a file size: move that file
                      # aside so the directory can be created under this name
                      alias = part + '_'
                      while alias in node:
                          alias += '_'
                      node[alias] = child
                  child = {}
                  node[part] = child
              node = child

          name = parts[-1]
          if name in node:
              name = '{}_{:x}'.format(entry.name, entry.loc)
          node[name] = entry.size

      def walker(root, prefix):
          files = []
          dirs = []
          for name, content in root.items():
              if isinstance(content, dict):
                  dirs.append((name, content))
              else:
                  files.append((name, content))

          # print files
          print('D\t{}\t{}\t0x0'.format(prefix, 0))
          for name, size in files:
              print('F\t{}\t{}\t0x0'.format(name, size))

          # recurse directories; strip the trailing separator of the root
          # prefix so children read '/name' instead of '//name'
          for name, content in dirs:
              walker(content, prefix.rstrip('/') + '/' + name)

      walker(tree, '/')
  def main():
      """Command-line entry point.

      Builds the symbol list from the ELF file, orders it by address and
      prints it either as a plain map (--map), as a qdirstat cache file
      (--qdirstat) or as an annotation of the given memory dump.

      Returns the process exit status: 0 on success, 1 when the ELF file
      has no DWARF information or the dump file contains no dump.
      """
      ap = argparse.ArgumentParser(description="""
          Generate a symbol table map starting directly from an ELF
          firmware with DWARF3 debugging information.
          When used along with a memory dump obtained from the D2 g-code,
          show the value of each symbol which is within the address range.
      """)
      ap.add_argument('elf', help='ELF file containing DWARF debugging information')
      ap.add_argument('--no-gaps', action='store_true',
                      help='do not dump memory inbetween known symbols')
      ap.add_argument('--no-expand-structs', action='store_true',
                      help='do not decode structure data')
      ap.add_argument('--overlaps', action='store_true',
                      help='annotate overlaps greater than 1 byte')
      ap.add_argument('--name-width', type=int, default=50,
                      help='set name column width')
      g = ap.add_mutually_exclusive_group(required=True)
      g.add_argument('dump', nargs='?', help='RAM dump obtained from D2 g-code')
      g.add_argument('--map', action='store_true', help='dump global memory map')
      g.add_argument('--qdirstat', action='store_true',
                     help='dump qdirstat-compatible size usage map')
      args = ap.parse_args()

      grefs = get_elf_globals(args.elf, expand_structs=not args.no_expand_structs)
      if grefs is None:
          # get_elf_globals() gives None when there is nothing to read
          print('{}: no DWARF debugging information found'.format(args.elf),
                file=sys.stderr)
          return 1
      grefs = sorted(grefs, key=lambda x: x.loc)

      if args.map:
          print_map(grefs)
      elif args.qdirstat:
          print_qdirstat(grefs)
      else:
          decoded = decode_dump(args.dump)
          if decoded is None or decoded[1] is None:
              # the log held no dump lines, so there is nothing to annotate
              print('{}: no memory dump found'.format(args.dump), file=sys.stderr)
              return 1
          addr, data = decoded
          annotate_refs(grefs, addr, data,
                        width=args.name_width,
                        gaps=not args.no_gaps,
                        overlaps=args.overlaps)
      return 0


  if __name__ == '__main__':
      # sys.exit is always available, unlike the exit() helper added by site
      sys.exit(main())