Get bin info.py
From Attie's Wiki
(Difference between revisions)
m |
m (now supports symbols in sections other than .rodata) |
||
Line 59: | Line 59: | ||
return info | return info | ||
− | def | + | def get_section_list(filename): |
− | + | sections = {} | |
− | + | consume = False | |
− | + | field_layouts = {} | |
− | + | field_layouts[10] = [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize', 'link', 'info', 'align' ] | |
− | + | field_layouts[11] = [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize', 'flags', 'link', 'info', 'align' ] | |
− | + | ||
− | + | ||
− | + | ||
− | + | fields_int = {} | |
− | + | fields_int['section_number'] = 10 | |
− | + | fields_int['address'] = 16 | |
− | + | fields_int['offset'] = 16 | |
− | + | fields_int['size'] = 16 | |
− | + | fields_int['entsize'] = 16 | |
− | + | fields_int['link'] = 10 | |
− | + | fields_int['info'] = 10 | |
− | + | fields_int['align'] = 10 | |
− | + | ||
− | + | ||
− | + | ||
− | + | for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers', '--wide' ]).split('\n'): | |
− | + | if not consume: | |
+ | if line == ' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al': | ||
+ | consume = True | ||
+ | continue | ||
+ | else: | ||
+ | if line == 'Key to Flags:': | ||
+ | consume = False | ||
+ | continue | ||
− | + | if not consume: | |
− | + | continue | |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | |||
− | |||
fields = line.split() | fields = line.split() | ||
+ | |||
if '[' in fields: | if '[' in fields: | ||
fields.remove('[') | fields.remove('[') | ||
− | if len(fields) | + | if len(fields) not in field_layouts: |
continue | continue | ||
+ | field_layout = field_layouts[len(fields)] | ||
+ | |||
try: | try: | ||
− | fields[0] = | + | fields[0] = fields[0].translate(None, ''.join([ '[', ']' ])) |
− | + | ||
− | + | ||
info = {} | info = {} | ||
− | + | for i in range(len(fields)): | |
− | + | field_name = field_layout[i] | |
− | + | ||
− | + | if field_name in fields_int: | |
− | + | field_value = int(fields[i], fields_int[field_name]) | |
− | + | else: | |
− | + | field_value = fields[i] | |
− | + | ||
− | + | info[field_name] = field_value | |
− | + | ||
− | + | sections[info['name']] = info; | |
− | + | ||
except: | except: | ||
pass | pass | ||
+ | |||
+ | return sections | ||
+ | |||
+ | def get_section_info(filename, section): | ||
+ | sections = get_section_list(filename) | ||
+ | |||
+ | if section in sections: | ||
+ | return sections[section] | ||
return None | return None | ||
− | def | + | def get_section_container(filename, base_addr, length = 0): |
− | + | sections = get_section_list(filename) | |
− | + | ||
+ | for section_name in sections: | ||
+ | section = sections[section_name] | ||
+ | |||
+ | section_start = section['address'] | ||
+ | section_end = section_start + section['size'] | ||
+ | |||
+ | if base_addr < section_start or base_addr > section_end: | ||
+ | continue | ||
+ | |||
+ | if length > 0 and base_addr + length > section_end: | ||
+ | continue | ||
+ | |||
+ | return section | ||
+ | |||
+ | return None | ||
+ | def get_section_data(filename, section_info): | ||
data = {} | data = {} | ||
data['base_offset'] = None | data['base_offset'] = None | ||
data['data'] = [] | data['data'] = [] | ||
− | for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', | + | for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section_info['name'] ]).split('\n'): |
if line[0:4] != ' 0x': | if line[0:4] != ' 0x': | ||
continue | continue | ||
Line 142: | Line 158: | ||
data['base_offset'] = int(cols[0][2:], 16) | data['base_offset'] = int(cols[0][2:], 16) | ||
if data['base_offset'] != section_info['offset']: | if data['base_offset'] != section_info['offset']: | ||
− | raise Exception('section "%s" has an unexpected starting address...' % ( | + | raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] )) |
remain = section_info['size'] - len(data['data']) | remain = section_info['size'] - len(data['data']) | ||
Line 159: | Line 175: | ||
return data | return data | ||
− | + | def get_symbol_info(filename, symbol): | |
− | symbol_info = | + | symbol_info = [] |
− | + | for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'): | |
− | + | fields = line.split() | |
− | offset = symbol_info['value'] - section_info['address'] | + | if len(fields) >= 7: |
− | data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ] | + | try: |
+ | fields[0] = int(fields[0][:-1]) | ||
+ | if fields[7] != symbol: | ||
+ | continue | ||
+ | |||
+ | data = {} | ||
+ | data['symbol_number'] = fields[0] | ||
+ | data['value'] = int(fields[1], 16) | ||
+ | data['size'] = int(fields[2], 10) | ||
+ | data['type'] = fields[3] | ||
+ | data['bind'] = fields[4] | ||
+ | data['visibility'] = fields[5] | ||
+ | data['Ndx'] = fields[6] | ||
+ | data['name'] = fields[7] | ||
+ | symbol_info.append(data) | ||
+ | except: | ||
+ | pass | ||
+ | |||
+ | if not validate_symbol_info_list(symbol_info): | ||
+ | raise Exception('symbol "%s" has multiple definitions...' % (args.symbol)) | ||
+ | |||
+ | return symbol_info[0] | ||
+ | |||
+ | def validate_symbol_info_list(symbol_list): | ||
+ | if len(symbol_list) > 1: | ||
+ | for i in range(1, len(symbol_list)): | ||
+ | for f in [ 'value', 'size', 'type', 'name']: | ||
+ | if symbol_list[0][f] != symbol_list[i][f]: | ||
+ | return False | ||
+ | return True | ||
+ | |||
+ | def get_symbol_data(filename, symbol): | ||
+ | symbol_info = get_symbol_info(filename, symbol) | ||
+ | section_info = get_section_container(filename, symbol_info['value'], symbol_info['size']) | ||
+ | section_data = get_section_data(filename, section_info) | ||
+ | |||
+ | offset = symbol_info['value'] - section_info['address'] | ||
+ | data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ] | ||
+ | |||
+ | return data | ||
+ | |||
+ | data = get_symbol_data(args.filename, args.symbol) | ||
if args.str: | if args.str: | ||
Line 173: | Line 230: | ||
elif args.cbo: | elif args.cbo: | ||
− | data = ''.join(map(str, map( | + | data = ''.join(map(str, map(chr, data))) |
+ | |||
n = len(data) | n = len(data) | ||
if n % 8 == 0: | if n % 8 == 0: | ||
− | t = ' | + | t = 'q' |
s = 8 | s = 8 | ||
elif len(data) % 4 == 0: | elif len(data) % 4 == 0: | ||
Line 193: | Line 251: | ||
n = (n - (n % s)) / s | n = (n - (n % s)) / s | ||
+ | file_info = get_file_info(args.filename) | ||
if file_info['endianness'] == 'little': | if file_info['endianness'] == 'little': | ||
e = '<' | e = '<' |
Revision as of 21:08, 13 April 2016
This script helps you extract the data associated with a symbol in a binary.
#!/usr/bin/env python
"""Extract the data associated with a symbol in an ELF binary.

The script drives ``readelf`` (optionally prefixed with ``$CROSS_COMPILE``),
locates the requested symbol, finds the section whose address range contains
it, reads that section's hex dump and writes the symbol's bytes to stdout.

All ``readelf`` output parsing lives in ``parse_*`` helpers that take plain
text, so they can be exercised without running ``readelf``.
"""

import argparse
import os
import struct
import subprocess
import sys


# Honour CROSS_COMPILE so that a target-specific readelf is picked up.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# NOTE: this name shadows the builtin bin(); it is kept because it is part of
# the script's existing module-level namespace.
bin = {}
bin['readelf'] = '%sreadelf' % ( cross_compile )

# Column layouts of one row of `readelf --section-headers --wide`, keyed by
# the number of whitespace-separated fields. A row with an empty flags column
# has 10 fields, a row with flags has 11.
SECTION_FIELD_LAYOUTS = {
    10: ['section_number', 'name', 'type', 'address', 'offset', 'size',
         'entsize', 'link', 'info', 'align'],
    11: ['section_number', 'name', 'type', 'address', 'offset', 'size',
         'entsize', 'flags', 'link', 'info', 'align'],
}

# Section fields that are converted to int, mapped to the numeric base used.
SECTION_INT_FIELDS = {
    'section_number': 10,
    'address': 16,
    'offset': 16,
    'size': 16,
    'entsize': 16,
    'link': 10,
    'info': 10,
    'align': 10,
}

# struct byte-order prefix for each endianness reported by get_file_info().
ENDIAN_PREFIX = {'little': '<', 'big': '>'}


def run_readelf(filename, *options):
    """Run readelf on *filename* with *options* and return its output as text."""
    return subprocess.check_output(
        [bin['readelf'], filename] + list(options),
        universal_newlines=True)


def parse_file_header(text):
    """Parse the text of `readelf --file-header`.

    Returns a dict with the keys 'class', 'data', 'type' and 'machine' (the
    header values, or None when absent) plus 'endianness', which is 'little',
    'big', 'unknown', or None when the header has no 'Data' line.
    """
    info = {
        'class': None,
        'data': None,
        'type': None,
        'machine': None,
        'endianness': None,
    }

    for line in text.split('\n'):
        fields = [x.strip() for x in line.split(':', 1)]
        if len(fields) != 2:
            continue
        key = fields[0].lower()
        if key in info:
            info[key] = fields[1]

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info


def get_file_info(filename):
    """Return the parsed ELF file header of *filename* (see parse_file_header)."""
    return parse_file_header(run_readelf(filename, '--file-header'))


def parse_section_headers(text):
    """Parse the text of `readelf --section-headers --wide`.

    Returns a dict mapping section name to a dict of that section's fields.
    Rows that do not match a known layout (for example the unnamed NULL
    section) or that contain non-numeric values in numeric columns are skipped.
    """
    sections = {}
    consume = False

    for line in text.split('\n'):
        cols = line.split()

        if not consume:
            # The table starts after the column header. Matching on tokens
            # rather than exact spacing accepts both the 32-bit ("Addr") and
            # the 64-bit ("Address") header.
            if cols[:3] == ['[Nr]', 'Name', 'Type']:
                consume = True
            continue

        if line.strip() == 'Key to Flags:':
            consume = False
            continue

        # Single-digit section numbers are printed as "[ 1]", which splits
        # into "[" and "1]"; drop the stray bracket token.
        if '[' in cols:
            cols.remove('[')

        if len(cols) not in SECTION_FIELD_LAYOUTS:
            continue
        layout = SECTION_FIELD_LAYOUTS[len(cols)]

        cols[0] = cols[0].strip('[]')

        info = {}
        try:
            for field_name, raw in zip(layout, cols):
                if field_name in SECTION_INT_FIELDS:
                    info[field_name] = int(raw, SECTION_INT_FIELDS[field_name])
                else:
                    info[field_name] = raw
        except ValueError:
            # Not a well-formed section row; ignore it.
            continue

        sections[info['name']] = info

    return sections


def get_section_list(filename):
    """Return all sections of *filename*, keyed by section name."""
    return parse_section_headers(
        run_readelf(filename, '--section-headers', '--wide'))


def get_section_info(filename, section):
    """Return the info dict of the section named *section*, or None."""
    return get_section_list(filename).get(section)


def get_section_container(filename, base_addr, length = 0):
    """Return a section whose address range contains the given range.

    A section matches when base_addr lies within [address, address + size]
    (the end is inclusive, as in the original implementation) and, if *length*
    is positive, base_addr + length does not run past the section end.
    Returns None when no section matches.
    """
    for section in get_section_list(filename).values():
        section_start = section['address']
        section_end = section_start + section['size']

        if base_addr < section_start or base_addr > section_end:
            continue
        if length > 0 and base_addr + length > section_end:
            continue
        return section

    return None


def parse_hex_dump(text, section_info):
    """Parse the text of `readelf --hex-dump <section>`.

    Returns a dict with 'base_offset' (the address printed on the first dump
    line, or None if there were no dump lines) and 'data' (a list of byte
    values, at most section_info['size'] long).

    Raises Exception if the first dump line does not start at the section's
    address.
    """
    data = {}
    data['base_offset'] = None
    data['data'] = []

    for line in text.split('\n'):
        cols = line.split()
        # Dump lines look like "  0x<addr> <word> <word> <word> <word> <ascii>".
        if not cols or not cols[0].startswith('0x'):
            continue

        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0], 16)
            # readelf labels each dump line with the section address, so that
            # is what the first line has to agree with.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))

        # Each line carries up to 16 bytes as four 8-hex-digit words; the
        # last line may be shorter, so never read past the section size.
        remain = section_info['size'] - len(data['data'])
        for i in range(min(remain, 16)):
            byte = i % 4
            col = 1 + i // 4
            o = byte * 2
            data['data'].append(int(cols[col][o:o + 2], 16))

    return data


def get_section_data(filename, section_info):
    """Return the bytes of the section described by *section_info*."""
    return parse_hex_dump(
        run_readelf(filename, '--hex-dump', section_info['name']),
        section_info)


def parse_symbol_table(text, symbol):
    """Parse the text of `readelf --syms` and return all entries named *symbol*.

    Each entry is a dict with the keys 'symbol_number', 'value', 'size',
    'type', 'bind', 'visibility', 'Ndx' and 'name'. Lines that are not symbol
    rows are ignored.
    """
    matches = []

    for line in text.split('\n'):
        fields = line.split()
        # A symbol row has 8 columns; the name is the last one.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            entry = {
                'symbol_number': int(fields[0][:-1]),  # strip trailing ':'
                'value': int(fields[1], 16),
                # Base 0 accepts the usual decimal size as well as the
                # "0x..." form readelf can print for large sizes.
                'size': int(fields[2], 0),
            }
        except ValueError:
            # e.g. the "Num: Value Size ..." header row.
            continue

        entry['type'] = fields[3]
        entry['bind'] = fields[4]
        entry['visibility'] = fields[5]
        entry['Ndx'] = fields[6]
        entry['name'] = fields[7]
        matches.append(entry)

    return matches


def get_symbol_info(filename, symbol):
    """Return the symbol-table entry for *symbol* in *filename*.

    Raises Exception if the symbol is missing, or if it appears several times
    with conflicting value/size/type.
    """
    symbol_info = parse_symbol_table(
        run_readelf(filename, '--syms', '--wide'), symbol)

    if not symbol_info:
        raise Exception('symbol "%s" was not found...' % ( symbol ))

    if not validate_symbol_info_list(symbol_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return symbol_info[0]


def validate_symbol_info_list(symbol_list):
    """Return True if every entry agrees with the first on value/size/type/name.

    An empty or single-element list is trivially valid.
    """
    if len(symbol_list) > 1:
        first = symbol_list[0]
        for other in symbol_list[1:]:
            for f in ['value', 'size', 'type', 'name']:
                if first[f] != other[f]:
                    return False
    return True


def get_symbol_data(filename, symbol):
    """Return the bytes backing *symbol* in *filename* as a list of ints.

    Raises Exception if no section contains the symbol or if the containing
    section's hex dump does not cover the symbol.
    """
    symbol_info = get_symbol_info(filename, symbol)
    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])
    if section_info is None:
        raise Exception('symbol "%s" is not contained in any section...' % ( symbol ))

    section_data = get_section_data(filename, section_info)

    offset = symbol_info['value'] - section_info['address']
    end = offset + symbol_info['size']
    if end > len(section_data['data']):
        raise Exception('section "%s" holds no data for symbol "%s"...' % ( section_info['name'], symbol ))

    return section_data['data'][offset:end]


def word_format(length):
    """Pick the struct format used to reorder *length* bytes.

    Returns a (format_char, word_size) tuple using the widest of 8, 4 or 2
    byte words that divides *length*, ('c', 1) for a single byte, or None
    when the length cannot be reordered.
    """
    for size, code in ((8, 'q'), (4, 'i'), (2, 'h')):
        if length % size == 0:
            return code, size
    if length == 1:
        return 'c', 1
    return None


def reorder_bytes(data, endianness):
    """Re-encode *data* (a list of byte values) as big-endian words.

    *endianness* is the byte order the words are currently stored in
    ('little' or 'big'). Returns the reordered bytes.

    Raises ValueError if the length is unsuitable or the endianness unknown.
    """
    fmt = word_format(len(data))
    if fmt is None:
        raise ValueError('%d bytes is not a suitable size to reorder' % ( len(data) ))
    if endianness not in ENDIAN_PREFIX:
        raise ValueError('unknown endianness %r' % ( endianness, ))

    code, size = fmt
    count = len(data) // size
    raw = bytes(bytearray(data))
    # Unpack every word (not just the first) and pack them all back.
    words = struct.unpack('%s%d%s' % ( ENDIAN_PREFIX[endianness], count, code ), raw)
    return struct.pack('>%d%s' % ( count, code ), *words)


def main():
    """Command-line entry point: parse arguments and write the symbol's bytes."""
    ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
    ap.add_argument('filename', type=str, help='the file to process')
    ap.add_argument('symbol', type=str, help='the symbol to extract')
    ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
    ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
    args = ap.parse_args()

    if not os.path.isfile(args.filename):
        raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

    if args.str and args.cbo:
        raise Exception('Arguments --cbo and --str are mutually exclusive...')

    data = get_symbol_data(args.filename, args.symbol)

    if args.str:
        if not data or data[-1] != 0:
            raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
        output = bytes(bytearray(data[:-1]))

    elif args.cbo:
        if word_format(len(data)) is None:
            raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

        file_info = get_file_info(args.filename)
        if file_info['endianness'] not in ENDIAN_PREFIX:
            raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

        output = reorder_bytes(data, file_info['endianness'])

    else:
        output = bytes(bytearray(data))

    # Write raw bytes: on Python 3 that requires the underlying binary stream.
    getattr(sys.stdout, 'buffer', sys.stdout).write(output)


if __name__ == '__main__':
    main()
Example Usage
# Build a tiny test binary that contains a known symbol. The here-document
# terminator must sit on a line of its own for the shell to recognise it.
cat <<EOF | gcc -x c - -o eg
const char my_data[] = "test string";
int main(void) { return 0; }
EOF

# Print the contents of my_data as a C string (without the trailing NUL).
./get_bin_info.py eg my_data -S