Get bin info.py
From Attie's Wiki
(Difference between revisions)
m (now operates with .rodata sections that are not aligned to a 16-byte boundary...) |
m |
||
(6 intermediate revisions by one user not shown) | |||
Line 1: | Line 1: | ||
This script helps you extract the data associated with a symbol in a binary. | This script helps you extract the data associated with a symbol in a binary. | ||
− | |||
− | <source lang=" | + | <source lang="python"> |
− | #!/bin/ | + | #!/usr/bin/env python |
+ | |||
+ | import argparse | ||
+ | import os, sys | ||
+ | import subprocess | ||
+ | import struct | ||
+ | |||
# Command line: the ELF file, the symbol to dump, and two optional output modes.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument('filename', type=str, help='the file to process')
ap.add_argument('symbol', type=str, help='the symbol to extract')
ap.add_argument(
    '-S', '--str', action='store_true',
    help='print as a nul-terminated C string')
ap.add_argument(
    '-C', '--cbo', action='store_true',
    help='correct the byte order (endianness)')
args = ap.parse_args()

# Reject unusable input before any external tool is started.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

if args.cbo and args.str:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# CROSS_COMPILE, when set, is prepended to the tool name; otherwise the plain
# 'readelf' is used.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools used by the functions of this script.
bin = {'readelf': cross_compile + 'readelf'}
+ | |||
def get_file_info(filename):
    """Collect selected ELF file-header fields for *filename*.

    Runs ``readelf --file-header`` and returns a dict with the keys 'class',
    'data', 'type', 'machine' and 'endianness'.  The first four hold the text
    that follows the colon on the matching header line, or None when no such
    line was printed.  'endianness' is derived from 'data' and is 'little',
    'big', 'unknown', or None when there was no 'data' line at all.

    Raises subprocess.CalledProcessError when readelf exits with an error.
    """
    info = dict.fromkeys(['class', 'data', 'type', 'machine', 'endianness'])

    # Ask for text output so the string handling below also works when
    # check_output would otherwise hand back bytes.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--file-header'],
        universal_newlines=True)

    for line in output.split('\n'):
        key, colon, value = line.partition(':')
        if not colon:
            # Not a "Key: value" line.
            continue

        key = key.strip().lower()
        if key in info:
            info[key] = value.strip()

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # The original tested for 'bit endian', which never matches.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
+ | |||
def get_section_list(filename):
    """Parse the section header table of *filename*.

    Runs ``readelf --section-headers --wide`` and returns a dict mapping each
    section name to a dict of its columns: 'section_number', 'name', 'type',
    'address', 'offset', 'size', 'entsize', 'link', 'info', 'align' and, when
    the row has one, 'flags'.  Numeric columns are converted to int; rows
    that do not fit one of the known layouts are skipped.

    Raises subprocess.CalledProcessError when readelf exits with an error.
    """
    # Column layouts keyed by the number of whitespace-separated fields in a
    # row: rows without a flags column have 10 fields, rows with one have 11.
    field_layouts = {
        10: ['section_number', 'name', 'type', 'address', 'offset', 'size',
             'entsize', 'link', 'info', 'align'],
        11: ['section_number', 'name', 'type', 'address', 'offset', 'size',
             'entsize', 'flags', 'link', 'info', 'align'],
    }

    # Numeric columns and the base each one is printed in.
    fields_int = {
        'section_number': 10,
        'address': 16,
        'offset': 16,
        'size': 16,
        'entsize': 16,
        'link': 10,
        'info': 10,
        'align': 10,
    }

    # The table header is matched by its tokens rather than by exact spacing.
    # NOTE(review): the 'Address' spelling is assumed to be what readelf
    # prints for 64-bit files - confirm against the readelf in use.
    header_rows = [
        '[Nr] Name Type Addr Off Size ES Flg Lk Inf Al'.split(),
        '[Nr] Name Type Address Off Size ES Flg Lk Inf Al'.split(),
    ]

    sections = {}
    consume = False

    output = subprocess.check_output(
        [bin['readelf'], filename, '--section-headers', '--wide'],
        universal_newlines=True)

    for line in output.split('\n'):
        fields = line.split()

        if not consume:
            # Everything up to and including the table header is ignored.
            consume = fields in header_rows
            continue

        if line.strip() == 'Key to Flags:':
            # The legend follows the table; nothing further to parse.
            break

        # Single-digit section numbers are printed as "[ 1]", which splits
        # into a lone "[" and "1]".
        if '[' in fields:
            fields.remove('[')

        field_layout = field_layouts.get(len(fields))
        if field_layout is None:
            continue

        fields[0] = fields[0].strip('[]')

        info = {}
        try:
            for field_name, text in zip(field_layout, fields):
                if field_name in fields_int:
                    info[field_name] = int(text, fields_int[field_name])
                else:
                    info[field_name] = text
        except ValueError:
            # A numeric column did not parse, so the row did not really match
            # the layout chosen by its field count; skip it.
            continue

        sections[info['name']] = info

    return sections
+ | |||
def get_section_info(filename, section):
    """Return the parsed header of the section named *section*.

    The lookup is done in the result of get_section_list(filename); None is
    returned when no section of that name is present.
    """
    return get_section_list(filename).get(section)
+ | |||
def get_section_container(filename, base_addr, length = 0):
    """Find a section of *filename* that contains the given address range.

    A section qualifies when *base_addr* lies between its start address and
    its start address plus size (both bounds inclusive) and, if *length* is
    greater than zero, when base_addr + length does not run past that end.
    The first qualifying entry of get_section_list(filename) is returned, or
    None when there is none.
    """
    for candidate in get_section_list(filename).values():
        first = candidate['address']
        limit = first + candidate['size']

        starts_inside = first <= base_addr <= limit
        fits = length <= 0 or base_addr + length <= limit

        if starts_inside and fits:
            return candidate

    return None
+ | |||
def get_section_data(filename, section_info):
    """Read the bytes of one section through ``readelf --hex-dump``.

    *section_info* is a section dict as produced by get_section_list; its
    'name', 'address' and 'size' entries are used.

    Returns a dict with 'base_offset' (the number printed in the left-hand
    column of the first dump line, or None when nothing was dumped) and
    'data' (a list of byte values, at most section_info['size'] long).

    Raises Exception when the dump does not start at the section's address,
    and subprocess.CalledProcessError when readelf exits with an error.
    """
    data = {}
    data['base_offset'] = None
    data['data'] = []

    output = subprocess.check_output(
        [bin['readelf'], filename, '--hex-dump', section_info['name']],
        universal_newlines=True)

    for line in output.split('\n'):
        # Only dump rows are of interest; they are indented and begin with a
        # 0x-prefixed number.
        if not line.startswith('  0x'):
            continue

        cols = line.split()

        if data['base_offset'] is None:
            # The original tested the literal 'base_offset' against None, so
            # this sanity check never ran.
            data['base_offset'] = int(cols[0][2:], 16)
            # NOTE(review): the left-hand column is taken to be the section
            # address, as the error message says; the original compared it
            # with the file offset - confirm against real readelf output.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))

        # A row carries up to 16 bytes as four 8-digit hex words; the final
        # row may be shorter, so never read past the section size.
        remain = section_info['size'] - len(data['data'])
        for i in range(min(16, remain)):
            word, byte = divmod(i, 4)
            o = byte * 2
            data['data'].append(int(cols[1 + word][o:o+2], 16))

    return data
+ | |||
def get_symbol_info(filename, symbol):
    """Look up *symbol* in the symbol tables of *filename*.

    Runs ``readelf --syms`` and collects every row whose eighth column equals
    *symbol*.  Returns the first match as a dict with the keys
    'symbol_number', 'value', 'size', 'type', 'bind', 'visibility', 'Ndx'
    and 'name'.

    Raises Exception when the symbol is not found, or when several matches
    disagree according to validate_symbol_info_list.  Raises
    subprocess.CalledProcessError when readelf exits with an error.
    """
    symbol_info = []

    output = subprocess.check_output(
        [bin['readelf'], filename, '--syms'],
        universal_newlines=True)

    for line in output.split('\n'):
        fields = line.split()

        # The name is the eighth column, so at least eight are required.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            data = {
                # The symbol number is printed with a trailing colon.
                'symbol_number': int(fields[0][:-1]),
                'value': int(fields[1], 16),
                # Base 0 accepts plain decimal as well as a 0x-prefixed size.
                'size': int(fields[2], 0),
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a symbol row after all (for example a table heading).
            continue

        symbol_info.append(data)

    if not validate_symbol_info_list(symbol_info):
        raise Exception('symbol "%s" has multiple definitions...' % (symbol))

    if len(symbol_info) == 0:
        raise Exception('symbol "%s" is not defined...' % (symbol))

    return symbol_info[0]
− | + | ||
− | + | ||
def validate_symbol_info_list(symbol_list):
    """Check that all entries of *symbol_list* describe the same symbol.

    Returns False as soon as any entry differs from the first one in
    'value', 'size', 'type' or 'name'; returns True otherwise, which includes
    an empty or single-entry list.
    """
    if not symbol_list:
        return True

    reference = symbol_list[0]
    return all(
        entry[key] == reference[key]
        for entry in symbol_list[1:]
        for key in ('value', 'size', 'type', 'name'))
def get_symbol_data(filename, symbol):
    """Return the bytes stored for *symbol* in *filename* as a list of ints.

    The symbol is located with get_symbol_info, the section holding its
    address range with get_section_container, and the section contents with
    get_section_data; the symbol's bytes are then cut out of those contents.

    Raises Exception when no section contains the symbol or when the section
    dump does not cover the symbol's range, in addition to whatever the
    helper functions raise.
    """
    symbol_info = get_symbol_info(filename, symbol)
    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])

    if section_info is None:
        raise Exception('symbol "%s" is not contained in any section...' % (symbol))

    section_data = get_section_data(filename, section_info)

    # Position of the symbol relative to the first dumped byte.
    start = symbol_info['value'] - section_info['address']
    end = start + symbol_info['size']

    if end > len(section_data['data']):
        # The dump produced fewer bytes than the symbol needs.
        raise Exception('section "%s" does not provide the data of symbol "%s"...' % (section_info['name'], symbol))

    return section_data['data'][start:end]
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
# Fetch the symbol's bytes (a list of ints) and turn them into the requested
# output form.  bytes(bytearray(...)) is used throughout because it copes with
# values of 0x80 and above, which str(unichr(...)) did not.
data = get_symbol_data(args.filename, args.symbol)

if args.str:
    # --str: the symbol must end in a nul byte, which is not printed.
    if not data or data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    data = bytes(bytearray(data[:-1]))
elif args.cbo:
    # --cbo: treat the symbol as an array of equally sized integers, read
    # them in the file's byte order and write them out big-endian.
    data = bytes(bytearray(data))
    n = len(data)

    # Use the widest item size that divides the length evenly.
    if n % 8 == 0:
        t = 'q'
        s = 8
    elif n % 4 == 0:
        t = 'i'
        s = 4
    elif n % 2 == 0:
        t = 'h'
        s = 2
    elif n == 1:
        t = 'c'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))
    n = n // s

    file_info = get_file_info(args.filename)
    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Convert every item; the original kept only the first unpacked value and
    # then packed it with an n-item format, which fails for n > 1.
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)
    data = struct.pack('>%d%s' % ( n, t ), *tmp)
else:
    data = bytes(bytearray(data))

# Write raw bytes: use the binary layer of stdout where there is one.
getattr(sys.stdout, 'buffer', sys.stdout).write(data)
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
</source> | </source> | ||
Line 77: | Line 278: | ||
EOF | EOF | ||
− | ./get_bin_info. | + | ./get_bin_info.py eg my_data -S |
</source> | </source> |
Latest revision as of 11:30, 14 April 2016
This script helps you extract the data associated with a symbol in a binary.
#!/usr/bin/env python
"""Extract the data associated with a symbol in an ELF binary.

The symbol and the section holding it are located with readelf (prefixed
with $CROSS_COMPILE when that variable is set); the symbol's bytes are then
written to standard output, optionally as a C string (--str) or with the
byte order converted to big-endian (--cbo).
"""

import argparse
import os
import struct
import subprocess
import sys

# CROSS_COMPILE, when set, is prepended to the tool name.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools used by the functions below.
bin = {'readelf': cross_compile + 'readelf'}


def _readelf(filename, *options):
    """Run readelf on *filename* with *options* and return its output lines."""
    # universal_newlines=True requests text output, so the lines can be
    # handled as strings even where check_output would return bytes.
    output = subprocess.check_output(
        [bin['readelf'], filename] + list(options),
        universal_newlines=True)
    return output.split('\n')


def get_file_info(filename):
    """Collect selected ELF file-header fields for *filename*.

    Returns a dict with the keys 'class', 'data', 'type', 'machine' (text
    after the colon of the matching ``readelf --file-header`` line, or None)
    and 'endianness' ('little', 'big', 'unknown', or None when there was no
    'data' line).
    """
    info = dict.fromkeys(['class', 'data', 'type', 'machine', 'endianness'])

    for line in _readelf(filename, '--file-header'):
        key, colon, value = line.partition(':')
        if not colon:
            continue

        key = key.strip().lower()
        if key in info:
            info[key] = value.strip()

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # The original tested for 'bit endian', which never matches.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info


def get_section_list(filename):
    """Parse the section header table of *filename*.

    Returns a dict mapping each section name to a dict of its columns as
    printed by ``readelf --section-headers --wide``; numeric columns are
    converted to int and rows that fit no known layout are skipped.
    """
    # Column layouts keyed by the number of fields in a row (without and
    # with a flags column).
    field_layouts = {
        10: ['section_number', 'name', 'type', 'address', 'offset', 'size',
             'entsize', 'link', 'info', 'align'],
        11: ['section_number', 'name', 'type', 'address', 'offset', 'size',
             'entsize', 'flags', 'link', 'info', 'align'],
    }

    # Numeric columns and the base each one is printed in.
    fields_int = {
        'section_number': 10,
        'address': 16,
        'offset': 16,
        'size': 16,
        'entsize': 16,
        'link': 10,
        'info': 10,
        'align': 10,
    }

    # The table header is matched by its tokens rather than by exact spacing.
    # NOTE(review): the 'Address' spelling is assumed to be what readelf
    # prints for 64-bit files - confirm against the readelf in use.
    header_rows = [
        '[Nr] Name Type Addr Off Size ES Flg Lk Inf Al'.split(),
        '[Nr] Name Type Address Off Size ES Flg Lk Inf Al'.split(),
    ]

    sections = {}
    consume = False

    for line in _readelf(filename, '--section-headers', '--wide'):
        fields = line.split()

        if not consume:
            consume = fields in header_rows
            continue

        if line.strip() == 'Key to Flags:':
            break

        # "[ 1]" splits into a lone "[" and "1]".
        if '[' in fields:
            fields.remove('[')

        field_layout = field_layouts.get(len(fields))
        if field_layout is None:
            continue

        fields[0] = fields[0].strip('[]')

        info = {}
        try:
            for field_name, text in zip(field_layout, fields):
                if field_name in fields_int:
                    info[field_name] = int(text, fields_int[field_name])
                else:
                    info[field_name] = text
        except ValueError:
            # The row did not really match the layout chosen by its length.
            continue

        sections[info['name']] = info

    return sections


def get_section_info(filename, section):
    """Return the parsed header of the section named *section*, or None."""
    return get_section_list(filename).get(section)


def get_section_container(filename, base_addr, length = 0):
    """Find a section of *filename* that contains the given address range.

    A section qualifies when *base_addr* lies between its start address and
    its start address plus size (both bounds inclusive) and, if *length* is
    greater than zero, base_addr + length does not run past that end.
    Returns the first qualifying section dict, or None.
    """
    for section in get_section_list(filename).values():
        section_start = section['address']
        section_end = section_start + section['size']

        if base_addr < section_start or base_addr > section_end:
            continue

        if length > 0 and base_addr + length > section_end:
            continue

        return section

    return None


def get_section_data(filename, section_info):
    """Read the bytes of one section through ``readelf --hex-dump``.

    Returns a dict with 'base_offset' (the number in the left-hand column of
    the first dump line, or None when nothing was dumped) and 'data' (a list
    of byte values, at most section_info['size'] long).  Raises Exception
    when the dump does not start at the section's address.
    """
    data = {'base_offset': None, 'data': []}

    for line in _readelf(filename, '--hex-dump', section_info['name']):
        # Dump rows are indented and begin with a 0x-prefixed number.
        if not line.startswith('  0x'):
            continue

        cols = line.split()

        if data['base_offset'] is None:
            # The original tested the literal 'base_offset' against None, so
            # this sanity check never ran.
            data['base_offset'] = int(cols[0][2:], 16)
            # NOTE(review): the left-hand column is taken to be the section
            # address, as the error message says; the original compared it
            # with the file offset - confirm against real readelf output.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))

        # Up to 16 bytes per row, as four 8-digit hex words; never read past
        # the section size on the final row.
        remain = section_info['size'] - len(data['data'])
        for i in range(min(16, remain)):
            word, byte = divmod(i, 4)
            o = byte * 2
            data['data'].append(int(cols[1 + word][o:o+2], 16))

    return data


def validate_symbol_info_list(symbol_list):
    """Return True unless an entry differs from the first one.

    Entries are compared on 'value', 'size', 'type' and 'name'; an empty or
    single-entry list is valid.
    """
    if not symbol_list:
        return True

    reference = symbol_list[0]
    return all(
        entry[key] == reference[key]
        for entry in symbol_list[1:]
        for key in ('value', 'size', 'type', 'name'))


def get_symbol_info(filename, symbol):
    """Look up *symbol* in the output of ``readelf --syms`` for *filename*.

    Returns the first matching row as a dict with the keys 'symbol_number',
    'value', 'size', 'type', 'bind', 'visibility', 'Ndx' and 'name'.  Raises
    Exception when the symbol is missing or its matches disagree.
    """
    symbol_info = []

    for line in _readelf(filename, '--syms'):
        fields = line.split()

        # The name is the eighth column, so at least eight are required.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            data = {
                # The symbol number is printed with a trailing colon.
                'symbol_number': int(fields[0][:-1]),
                'value': int(fields[1], 16),
                # Base 0 accepts plain decimal as well as a 0x-prefixed size.
                'size': int(fields[2], 0),
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a symbol row after all (for example a table heading).
            continue

        symbol_info.append(data)

    if not validate_symbol_info_list(symbol_info):
        raise Exception('symbol "%s" has multiple definitions...' % (symbol))

    if len(symbol_info) == 0:
        raise Exception('symbol "%s" is not defined...' % (symbol))

    return symbol_info[0]


def get_symbol_data(filename, symbol):
    """Return the bytes stored for *symbol* in *filename* as a list of ints.

    Raises Exception when no section contains the symbol or when the section
    dump does not cover the symbol's range.
    """
    symbol_info = get_symbol_info(filename, symbol)
    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])

    if section_info is None:
        raise Exception('symbol "%s" is not contained in any section...' % (symbol))

    section_data = get_section_data(filename, section_info)

    # Position of the symbol relative to the first dumped byte.
    start = symbol_info['value'] - section_info['address']
    end = start + symbol_info['size']

    if end > len(section_data['data']):
        raise Exception('section "%s" does not provide the data of symbol "%s"...' % (section_info['name'], symbol))

    return section_data['data'][start:end]


def main():
    """Parse the command line and write the symbol's data to stdout."""
    ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
    ap.add_argument('filename', type=str, help='the file to process')
    ap.add_argument('symbol', type=str, help='the symbol to extract')
    ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
    ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
    args = ap.parse_args()

    if not os.path.isfile(args.filename):
        raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

    if args.str and args.cbo:
        raise Exception('Arguments --cbo and --str are mutually exclusive...')

    data = get_symbol_data(args.filename, args.symbol)

    # bytes(bytearray(...)) copes with values of 0x80 and above, which
    # str(unichr(...)) did not.
    if args.str:
        # The symbol must end in a nul byte, which is not printed.
        if not data or data[-1] != 0:
            raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
        data = bytes(bytearray(data[:-1]))
    elif args.cbo:
        # Treat the symbol as an array of equally sized integers, read them
        # in the file's byte order and write them out big-endian.
        data = bytes(bytearray(data))
        n = len(data)

        # Use the widest item size that divides the length evenly.
        if n % 8 == 0:
            t = 'q'
            s = 8
        elif n % 4 == 0:
            t = 'i'
            s = 4
        elif n % 2 == 0:
            t = 'h'
            s = 2
        elif n == 1:
            t = 'c'
            s = 1
        else:
            raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))
        n = n // s

        file_info = get_file_info(args.filename)
        if file_info['endianness'] == 'little':
            e = '<'
        elif file_info['endianness'] == 'big':
            e = '>'
        else:
            raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

        # Convert every item; the original kept only the first unpacked value
        # and then packed it with an n-item format, which fails for n > 1.
        tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)
        data = struct.pack('>%d%s' % ( n, t ), *tmp)
    else:
        data = bytes(bytearray(data))

    # Write raw bytes: use the binary layer of stdout where there is one.
    getattr(sys.stdout, 'buffer', sys.stdout).write(data)


if __name__ == '__main__':
    main()
[edit] Example Usage
cat <<EOF | gcc -x c - -o eg
const char my_data[] = "test string";
void main(void) {}
EOF
./get_bin_info.py eg my_data -S