Get bin info.py
From Attie's Wiki
(Difference between revisions)
m |
m |
||
Line 1: | Line 1: | ||
This script helps you extract the data associated with a symbol in a binary. | This script helps you extract the data associated with a symbol in a binary. | ||
− | |||
− | <source lang=" | + | <source lang="python"> |
#!/usr/bin/env python
# Extract the data associated with a symbol from an ELF binary, by driving
# the binutils "readelf" tool and parsing its text output.
# NOTE: this script is Python 2 (it uses unichr and integer '/' below).
import argparse
import os, sys
import subprocess
import struct

# Command-line interface: positional file + symbol, plus two optional output
# modes (-S and -C are mutually exclusive; checked just after parsing).
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument('filename', type=str, help='the file to process')
ap.add_argument('symbol', type=str, help='the symbol to extract')
ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
args = ap.parse_args()
# Validate the inputs before doing any work.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

if args.str and args.cbo:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# Honour a cross-toolchain prefix (e.g. CROSS_COMPILE=arm-linux-gnueabi-)
# the same way kernel / U-Boot makefiles do; default to the host tools.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools this script shells out to.
bin = {}
bin['readelf'] = '%sreadelf' % ( cross_compile )
def get_file_info(filename):
    """Parse the ELF file header of *filename* via readelf.

    Returns a dict with the readelf header fields of interest ('class',
    'data', 'type', 'machine') plus a derived 'endianness' key that is
    'little', 'big' or 'unknown'.  Fields readelf did not report stay None.
    """
    info = {
        'class': None,
        'data': None,
        'type': None,
        'machine': None,
        'endianness': None,
    }
    for line in subprocess.check_output([ bin['readelf'], filename, '--file-header' ]).split('\n'):
        # readelf prints "  Key:   value" rows; split on the first ':' only
        # so values containing ':' survive intact.
        fields = [ x.strip() for x in line.split(':', 1) ]
        if len(fields) != 2:
            continue
        key = fields[0].lower()
        if key in info:
            info[key] = fields[1]
    if info['data'] is not None:
        # The "Data" field reads e.g. "2's complement, little endian".
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # BUGFIX: was 'bit endian' — big-endian files were classified
            # 'unknown', making the --cbo path raise later on.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'
    return info
− | + | ||
def get_section_info(filename, section):
    """Locate *section* in readelf's section-header listing.

    Returns a dict describing the section (number, name, type, address,
    offset, size plus the remaining readelf columns), or None when no such
    section exists.
    """
    for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers' ]).split('\n'):
        fields = line.split()
        # For single-digit indices readelf prints "[ 1]", which splits into
        # a stray '[' token — drop it so every row has 11 columns.
        if '[' in fields:
            fields.remove('[')
        if len(fields) != 11:
            continue
        try:
            # Strip the surrounding brackets from the index column
            # (Python 2 str.translate deletes the chars in the 2nd arg).
            fields[0] = int(fields[0].translate(None, '[]'))
            if fields[1] != section:
                continue
            info = {}
            info['section_number'] = fields[0]
            info['name'] = fields[1]
            info['type'] = fields[2]
            info['address'] = int(fields[3], 16)
            info['offset'] = int(fields[4], 16)
            info['size'] = int(fields[5], 16)
            info['ES'] = fields[6]
            info['Flg'] = fields[7]
            info['Lk'] = fields[8]
            info['Inf'] = fields[9]
            info['Al'] = fields[10]
            return info
        except ValueError:
            # Heading / banner rows fail the int() conversions above.
            # (Was a bare "except:" that silently hid every error.)
            pass
    return None
def get_section_data(filename, section, section_info=None):
    """Read the raw bytes of *section* using readelf's hex dump.

    Returns {'base_offset': <int or None>, 'data': [int, ...]} where 'data'
    holds one integer (0-255) per byte of the section, in file order.
    *section_info* may be supplied to avoid a second readelf invocation.
    """
    if section_info is None:
        # BUGFIX: was "sectioN" — a NameError whenever the caller did not
        # pass section_info explicitly.
        section_info = get_section_info(filename, section)

    data = {}
    data['base_offset'] = None
    data['data'] = []

    for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section ]).split('\n'):
        # Hex-dump rows start with two spaces then "0x<addr>".
        # NOTE(review): the wiki rendering collapsed runs of spaces; the
        # literal here is restored to readelf's two-space indent — confirm
        # against real readelf output.
        if line[0:4] != '  0x':
            continue

        cols = line.split()

        if data['base_offset'] is None:
            # BUGFIX: was "if 'base_offset' is None:", which is always
            # False, so base_offset was never recorded and the sanity
            # check below never ran.
            data['base_offset'] = int(cols[0][2:], 16)
            if data['base_offset'] != section_info['offset']:
                # NOTE(review): readelf's hex-dump address column may be
                # the section's virtual address rather than its file
                # offset; if this fires, compare against
                # section_info['address'] instead — TODO confirm.
                raise Exception('section "%s" has an unexpected starting address...' % ( section ))

        # Each row carries up to 16 bytes, packed as four 8-hex-digit word
        # columns; stop early on the final, partial row.
        remain = section_info['size'] - len(data['data'])
        if remain > 16:
            remain = 16

        for i in range(remain):
            byte = i % 4                     # byte position within the word column
            col = 1 + ((i - byte) / 4)       # which word column (Python 2 int division)
            o = byte * 2                     # hex-digit offset within that column
            data['data'].append(int(cols[col][o:o+2], 16))

    return data
+ | |||
def get_sym_info(filename, symbol):
    """Look up *symbol* in the ELF symbol table via readelf.

    Returns a dict describing the symbol (number, value, size, type, bind,
    visibility, Ndx, name).  Raises when the symbol is absent, or when it
    appears multiple times with conflicting attributes.
    """
    sym_info = []
    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
        fields = line.split()

        # A symbol row has 8 columns:
        #   "Num: Value Size Type Bind Vis Ndx Name"
        # BUGFIX: was ">= 7", which relied on the except clause below to
        # swallow the IndexError from fields[7] on 7-column lines.
        if len(fields) < 8:
            continue
        try:
            fields[0] = int(fields[0][:-1])  # strip the trailing ':' from "Num:"
            if fields[7] != symbol:
                continue

            data = {}
            data['symbol_number'] = fields[0]
            data['value'] = int(fields[1], 16)
            data['size'] = int(fields[2], 10)
            data['type'] = fields[3]
            data['bind'] = fields[4]
            data['visibility'] = fields[5]
            data['Ndx'] = fields[6]
            data['name'] = fields[7]
            sym_info.append(data)
        except ValueError:
            # Header / separator lines fail the int() conversions above.
            # (Was a bare "except:" that silently hid every error.)
            pass

    if not sym_info:
        # BUGFIX: an unknown symbol previously fell through to sym_info[0]
        # and died with a raw IndexError.
        raise Exception('symbol "%s" could not be found...' % ( symbol ))
    if not validate_sym_info_list(sym_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return sym_info[0]
+ | |||
def validate_sym_info_list(symbol_list):
    """Return True when every entry of *symbol_list* agrees with the first
    on the identifying fields (value, size, type, name); a list of zero or
    one entries is trivially consistent."""
    if not symbol_list:
        return True
    reference = symbol_list[0]
    checked = ('value', 'size', 'type', 'name')
    for candidate in symbol_list[1:]:
        if any(reference[f] != candidate[f] for f in checked):
            return False
    return True
+ | |||
# ---- main: gather the metadata, then slice the symbol's bytes out ----
file_info = get_file_info(args.filename)
symbol_info = get_sym_info(args.filename, args.symbol)
section_info = get_section_info(args.filename, '.rodata')
section_data = get_section_data(args.filename, '.rodata', section_info=section_info)

# Index of the symbol's first byte within the section's data.
# NOTE(review): 'value' is the symbol's address, while 'offset' is the
# section's file offset — presumably this should subtract
# section_info['address'] unless address == offset for .rodata; confirm.
offset = symbol_info['value'] - section_info['offset']
data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ]

if args.str:
    # -S / --str: require a trailing NUL, strip it, and emit the text.
    if data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    data = ''.join(map(str, map(unichr, data[:-1])))  # Python 2 only (unichr)

elif args.cbo:
    # -C / --cbo: reinterpret the bytes in the file's own endianness and
    # re-emit them big-endian.
    data = ''.join(map(str, map(unichr, data)))
    n = len(data)

    # Choose a struct element type by divisibility of the symbol size.
    # NOTE(review): 'd' is a C double, not an 8-byte integer; it still
    # round-trips the raw bytes, but 'q' would express the intent better.
    if n % 8 == 0:
        t = 'd'
        s = 8
    elif len(data) % 4 == 0:
        t = 'i'
        s = 4
    elif len(data) % 2 == 0:
        t = 'h'
        s = 2
    elif len(data) == 1:
        t = 'c'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    n = (n - (n % s)) / s  # element count (Python 2 integer division)

    # Map the file's endianness onto a struct byte-order prefix.
    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # NOTE(review): the [0] keeps only the first unpacked element, so the
    # repack below raises struct.error whenever n > 1 — only
    # single-element symbols actually work here; confirm intent.
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)[0]
    data = struct.pack('>%d%s' % ( n, t ), tmp)

else:
    # Default: dump the symbol's bytes verbatim.
    data = ''.join(map(str, map(unichr, data)))

sys.stdout.write(data)
</source> | </source> |
Revision as of 13:14, 16 October 2015
This script helps you extract the data associated with a symbol in a binary.
#!/usr/bin/env python import argparse import os, sys import subprocess import struct ap = argparse.ArgumentParser(description='Extract data from an ELF binary') ap.add_argument('filename', type=str, help='the file to process') ap.add_argument('symbol', type=str, help='the symbol to extract') ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string') ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)') args = ap.parse_args() if not os.path.isfile(args.filename): raise Exception('"%s" is not a file or does not exist...' % ( args.filename )) if args.str and args.cbo: raise Exception('Arguments --cbo and --str are mutually exclusive...') if 'CROSS_COMPILE' in os.environ: cross_compile = os.environ['CROSS_COMPILE'] else: cross_compile = '' bin = {} bin['readelf'] = '%sreadelf' % ( cross_compile ) def get_file_info(filename): info = {} info['class'] = None info['data'] = None info['type'] = None info['machine'] = None info['endianness'] = None for line in subprocess.check_output([ bin['readelf'], filename, '--file-header' ]).split('\n'): fields = [ x.strip() for x in line.split(':', 1) ] if len(fields) != 2: continue key = fields[0].lower() value = fields[1] if key in info: info[key] = value if info['data'] != None: if 'little endian' in info['data']: info['endianness'] = 'little' elif 'bit endian' in info['data']: info['endianness'] = 'big' else: info['endianness'] = 'unknown' return info def get_section_info(filename, section): for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers' ]).split('\n'): fields = line.split() if '[' in fields: fields.remove('[') if len(fields) != 11: continue try: fields[0] = int(fields[0].translate(None, ''.join([ '[', ']' ]))) if fields[1] != section: continue info = {} info['section_number'] = fields[0] info['name'] = fields[1] info['type'] = fields[2] info['address'] = int(fields[3], 16) info['offset'] = int(fields[4], 16) 
info['size'] = int(fields[5], 16) info['ES'] = fields[6] info['Flg'] = fields[7] info['Lk'] = fields[8] info['Inf'] = fields[9] info['Al'] = fields[10] return info except: pass return None def get_section_data(filename, section, section_info=None): if section_info is None: section_info = get_section_info(filename, sectioN) data = {} data['base_offset'] = None data['data'] = [] for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section ]).split('\n'): if line[0:4] != ' 0x': continue cols = line.split() if 'base_offset' is None: data['base_offset'] = int(cols[0][2:], 16) if data['base_offset'] != section_info['offset']: raise Exception('section "%s" has an unexpected starting address...' % ( section )) remain = section_info['size'] - len(data['data']) if remain > 16: r = range(16) else: r = range(remain) for i in r: byte = i % 4 col = 1 + ((i - byte) / 4) o = byte * 2 data['data'].append(int(cols[col][o:o+2], 16)) return data def get_sym_info(filename, symbol): sym_info = [] for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'): fields = line.split() if len(fields) >= 7: try: fields[0] = int(fields[0][:-1]) if fields[7] != symbol: continue data = {} data['symbol_number'] = fields[0] data['value'] = int(fields[1], 16) data['size'] = int(fields[2], 10) data['type'] = fields[3] data['bind'] = fields[4] data['visibility'] = fields[5] data['Ndx'] = fields[6] data['name'] = fields[7] sym_info.append(data) except: pass if not validate_sym_info_list(sym_info): raise Exception('symbol "%s" has multiple definitions...' 
% (args.symbol)) return sym_info[0] def validate_sym_info_list(symbol_list): if len(symbol_list) > 1: for i in range(1, len(symbol_list)): for f in [ 'value', 'size', 'type', 'name']: if symbol_list[0][f] != symbol_list[i][f]: return False return True file_info = get_file_info(args.filename) symbol_info = get_sym_info(args.filename, args.symbol) section_info = get_section_info(args.filename, '.rodata') section_data = get_section_data(args.filename, '.rodata', section_info=section_info) offset = symbol_info['value'] - section_info['offset'] data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ] if args.str: if data[-1] != 0: raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol )) data = ''.join(map(str, map(unichr, data[:-1]))) elif args.cbo: data = ''.join(map(str, map(unichr, data))) n = len(data) if n % 8 == 0: t = 'd' s = 8 elif len(data) % 4 == 0: t = 'i' s = 4 elif len(data) % 2 == 0: t = 'h' s = 2 elif len(data) == 1: t = 'c' s = 1 else: raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol )) n = (n - (n % s)) / s if file_info['endianness'] == 'little': e = '<' elif file_info['endianness'] == 'big': e = '>' else: raise Exception('file "%s" has an unknown endianness...' % ( args.filename )) tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)[0] data = struct.pack('>%d%s' % ( n, t ), tmp) else: data = ''.join(map(str, map(unichr, data))) sys.stdout.write(data)