Get bin info.py

Latest revision as of 11:30, 14 April 2016

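This script extracts the raw data associated with a symbol in an ELF binary. It relies on the `readelf` tool, prefixed with the value of the `CROSS_COMPILE` environment variable when that is set.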

#!/usr/bin/env python
 
import argparse
import os, sys
import subprocess
import struct
 
# Command-line interface: the ELF file, the symbol to extract, and two
# optional output modes.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument(
    'filename', type=str,
    help='the file to process')
ap.add_argument(
    'symbol', type=str,
    help='the symbol to extract')
ap.add_argument(
    '-S', '--str', action='store_true',
    help='print as a nul-terminated C string')
ap.add_argument(
    '-C', '--cbo', action='store_true',
    help='correct the byte order (endianness)')
args = ap.parse_args()

# Reject a missing input file before doing any work.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

# The two output modes cannot be combined.
if args.cbo and args.str:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# Optional toolchain prefix taken from the environment (empty when unset).
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools used by the functions below, keyed by tool name.
bin = { 'readelf': '%sreadelf' % ( cross_compile ) }
 
def get_file_info(filename):
    """Return selected ELF file-header fields of *filename* as a dict.

    Runs ``readelf --file-header`` and records the values of the 'class',
    'data', 'type' and 'machine' lines (matched case-insensitively on the
    text before the first ':').  Fields readelf does not report stay None.

    The extra key 'endianness' is derived from the 'data' value: 'little',
    'big', or 'unknown' when neither phrase is present.  It stays None when
    no 'data' line was found.
    """
    info = dict.fromkeys(('class', 'data', 'type', 'machine', 'endianness'))

    # universal_newlines=True makes check_output return text on both
    # Python 2 and Python 3, so the string operations below work on either.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--file-header' ],
        universal_newlines=True)

    for line in output.splitlines():
        key, separator, value = line.partition(':')
        if not separator:
            # Not a "Key: value" line.
            continue

        key = key.strip().lower()
        if key in info:
            info[key] = value.strip()

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # The original compared against 'bit endian', so big-endian
            # files were always reported as 'unknown'.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
 
def get_section_list(filename):
    """Return the section headers of *filename*, keyed by section name.

    Parses the table printed by ``readelf --section-headers --wide``.  Each
    value is a dict with the keys 'section_number', 'name', 'type',
    'address', 'offset', 'size', 'entsize', 'link', 'info', 'align' and,
    when the row has a flags column, 'flags'.  Numeric columns are converted
    to int; the rest are kept as strings.

    Rows that do not have 10 or 11 columns, or whose numeric columns do not
    parse, are skipped.
    """
    # Column names for rows without / with a flags column.
    field_layouts = {
        10: [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize',          'link', 'info', 'align' ],
        11: [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize', 'flags', 'link', 'info', 'align' ],
    }

    # Numeric columns and the base each one is parsed in.
    fields_int = {
        'section_number': 10,
        'address': 16,
        'offset': 16,
        'size': 16,
        'entsize': 16,
        'link': 10,
        'info': 10,
        'align': 10,
    }

    sections = {}
    consume = False

    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--section-headers', '--wide' ],
        universal_newlines=True)

    for line in output.splitlines():
        if not consume:
            # The table starts after the "[Nr] Name ..." header.  Only the
            # first two column titles are checked so that both the "Addr"
            # and the "Address" spelling of the header are accepted.
            if line.split()[:2] == [ '[Nr]', 'Name' ]:
                consume = True
            continue

        if line.strip() == 'Key to Flags:':
            # The flag legend follows the table.
            consume = False
            continue

        fields = line.split()

        # Single-digit section numbers are printed as "[ 1]", which splits
        # into a lone '[' token followed by '1]'.
        if '[' in fields:
            fields.remove('[')
        if len(fields) not in field_layouts:
            continue
        field_layout = field_layouts[len(fields)]

        fields[0] = fields[0].strip('[]')

        try:
            info = {}
            for field_name, raw in zip(field_layout, fields):
                base = fields_int.get(field_name)
                info[field_name] = raw if base is None else int(raw, base)
        except ValueError:
            # Not a well-formed section row; skip it.
            continue

        sections[info['name']] = info

    return sections
 
def get_section_info(filename, section):
    """Look up one section of *filename* by name.

    Returns the entry produced by get_section_list() for *section*, or None
    when the file has no section of that name.
    """
    return get_section_list(filename).get(section)
 
def get_section_container(filename, base_addr, length = 0):
    """Find a section of *filename* that contains the given address range.

    A section qualifies when *base_addr* lies between its start address and
    its end address (start + size), both bounds included, and, for a
    positive *length*, when base_addr + length does not run past the end.

    Returns the first qualifying entry from get_section_list(), or None
    when no section qualifies.
    """
    for candidate in get_section_list(filename).values():
        first = candidate['address']
        last = first + candidate['size']

        starts_inside = first <= base_addr <= last
        fits = length <= 0 or base_addr + length <= last

        if starts_inside and fits:
            return candidate

    return None
 
def get_section_data(filename, section_info):
    """Return the contents of one section as a list of byte values.

    *section_info* is a section entry as produced by get_section_list(); its
    'name', 'address' and 'size' values are used.  The bytes are recovered
    by parsing the output of ``readelf --hex-dump <name>``.

    Returns a dict with:
      'base_offset' -- the address printed on the first dump line, or None
                       when the dump contains no data lines;
      'data'        -- a list of ints (0..255), at most 'size' entries.

    Raises Exception when the first dump line does not start at the
    section's address.
    """
    data = {}
    data['base_offset'] = None
    data['data'] = []

    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--hex-dump', section_info['name'] ],
        universal_newlines=True)

    for line in output.splitlines():
        # Data lines look like "  0xADDR word word word word ascii".
        if not line.startswith('  0x'):
            continue

        cols = line.split()

        # The original tested the string literal 'base_offset' against None,
        # which is never true, so this sanity check never ran.
        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0][2:], 16)
            # readelf labels hex-dump lines with the section's address, so
            # that is what the first line is compared against.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))

        # Each line carries up to 16 bytes; the last one may carry fewer.
        remain = section_info['size'] - len(data['data'])

        for i in range(min(16, remain)):
            # Bytes are grouped four to a column, two hex digits each.
            # divmod keeps the column index an int on Python 3 as well.
            word, byte = divmod(i, 4)
            o = byte * 2
            data['data'].append(int(cols[1 + word][o:o+2], 16))

    return data
 
def get_symbol_info(filename, symbol):
    """Return the symbol-table entry for *symbol* in *filename*.

    Parses ``readelf --syms --wide`` and collects every row whose name
    column equals *symbol*.  The result is a dict with the keys
    'symbol_number', 'value', 'size', 'type', 'bind', 'visibility', 'Ndx'
    and 'name' ('symbol_number', 'value' and 'size' are ints).

    When several rows match they must agree according to
    validate_symbol_info_list(); the first one is returned.

    Raises Exception when the symbol is not found or has conflicting
    definitions.
    """
    symbol_info = []

    # --wide stops readelf from shortening long symbol names;
    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--syms', '--wide' ],
        universal_newlines=True)

    for line in output.splitlines():
        fields = line.split()

        # A symbol row has at least 8 columns; the name is the 8th.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            size_text = fields[2]
            # readelf switches to a 0x-prefixed hex size for large symbols.
            if size_text.lower().startswith('0x'):
                size = int(size_text, 16)
            else:
                size = int(size_text, 10)

            data = {
                # The number column is printed as "N:"; drop the colon.
                'symbol_number': int(fields[0][:-1]),
                'value': int(fields[1], 16),
                'size': size,
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a symbol row (for example a table heading); skip it.
            continue

        symbol_info.append(data)

    # Report against the requested symbol rather than the global CLI args.
    if len(symbol_info) == 0:
        raise Exception('symbol "%s" is not defined...' % ( symbol ))

    if not validate_symbol_info_list(symbol_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return symbol_info[0]
 
def validate_symbol_info_list(symbol_list):
    """Check that all entries of *symbol_list* describe the same symbol.

    Every entry after the first is compared with the first on the keys
    'value', 'size', 'type' and 'name'.  Returns False on the first
    mismatch, True otherwise (including for empty and one-element lists).
    """
    if not symbol_list:
        return True

    compared_keys = ( 'value', 'size', 'type', 'name' )
    reference = symbol_list[0]

    return all(
        reference[key] == other[key]
        for other in symbol_list[1:]
        for key in compared_keys
    )
 
def get_symbol_data(filename, symbol):
    """Return the bytes that back *symbol* in *filename* as a list of ints.

    The symbol is looked up with get_symbol_info(), the section containing
    it with get_section_container(), and that section's contents with
    get_section_data().  The result holds symbol 'size' entries taken from
    the section data at the symbol's offset within the section.

    Raises Exception when no section contains the symbol.
    """
    symbol_info = get_symbol_info(filename, symbol)
    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])

    # get_section_container() returns None when nothing matches; fail with
    # a clear message instead of a TypeError further down.
    if section_info is None:
        raise Exception('symbol "%s" is not contained in any section...' % ( symbol ))

    section_data = get_section_data(filename, section_info)

    # Position of the symbol relative to the start of its section.
    offset = symbol_info['value'] - section_info['address']
    data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ]

    return data
 
# Raw bytes of the requested symbol.  A bytearray is used so that values
# above 0x7f survive unchanged; the original went through unichr()/str(),
# which fails for such bytes on Python 2 and does not exist on Python 3.
data = bytearray(get_symbol_data(args.filename, args.symbol))

if args.str:
    # The symbol must end in a NUL byte; the terminator is not printed.
    if len(data) == 0 or data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    data = data[:-1]

elif args.cbo:
    n = len(data)

    # Treat the data as an array of the widest word size (8, 4 or 2 bytes)
    # that divides its length evenly; a lone byte is passed through.
    if n % 8 == 0:
        t = 'q'
        s = 8
    elif n % 4 == 0:
        t = 'i'
        s = 4
    elif n % 2 == 0:
        t = 'h'
        s = 2
    elif n == 1:
        t = 'c'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    # Number of words; floor division keeps it an int on Python 3.
    n = n // s

    file_info = get_file_info(args.filename)
    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Decode every word in the file's byte order and re-encode big-endian.
    # The original kept only the first unpacked word, so packing failed for
    # anything longer than a single word.
    words = struct.unpack('%s%d%s' % ( e, n, t ), bytes(data))
    data = struct.pack('>%d%s' % ( n, t ), *words)

# Emit raw bytes: Python 3 exposes the binary stream as sys.stdout.buffer,
# while on Python 2 sys.stdout itself accepts byte strings.
out = getattr(sys.stdout, 'buffer', sys.stdout)
out.write(bytes(data))

[edit] Example Usage

cat <<EOF | gcc -x c - -o eg
const char my_data[] = "test string";
void main(void) {}
EOF
 
./get_bin_info.py eg my_data -S

Get bin info.py

Latest revision as of 11:30, 14 April 2016

[edit] Example Usage

Personal tools

Namespaces

Variants

Views

Actions

Search

Navigation

Toolbox