Get bin info.py

From Attie's Wiki
(Difference between revisions)
Jump to: navigation, search
m (fixes and example usage)
m
Line 58: Line 58:
  
 
     return info
 
     return info
 +
 +
def get_sym_info(filename, symbol):
 +
    sym_info = []
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
 +
        fields = line.split()
 +
 +
        if len(fields) >= 7:
 +
            try:
 +
                fields[0] = int(fields[0][:-1])
 +
                if fields[7] != symbol:
 +
                    continue
 +
 +
                data = {}
 +
                data['symbol_number'] = fields[0]
 +
                data['value'] = int(fields[1], 16)
 +
                data['size'] = int(fields[2], 10)
 +
                data['type'] = fields[3]
 +
                data['bind'] = fields[4]
 +
                data['visibility'] = fields[5]
 +
                data['Ndx'] = fields[6]
 +
                data['name'] = fields[7]
 +
                sym_info.append(data)
 +
            except:
 +
                pass
 +
 +
    if not validate_sym_info_list(sym_info):
 +
        raise Exception('symbol "%s" has multiple definitions...' % (args.symbol))
 +
 +
    return sym_info[0]
 +
 +
def validate_sym_info_list(symbol_list):
 +
    if len(symbol_list) > 1:
 +
        for i in range(1, len(symbol_list)):
 +
            for f in [ 'value', 'size', 'type', 'name']:
 +
                if symbol_list[0][f] != symbol_list[i][f]:
 +
                    return False
 +
    return True
  
 
def get_section_info(filename, section):
 
def get_section_info(filename, section):
Line 121: Line 158:
  
 
     return data
 
     return data
 
def get_sym_info(filename, symbol):
 
    sym_info = []
 
    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
 
        fields = line.split()
 
 
        if len(fields) >= 7:
 
            try:
 
                fields[0] = int(fields[0][:-1])
 
                if fields[7] != symbol:
 
                    continue
 
 
                data = {}
 
                data['symbol_number'] = fields[0]
 
                data['value'] = int(fields[1], 16)
 
                data['size'] = int(fields[2], 10)
 
                data['type'] = fields[3]
 
                data['bind'] = fields[4]
 
                data['visibility'] = fields[5]
 
                data['Ndx'] = fields[6]
 
                data['name'] = fields[7]
 
                sym_info.append(data)
 
            except:
 
                pass
 
 
    if not validate_sym_info_list(sym_info):
 
        raise Exception('symbol "%s" has multiple definitions...' % (args.symbol))
 
 
    return sym_info[0]
 
 
def validate_sym_info_list(symbol_list):
 
    if len(symbol_list) > 1:
 
        for i in range(1, len(symbol_list)):
 
            for f in [ 'value', 'size', 'type', 'name']:
 
                if symbol_list[0][f] != symbol_list[i][f]:
 
                    return False
 
    return True
 
  
 
file_info = get_file_info(args.filename)
 
file_info = get_file_info(args.filename)

Revision as of 13:41, 13 April 2016

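This script extracts the raw bytes of a symbol stored in the .rodata section of an ELF binary. It relies on readelf (prefixed with the value of the CROSS_COMPILE environment variable, when set) and writes the symbol's data to standard output.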

#!/usr/bin/env python
 
import argparse
import os, sys
import subprocess
import struct
 
# Command line: two positionals (file, symbol) plus two output-mode switches.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument(
    'filename', type=str,
    help='the file to process')
ap.add_argument(
    'symbol', type=str,
    help='the symbol to extract')
ap.add_argument(
    '-S', '--str', action='store_true',
    help='print as a nul-terminated C string')
ap.add_argument(
    '-C', '--cbo', action='store_true',
    help='correct the byte order (endianness)')
args = ap.parse_args()

# Reject a path that does not exist or is not a regular file.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % args.filename)

# The two output modes cannot be combined.
if args.str and args.cbo:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# Optional toolchain prefix from the environment; empty when CROSS_COMPILE is unset.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# External tools used by the functions below, keyed by tool name.
bin = {'readelf': '%sreadelf' % cross_compile}
 
def get_file_info(filename):
    """Collect selected ELF header fields of *filename* via ``readelf --file-header``.

    Returns a dict with the keys 'class', 'data', 'type' and 'machine', each
    holding the text readelf printed after the header label of that name (or
    None when no such line was seen), plus 'endianness', which is derived
    from the 'data' field: 'little', 'big', 'unknown', or None when readelf
    printed no 'data' line.

    Raises subprocess.CalledProcessError when readelf exits with an error.
    """
    info = dict.fromkeys(('class', 'data', 'type', 'machine', 'endianness'))

    # universal_newlines=True makes check_output return text (not bytes) on
    # Python 3 as well, so the output can be split on '\n' on either version.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--file-header'],
        universal_newlines=True)

    for line in output.split('\n'):
        # Header lines look like "Label:   value"; split on the first ':' only
        # because the value itself may contain colons.
        fields = [x.strip() for x in line.split(':', 1)]
        if len(fields) != 2:
            continue

        key = fields[0].lower()
        if key in info:
            info[key] = fields[1]

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
 
def get_sym_info(filename, symbol):
    """Look up *symbol* in the output of ``readelf --syms`` for *filename*.

    Returns a dict describing the first matching symbol-table row with the
    keys 'symbol_number' (int), 'value' (int, parsed as hex), 'size' (int,
    parsed as decimal), 'type', 'bind', 'visibility', 'Ndx' and 'name'
    (strings as printed by readelf).

    Raises:
        IndexError: no row named *symbol* could be parsed.
        Exception: several rows match but validate_sym_info_list() reports
            that they disagree with each other.
        subprocess.CalledProcessError: readelf exited with an error.
    """
    # universal_newlines=True makes check_output return text (not bytes) on
    # Python 3 as well, so the output can be split on '\n' on either version.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--syms'],
        universal_newlines=True)

    sym_info = []
    for line in output.split('\n'):
        fields = line.split()

        # A usable row has at least 8 columns:
        #   Num: Value Size Type Bind Vis Ndx Name
        # Rows without a name (7 columns) can never match and are skipped.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            data = {
                # The number column is printed as "12:" - drop the last character.
                'symbol_number': int(fields[0][:-1]),
                'value': int(fields[1], 16),
                'size': int(fields[2], 10),
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a symbol row after all (e.g. the column header line), or a
            # numeric column that cannot be parsed: ignore the line.
            continue

        sym_info.append(data)

    if not sym_info:
        # IndexError is what indexing the empty result list raised before;
        # keep the type but say what actually went wrong.
        raise IndexError('symbol "%s" was not found...' % (symbol))

    if not validate_sym_info_list(sym_info):
        raise Exception('symbol "%s" has multiple definitions...' % (symbol))

    return sym_info[0]
 
def validate_sym_info_list(symbol_list):
    """Tell whether every entry of *symbol_list* describes the same symbol.

    Each entry after the first is compared with the first one on the keys
    'value', 'size', 'type' and 'name'. Returns False as soon as one of those
    differs, and True otherwise (including for an empty or one-entry list).
    """
    compared_keys = ('value', 'size', 'type', 'name')

    if len(symbol_list) <= 1:
        return True

    reference = symbol_list[0]
    for candidate in symbol_list[1:]:
        if any(reference[key] != candidate[key] for key in compared_keys):
            return False
    return True
 
def get_section_info(filename, section):
    """Find *section* in the output of ``readelf --section-headers --wide``.

    Returns a dict for the first row whose name equals *section*, with the
    keys 'section_number' (int), 'name', 'type', 'address', 'offset', 'size'
    (the last three as ints parsed from hex), 'entsize', 'flags', 'link',
    'info' and 'align' (strings as printed by readelf; 'flags' is '' when the
    column is blank). Returns None when no row matches.

    Raises subprocess.CalledProcessError when readelf exits with an error.
    """
    # universal_newlines=True makes check_output return text (not bytes) on
    # Python 3 as well, so the output can be split on '\n' on either version.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--section-headers', '--wide'],
        universal_newlines=True)

    for line in output.split('\n'):
        fields = line.split()

        # Single-digit section numbers are printed as "[ 1]", which splits
        # into '[' and '1]'; drop the stray bracket token.
        if '[' in fields:
            fields.remove('[')

        # A section without flags leaves the Flg column blank, which yields
        # 10 fields instead of 11; restore the missing column as ''.
        if len(fields) == 10:
            fields.insert(7, '')

        if len(fields) != 11 or fields[1] != section:
            continue

        try:
            return {
                # Same effect as deleting every '[' and ']' from the token,
                # without relying on the Python 2-only str.translate(None, ...).
                'section_number': int(fields[0].replace('[', '').replace(']', '')),
                'name': fields[1],
                'type': fields[2],
                'address': int(fields[3], 16),
                'offset': int(fields[4], 16),
                'size': int(fields[5], 16),
                'entsize': fields[6],
                'flags': fields[7],
                'link': fields[8],
                'info': fields[9],
                'align': fields[10],
            }
        except ValueError:
            # A numeric column did not parse, so this is not a section row
            # (e.g. the column header line): keep looking.
            continue

    return None
 
def get_section_data(filename, section, section_info=None):
    """Read the bytes of *section* from ``readelf --hex-dump`` output.

    *section_info* is the dict returned by get_section_info(); it is looked
    up here when not supplied. Its 'address' and 'size' entries are used.

    Returns a dict with 'base_offset' (the address printed on the first dump
    row, or None when readelf printed no dump rows) and 'data' (a list of
    byte values as ints, at most section_info['size'] of them).

    Raises:
        Exception: the first dump row does not start at the section address.
        subprocess.CalledProcessError: readelf exited with an error.
    """
    if section_info is None:
        section_info = get_section_info(filename, section)

    data = {'base_offset': None, 'data': []}

    # universal_newlines=True makes check_output return text (not bytes) on
    # Python 3 as well, so the output can be split on '\n' on either version.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--hex-dump', section],
        universal_newlines=True)

    for line in output.split('\n'):
        # Dump rows look like "  0x<addr> <8 hex> <8 hex> <8 hex> <8 hex> <ascii>".
        if not line.startswith('  0x'):
            continue

        cols = line.split()

        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0][2:], 16)
            # The first column of a dump row is an address, so it has to be
            # checked against the section's address, not its file offset.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section ))

        # Each row carries up to 16 bytes; the last row may carry fewer.
        remain = section_info['size'] - len(data['data'])
        for i in range(min(remain, 16)):
            # Bytes are grouped four to a column, two hex digits per byte.
            col, byte = divmod(i, 4)
            o = byte * 2
            data['data'].append(int(cols[1 + col][o:o+2], 16))

    return data
 
# Main flow: locate the symbol, pull its bytes out of .rodata, then emit them
# either raw, as a C string (--str), or re-ordered to big-endian (--cbo).
file_info = get_file_info(args.filename)
symbol_info = get_sym_info(args.filename, args.symbol)
section_info = get_section_info(args.filename, '.rodata')
if section_info is None:
    raise Exception('file "%s" has no usable .rodata section...' % ( args.filename ))
section_data = get_section_data(args.filename, '.rodata', section_info=section_info)

offset = symbol_info['value'] - section_info['address']
size = symbol_info['size']

# Without this check a symbol located before the section would produce a
# negative offset, and negative list indices silently return the wrong bytes.
if offset < 0 or offset + size > len(section_data['data']):
    raise Exception('symbol "%s" does not lie within the .rodata section...' % ( args.symbol ))

data = section_data['data'][offset:offset + size]

if args.str:
    if not data or data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    # bytearray handles every byte value 0..255 and yields a byte string on
    # both Python 2 and Python 3.
    data = bytes(bytearray(data[:-1]))

elif args.cbo:
    data = bytes(bytearray(data))
    n = len(data)

    # Pick the widest element size that divides the symbol size. Integer
    # formats are used throughout so that the bytes round-trip unchanged.
    if n % 8 == 0:
        t = 'Q'
        s = 8
    elif n % 4 == 0:
        t = 'i'
        s = 4
    elif n % 2 == 0:
        t = 'h'
        s = 2
    elif n == 1:
        t = 'c'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    n = n // s

    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Unpack every element in the file's byte order and repack all of them
    # (not just the first) as big-endian.
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)
    data = struct.pack('>%d%s' % ( n, t ), *tmp)

else:
    data = bytes(bytearray(data))

# Python 3 needs the binary layer of stdout for byte strings; Python 2's
# stdout has no 'buffer' attribute and accepts them directly.
getattr(sys.stdout, 'buffer', sys.stdout).write(data)

Example Usage

# Build a tiny test binary that contains a constant string symbol.
cat <<EOF | gcc -x c - -o eg
const char my_data[] = "test string";
int main(void) { return 0; }
EOF
 
# Print the symbol's contents as a C string.
./get_bin_info.py eg my_data -S
Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox