Get bin info.py

From Attie's Wiki
(Difference between revisions)
Jump to: navigation, search
m
m
Line 1: Line 1:
 
This script helps you extract the data associated with a symbol in a binary.
 
This script helps you extract the data associated with a symbol in a binary.
Remember that the output here will not necessarily be correctly ordered (see [http://en.wikipedia.org/wiki/Endianness Endianness]).
 
  
<source lang="bash">
+
<source lang="python">
#!/bin/bash -eu
+
#!/usr/bin/env python
  
if [ -z ${CROSS_COMPILE+x} ]; then
+
import argparse
CROSS_COMPILE=""
+
import os, sys
fi
+
import subprocess
 +
import struct
  
FILE=$1; shift
+
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
SYMBOL=$1; shift
+
ap.add_argument('filename', type=str, help='the file to process')
 +
ap.add_argument('symbol', type=str, help='the symbol to extract')
 +
ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
 +
ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
 +
args = ap.parse_args()
  
if [ ! -e ${FILE} ]; then
+
if not os.path.isfile(args.filename):
echo "File '${FILE}' does not exist..." >&2
+
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))
exit 1
+
fi
+
  
# get the symbol information
+
if args.str and args.cbo:
SYM_INFO=$(${CROSS_COMPILE}readelf ${FILE} --syms | grep "${SYMBOL}\$" || true)
+
    raise Exception('Arguments --cbo and --str are mutually exclusive...')
if [ "${SYM_INFO}" == "" ]; then
+
echo "Symbol '${SYMBOL}' does not exist..." >&2
+
exit 1
+
fi
+
  
# validate the symbols (it may have been present in more than one symbol table)
+
if 'CROSS_COMPILE' in os.environ:
echo "${SYM_INFO}" | awk 'BEGIN{a=-1}{v=strtonum("0x"$2); if (a==-1) a=v; else if (a!=v) exit(1)}' || (
+
    cross_compile = os.environ['CROSS_COMPILE']
echo "Symbol '${SYMBOL}' has conflicting definitions..." >&2
+
else:
echo "${SYM_INFO}" >&2
+
    cross_compile = ''
exit 1
+
)
+
SYM_INFO=$(echo "${SYM_INFO}" | head -n 1)
+
  
# get the offset of the symbol's data (in hex), rounded down to the closest 16-byte boundary
+
bin = {}
SYM_OFFSET_ALIGNED=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("0x%08x", v - (v % 16))}')
+
bin['readelf'] = '%sreadelf' % ( cross_compile )
  
# get the offset of the symbol's data (in decimal), into this 16-byte aligned data
+
def get_file_info(filename):
SYM_OFFSET_INTO_ALIGNED_DATA=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("%d", (v % 16) * 2)}')
+
    info = {}
 +
    info['class'] = None
 +
    info['data'] = None
 +
    info['type'] = None
 +
    info['machine'] = None
 +
    info['endianness'] = None
  
# get the size of the symbol's data (in decimal), as nibbles / hex digits
+
    for line in subprocess.check_output([ bin['readelf'], filename, '--file-header' ]).split('\n'):
SYM_SIZE_IN_NIBBLES=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf("%d",v * 2)}')
+
        fields = [ x.strip() for x in line.split(':', 1) ]
  
# get the number of 16-byte rows that this symbol spans
+
        if len(fields) != 2:
SYM_ROWS_SPANNED=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf("%d", ((v - (v % 16)) / 16))}')
+
            continue
  
# get the section's full data
+
        key = fields[0].lower()
SECTION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | tail -n +3 | head -n -1)
+
        value = fields[1]
 +
        if key in info:
 +
            info[key] = value
  
# get the alignment of the .rodata section with respect to the 16-byte boundary that we're working with
+
    if info['data'] != None:
SECTION_ALIGNMENT=$(echo "${SECTION_DATA}" 2>/dev/null | head -n 1 | awk '{v=strtonum($1); printf("%d", (v % 16))}')
+
        if 'little endian' in info['data']:
 +
            info['endianness'] = 'little'
 +
        elif 'bit endian' in info['data']:
 +
            info['endianness'] = 'big'
 +
        else:
 +
            info['endianness'] = 'unknown'
  
# adjust the symbol's aligned offset with the section's alignment
+
    return info
SYM_OFFSET_REALIGNED=$(echo "${SYM_OFFSET_ALIGNED} ${SECTION_ALIGNMENT}" | awk '{o=strtonum($1); a=strtonum($2); printf("0x%08x", o + a)}')
+
  
# get the rows for the interesting region of the file
+
def get_section_info(filename, section):
REGION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | grep "${SYM_OFFSET_REALIGNED}" -A${SYM_ROWS_SPANNED} | cut -b14-48 | tr -d ' \n')
+
    for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers' ]).split('\n'):
 +
        fields = line.split()
 +
        if '[' in fields:
 +
            fields.remove('[')
 +
        if len(fields) != 11:
 +
            continue
 +
        try:
 +
            fields[0] = int(fields[0].translate(None, ''.join([ '[', ']' ])))
 +
            if fields[1] != section:
 +
                continue
  
# snip out the symbol's data
+
            info = {}
DATA=$(echo -n "${REGION_DATA}" | cut -b $((SYM_OFFSET_INTO_ALIGNED_DATA + 1))-$((SYM_OFFSET_INTO_ALIGNED_DATA + SYM_SIZE_IN_NIBBLES)))
+
            info['section_number'] = fields[0]
 +
            info['name'] = fields[1]
 +
            info['type'] = fields[2]
 +
            info['address'] = int(fields[3], 16)
 +
            info['offset'] = int(fields[4], 16)
 +
            info['size'] = int(fields[5], 16)
 +
            info['ES'] = fields[6]
 +
            info['Flg'] = fields[7]
 +
            info['Lk'] = fields[8]
 +
            info['Inf'] = fields[9]
 +
            info['Al'] = fields[10]
 +
            return info
 +
        except:
 +
            pass
 +
    return None
  
# print out the symbol's data...
+
def get_section_data(filename, section, section_info=None):
## ... as hex digits
+
    if section_info is None:
#echo ${DATA}
+
        section_info = get_section_info(filename, sectioN)
## ... as raw binary data
+
echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}'
+
## ... rendered by hexdump
+
#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}' | hexdump -C
+
## ... a nul-terminated C string (without the nul)
+
#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{v=strtonum("0x"$0); if (v==0) exit(0); printf "%c", v}'
+
</source>
+
  
==Example Usage==
+
    data = {}
<source lang="bash">
+
    data['base_offset'] = None
cat <<EOF | gcc -x c - -o eg
+
    data['data'] = []
const char my_data[] = "test string";
+
void main(void) {}
+
EOF
+
  
./get_bin_info.sh eg my_data
+
    for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section ]).split('\n'):
 +
        if line[0:4] != '  0x':
 +
            continue
 +
 
 +
        cols = line.split()
 +
 
 +
        if 'base_offset' is None:
 +
            data['base_offset'] = int(cols[0][2:], 16)
 +
            if data['base_offset'] != section_info['offset']:
 +
                raise Exception('section "%s" has an unexpected starting address...' % ( section ))
 +
 
 +
        remain = section_info['size'] - len(data['data'])
 +
        if remain > 16:
 +
            r = range(16)
 +
        else:
 +
            r = range(remain)
 +
 
 +
        for i in r:
 +
            byte = i % 4
 +
            col = 1 + ((i - byte) / 4)
 +
 
 +
            o = byte * 2
 +
            data['data'].append(int(cols[col][o:o+2], 16))
 +
 
 +
    return data
 +
 
 +
def get_sym_info(filename, symbol):
 +
    sym_info = []
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
 +
        fields = line.split()
 +
 
 +
        if len(fields) >= 7:
 +
            try:
 +
                fields[0] = int(fields[0][:-1])
 +
                if fields[7] != symbol:
 +
                    continue
 +
 
 +
                data = {}
 +
                data['symbol_number'] = fields[0]
 +
                data['value'] = int(fields[1], 16)
 +
                data['size'] = int(fields[2], 10)
 +
                data['type'] = fields[3]
 +
                data['bind'] = fields[4]
 +
                data['visibility'] = fields[5]
 +
                data['Ndx'] = fields[6]
 +
                data['name'] = fields[7]
 +
                sym_info.append(data)
 +
            except:
 +
                pass
 +
 
 +
    if not validate_sym_info_list(sym_info):
 +
        raise Exception('symbol "%s" has multiple definitions...' % (args.symbol))
 +
 
 +
    return sym_info[0]
 +
 
 +
def validate_sym_info_list(symbol_list):
 +
    if len(symbol_list) > 1:
 +
        for i in range(1, len(symbol_list)):
 +
            for f in [ 'value', 'size', 'type', 'name']:
 +
                if symbol_list[0][f] != symbol_list[i][f]:
 +
                    return False
 +
    return True
 +
 
 +
file_info = get_file_info(args.filename)
 +
symbol_info = get_sym_info(args.filename, args.symbol)
 +
section_info = get_section_info(args.filename, '.rodata')
 +
section_data = get_section_data(args.filename, '.rodata', section_info=section_info)
 +
 
 +
offset = symbol_info['value'] - section_info['offset']
 +
data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ]
 +
 
 +
if args.str:
 +
    if data[-1] != 0:
 +
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
 +
    data = ''.join(map(str, map(unichr, data[:-1])))
 +
 
 +
elif args.cbo:
 +
    data = ''.join(map(str, map(unichr, data)))
 +
    n = len(data)
 +
 
 +
    if n % 8 == 0:
 +
        t = 'd'
 +
        s = 8
 +
    elif len(data) % 4 == 0:
 +
        t = 'i'
 +
        s = 4
 +
    elif len(data) % 2 == 0:
 +
        t = 'h'
 +
        s = 2
 +
    elif len(data) == 1:
 +
        t = 'c'
 +
        s  = 1
 +
    else:
 +
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))
 +
 
 +
    n = (n - (n % s)) / s
 +
 
 +
    if file_info['endianness'] == 'little':
 +
        e = '<'
 +
    elif file_info['endianness'] == 'big':
 +
        e = '>'
 +
    else:
 +
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))
 +
 
 +
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)[0]
 +
    data = struct.pack('>%d%s' % ( n, t ), tmp)
 +
 
 +
else:
 +
    data = ''.join(map(str, map(unichr, data)))
 +
 
 +
sys.stdout.write(data)
 
</source>
 
</source>

Revision as of 13:14, 16 October 2015

This script helps you extract the data associated with a symbol in a binary.

#!/usr/bin/env python
 
import argparse
import os, sys
import subprocess
import struct
 
# Command line: two positional arguments plus two output-format switches.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument('filename', type=str, help='the file to process')
ap.add_argument('symbol', type=str, help='the symbol to extract')
ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
args = ap.parse_args()

# The input must be an existing regular file; this is checked before the
# switch combination so a bad path is always the first error reported.
file_is_present = os.path.isfile(args.filename)
if not file_is_present:
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

# The two output modes cannot be combined.
if args.cbo and args.str:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# Optional toolchain prefix taken from the environment (empty when unset).
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools, keyed by tool name.
bin = { 'readelf': cross_compile + 'readelf' }
 
def get_file_info(filename):
    """Collect selected ELF header fields of *filename* using readelf.

    Runs ``readelf --file-header`` and returns a dict with the keys
    'class', 'data', 'type' and 'machine', each holding the text readelf
    printed after the header field of the same (case-insensitive) name, or
    None if that field was not printed.  The extra key 'endianness' is
    'little', 'big' or 'unknown' depending on the 'data' text, and stays
    None when there was no 'data' field at all.

    Errors from running readelf propagate from subprocess.check_output.
    """
    info = {}
    info['class'] = None
    info['data'] = None
    info['type'] = None
    info['machine'] = None
    info['endianness'] = None

    # universal_newlines makes check_output return text instead of bytes, so
    # the string handling below behaves the same on Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--file-header' ],
        universal_newlines=True)

    for line in output.split('\n'):
        # Header lines look like "Key:   value"; split on the first colon only
        # because values may themselves contain colons.
        fields = [ x.strip() for x in line.split(':', 1) ]

        if len(fields) != 2:
            continue

        key = fields[0].lower()
        value = fields[1]
        if key in info:
            info[key] = value

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
 
def get_section_info(filename, section):
    """Look up the section header entry named *section* in *filename*.

    Runs ``readelf --section-headers --wide`` and returns a dict describing
    the first matching row:

        'section_number'            -- int, the index shown in brackets
        'name', 'type'              -- strings as printed
        'address', 'offset', 'size' -- ints parsed from the hex columns
        'ES', 'Flg', 'Lk', 'Inf', 'Al' -- remaining columns, as strings

    Returns None when no matching row is found.  Only rows that split into
    exactly 11 whitespace-separated columns are considered, so a section
    whose flags column is empty is not found by this function.

    Errors from running readelf propagate from subprocess.check_output.
    """
    # --wide keeps every section header on a single line; without it readelf
    # wraps 64-bit ELF entries over two lines and no row has 11 columns.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--section-headers', '--wide' ],
        universal_newlines=True)

    for line in output.split('\n'):
        fields = line.split()
        # Single-digit indexes are printed as "[ 1]", which splits into a
        # lone "[" followed by "1]"; drop the lone bracket.
        if '[' in fields:
            fields.remove('[')
        if len(fields) != 11:
            continue
        if fields[1] != section:
            continue

        try:
            info = {}
            info['section_number'] = int(fields[0].strip('[]'))
            info['name'] = fields[1]
            info['type'] = fields[2]
            info['address'] = int(fields[3], 16)
            info['offset'] = int(fields[4], 16)
            info['size'] = int(fields[5], 16)
            info['ES'] = fields[6]
            info['Flg'] = fields[7]
            info['Lk'] = fields[8]
            info['Inf'] = fields[9]
            info['Al'] = fields[10]
            return info
        except ValueError:
            # Not a real section row (e.g. the column heading line).
            continue
    return None
 
def get_section_data(filename, section, section_info=None):
    """Read the bytes of *section* in *filename* from a readelf hex dump.

    *section_info* is the dict returned by get_section_info() for the same
    section; it is looked up here when not supplied.

    Returns a dict with:

        'base_offset' -- int, the address printed on the first dump row
                         (None if the dump contained no rows)
        'data'        -- list of ints (0-255), at most section_info['size']
                         entries, in dump order

    Raises Exception if the section cannot be found or if the first dump row
    does not start at the section's address.  Errors from running readelf
    propagate from subprocess.check_output.
    """
    if section_info is None:
        section_info = get_section_info(filename, section)
    if section_info is None:
        raise Exception('section "%s" does not exist...' % ( section ))

    data = {}
    data['base_offset'] = None
    data['data'] = []

    output = subprocess.check_output(
        [ bin['readelf'], filename, '--hex-dump', section ],
        universal_newlines=True)

    for line in output.split('\n'):
        # Data rows look like "  0x<addr> <8 hex> <8 hex> <8 hex> <8 hex> <ascii>".
        if line[0:4] != '  0x':
            continue

        cols = line.split()

        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0][2:], 16)
            # The dump labels rows with addresses (not file offsets), so the
            # first row is compared with the section's address.
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section ))

        # Each row carries up to 16 bytes; the last row may carry fewer, and
        # anything past the section size is the ASCII rendering, not data.
        remain = section_info['size'] - len(data['data'])

        for i in range(min(remain, 16)):
            # Bytes are grouped four per column, two hex digits per byte.
            col, byte = divmod(i, 4)
            o = byte * 2
            data['data'].append(int(cols[1 + col][o:o+2], 16))

    return data
 
def get_sym_info(filename, symbol):
    """Look up *symbol* in the symbol tables of *filename* using readelf.

    Runs ``readelf --syms --wide`` and collects every row whose name column
    equals *symbol*.  Returns the first such row as a dict:

        'symbol_number' -- int, the row index
        'value'         -- int parsed from the hex value column
        'size'          -- int
        'type', 'bind', 'visibility', 'Ndx', 'name' -- strings as printed

    Raises Exception if the symbol is not found, or if it appears more than
    once with differing value/size/type/name.  Errors from running readelf
    propagate from subprocess.check_output.
    """
    sym_info = []

    # --wide stops readelf from shortening long symbol names, which would
    # otherwise never compare equal to the requested name.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--syms', '--wide' ],
        universal_newlines=True)

    for line in output.split('\n'):
        fields = line.split()

        # A symbol row has at least 8 columns; the name is the 8th.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            data = {}
            # The index column is printed with a trailing colon ("52:").
            data['symbol_number'] = int(fields[0][:-1])
            data['value'] = int(fields[1], 16)
            # readelf switches to a 0x-prefixed hex size for large symbols.
            if fields[2].lower().startswith('0x'):
                data['size'] = int(fields[2], 16)
            else:
                data['size'] = int(fields[2], 10)
            data['type'] = fields[3]
            data['bind'] = fields[4]
            data['visibility'] = fields[5]
            data['Ndx'] = fields[6]
            data['name'] = fields[7]
        except ValueError:
            # Not a real symbol row (e.g. the column heading line).
            continue

        sym_info.append(data)

    if not sym_info:
        raise Exception('symbol "%s" does not exist...' % ( symbol ))

    # The same symbol may be listed in more than one table (e.g. .dynsym and
    # .symtab); that is fine as long as the entries agree.
    if not validate_sym_info_list(sym_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return sym_info[0]
 
def validate_sym_info_list(symbol_list):
    """Tell whether all entries of *symbol_list* describe the same symbol.

    Every entry after the first is compared with the first on the keys
    'value', 'size', 'type' and 'name'.  Returns False as soon as one of
    them differs, True otherwise (including for lists of zero or one entry).
    """
    if len(symbol_list) <= 1:
        return True

    compared_keys = ( 'value', 'size', 'type', 'name' )
    reference = symbol_list[0]

    mismatch = any(
        reference[key] != candidate[key]
        for candidate in symbol_list[1:]
        for key in compared_keys
    )
    return not mismatch
 
# Gather the file header, the symbol's table entry and the contents of the
# section the symbol is expected to live in.
# NOTE(review): the section is hard-coded to .rodata; symbols stored in any
# other section are rejected by the range check below.
file_info = get_file_info(args.filename)
symbol_info = get_sym_info(args.filename, args.symbol)
section_info = get_section_info(args.filename, '.rodata')
if section_info is None:
    raise Exception('file "%s" has no .rodata section...' % ( args.filename ))
section_data = get_section_data(args.filename, '.rodata', section_info=section_info)

# A symbol's value is an address, so its position inside the section is
# measured from the section's address (not from the section's file offset).
offset = symbol_info['value'] - section_info['address']
end = offset + symbol_info['size']

# Reject symbols outside the section explicitly: a negative offset would
# otherwise silently index from the end of the list and return wrong bytes.
if offset < 0 or end > len(section_data['data']):
    raise Exception('symbol "%s" is not located in the .rodata section...' % ( args.symbol ))

# Keep the payload as raw bytes so that values >= 0x80 survive untouched.
data = bytes(bytearray(section_data['data'][offset:end]))

if args.str:
    # The final byte must be the nul terminator; it is not printed.
    if len(data) == 0 or bytearray(data)[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    data = data[:-1]

elif args.cbo:
    n = len(data)

    # Treat the data as an array of the widest element size that divides its
    # length.  Unsigned integer codes are used so that the conversion is a
    # pure byte swap of each element.
    if n % 8 == 0:
        t = 'Q'
        s = 8
    elif n % 4 == 0:
        t = 'I'
        s = 4
    elif n % 2 == 0:
        t = 'H'
        s = 2
    elif n == 1:
        t = 'B'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    # Number of elements of size s.
    n = n // s

    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Decode every element in the file's byte order and re-encode all of them
    # big-endian.
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)
    data = struct.pack('>%d%s' % ( n, t ), *tmp)

# Write the raw bytes: Python 3 exposes the byte stream as sys.stdout.buffer,
# Python 2 accepts byte strings on sys.stdout directly.
getattr(sys.stdout, 'buffer', sys.stdout).write(data)
Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox