Get bin info.py

From Attie's Wiki
(Difference between revisions)
Jump to: navigation, search
m (now operates with .rodata sections that are not aligned to a 16-byte boundary...)
m
 
(6 intermediate revisions by one user not shown)
Line 1: Line 1:
 
This script helps you extract the data associated with a symbol in a binary.
 
This script helps you extract the data associated with a symbol in a binary.
Remember that the output here will not necessarily be correctly ordered (see [http://en.wikipedia.org/wiki/Endianness Endianness]).
 
  
<source lang="bash">
+
<source lang="python">
#!/bin/bash -eu
+
#!/usr/bin/env python
 +
 
 +
import argparse
 +
import os, sys
 +
import subprocess
 +
import struct
 +
 
 +
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
 +
ap.add_argument('filename', type=str, help='the file to process')
 +
ap.add_argument('symbol', type=str, help='the symbol to extract')
 +
ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
 +
ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
 +
args = ap.parse_args()
 +
 
 +
if not os.path.isfile(args.filename):
 +
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))
 +
 
 +
if args.str and args.cbo:
 +
    raise Exception('Arguments --cbo and --str are mutually exclusive...')
 +
 
 +
if 'CROSS_COMPILE' in os.environ:
 +
    cross_compile = os.environ['CROSS_COMPILE']
 +
else:
 +
    cross_compile = ''
 +
 
 +
bin = {}
 +
bin['readelf'] = '%sreadelf' % ( cross_compile )
 +
 
 +
def get_file_info(filename):
 +
    info = {}
 +
    info['class'] = None
 +
    info['data'] = None
 +
    info['type'] = None
 +
    info['machine'] = None
 +
    info['endianness'] = None
 +
 
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--file-header' ]).split('\n'):
 +
        fields = [ x.strip() for x in line.split(':', 1) ]
 +
 
 +
        if len(fields) != 2:
 +
            continue
 +
 
 +
        key = fields[0].lower()
 +
        value = fields[1]
 +
        if key in info:
 +
            info[key] = value
 +
 
 +
    if info['data'] != None:
 +
        if 'little endian' in info['data']:
 +
            info['endianness'] = 'little'
 +
        elif 'bit endian' in info['data']:
 +
            info['endianness'] = 'big'
 +
        else:
 +
            info['endianness'] = 'unknown'
 +
 
 +
    return info
 +
 
 +
def get_section_list(filename):
 +
    sections = {}
 +
    consume = False
 +
    field_layouts = {}
 +
 
 +
    field_layouts[10] = [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize',          'link', 'info', 'align' ]
 +
    field_layouts[11] = [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize', 'flags', 'link', 'info', 'align' ]
 +
 
 +
    fields_int = {}
 +
    fields_int['section_number'] = 10
 +
    fields_int['address'] = 16
 +
    fields_int['offset'] = 16
 +
    fields_int['size'] = 16
 +
    fields_int['entsize'] = 16
 +
    fields_int['link'] = 10
 +
    fields_int['info'] = 10
 +
    fields_int['align'] = 10
 +
 
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers', '--wide' ]).split('\n'):
 +
        if not consume:
 +
            if line == '  [Nr] Name              Type            Addr    Off    Size  ES Flg Lk Inf Al':
 +
                consume = True
 +
                continue
 +
        else:
 +
            if line == 'Key to Flags:':
 +
                consume = False
 +
                continue
 +
 
 +
        if not consume:
 +
            continue
 +
 
 +
        fields = line.split()
 +
 
 +
        if '[' in fields:
 +
            fields.remove('[')
 +
        if len(fields) not in field_layouts:
 +
            continue
 +
        field_layout = field_layouts[len(fields)]
 +
 
 +
        try:
 +
            fields[0] = fields[0].translate(None, ''.join([ '[', ']' ]))
 +
 
 +
            info = {}
 +
            for i in range(len(fields)):
 +
                field_name = field_layout[i]
 +
 
 +
                if field_name in fields_int:
 +
                    field_value = int(fields[i], fields_int[field_name])
 +
                else:
 +
                    field_value = fields[i]
 +
 
 +
                info[field_name] = field_value
 +
 
 +
            sections[info['name']] = info;
 +
        except:
 +
            pass
 +
 
 +
    return sections
 +
 
 +
def get_section_info(filename, section):
 +
    sections = get_section_list(filename)
 +
 
 +
    if section in sections:
 +
        return sections[section]
 +
    return None
 +
 
 +
def get_section_container(filename, base_addr, length = 0):
 +
    sections = get_section_list(filename)
 +
 
 +
    for section_name in sections:
 +
        section = sections[section_name]
 +
 
 +
        section_start = section['address']
 +
        section_end = section_start + section['size']
 +
 
 +
        if base_addr < section_start or base_addr > section_end:
 +
            continue
 +
 
 +
        if length > 0 and base_addr + length > section_end:
 +
            continue
 +
 
 +
        return section
 +
 
 +
    return None
 +
 
 +
def get_section_data(filename, section_info):
 +
    data = {}
 +
    data['base_offset'] = None
 +
    data['data'] = []
 +
 
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section_info['name'] ]).split('\n'):
 +
        if line[0:4] != '  0x':
 +
            continue
 +
 
 +
        cols = line.split()
 +
 
 +
        if 'base_offset' is None:
 +
            data['base_offset'] = int(cols[0][2:], 16)
 +
            if data['base_offset'] != section_info['offset']:
 +
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))
 +
 
 +
        remain = section_info['size'] - len(data['data'])
 +
        if remain > 16:
 +
            r = range(16)
 +
        else:
 +
            r = range(remain)
 +
 
 +
        for i in r:
 +
            byte = i % 4
 +
            col = 1 + ((i - byte) / 4)
 +
 
 +
            o = byte * 2
 +
            data['data'].append(int(cols[col][o:o+2], 16))
 +
 
 +
    return data
 +
 
 +
def get_symbol_info(filename, symbol):
 +
    symbol_info = []
 +
    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
 +
        fields = line.split()
 +
 
 +
        if len(fields) >= 7:
 +
            try:
 +
                fields[0] = int(fields[0][:-1])
 +
                if fields[7] != symbol:
 +
                    continue
 +
 
 +
                data = {}
 +
                data['symbol_number'] = fields[0]
 +
                data['value'] = int(fields[1], 16)
 +
                data['size'] = int(fields[2], 10)
 +
                data['type'] = fields[3]
 +
                data['bind'] = fields[4]
 +
                data['visibility'] = fields[5]
 +
                data['Ndx'] = fields[6]
 +
                data['name'] = fields[7]
 +
                symbol_info.append(data)
 +
            except:
 +
                pass
 +
 
 +
    if not validate_symbol_info_list(symbol_info):
 +
        raise Exception('symbol "%s" has multiple definitions...' % (args.symbol))
 +
 
 +
    if len(symbol_info) == 0:
 +
        raise Exception('symbol "%s" is not defined...' % (args.symbol))
  
if [ -z ${CROSS_COMPILE+x} ]; then
+
    return symbol_info[0]
CROSS_COMPILE=""
+
fi
+
  
FILE=$1; shift
+
def validate_symbol_info_list(symbol_list):
SYMBOL=$1; shift
+
    if len(symbol_list) > 1:
 +
        for i in range(1, len(symbol_list)):
 +
            for f in [ 'value', 'size', 'type', 'name']:
 +
                if symbol_list[0][f] != symbol_list[i][f]:
 +
                    return False
 +
    return True
  
if [ ! -e ${FILE} ]; then
+
def get_symbol_data(filename, symbol):
echo "File '${FILE}' does not exist..." >&2
+
    symbol_info = get_symbol_info(filename, symbol)
exit 1
+
    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])
fi
+
    section_data = get_section_data(filename, section_info)
  
# get the symbol information
+
    offset = symbol_info['value'] - section_info['address']
SYM_INFO=$(${CROSS_COMPILE}readelf ${FILE} --syms | grep "${SYMBOL}\$" || true)
+
    data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ]
if [ "${SYM_INFO}" == "" ]; then
+
echo "Symbol '${SYMBOL}' does not exist..." >&2
+
exit 1
+
fi
+
  
# validate the symbols (it may have been present in more than one symbol table)
+
    return data
echo "${SYM_INFO}" | awk 'BEGIN{a=-1}{v=strtonum("0x"$2); if (a==-1) a=v; else if (a!=v) exit(1)}' || (
+
echo "Symbol '${SYMBOL}' has conflicting definitions..." >&2
+
echo "${SYM_INFO}" >&2
+
exit 1
+
)
+
SYM_INFO=$(echo "${SYM_INFO}" | head -n 1)
+
  
# get the offset of the symbol's data (in hex), rounded down to the closest 16-byte boundary
+
data = get_symbol_data(args.filename, args.symbol)
SYM_OFFSET_ALIGNED=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("0x%08x", v - (v % 16))}')
+
  
# get the offset of the symbol's data (in decimal), into this 16-byte aligned data
+
if args.str:
SYM_OFFSET_INTO_ALIGNED_DATA=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("%d", (v % 16) * 2)}')
+
    if data[-1] != 0:
 +
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
 +
    data = ''.join(map(str, map(unichr, data[:-1])))
  
# get the size of the symbol's data (in decimal), as nibbles / hex digits
+
elif args.cbo:
SYM_SIZE_IN_NIBBLES=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf "%d",v * 2}')
+
    data = ''.join(map(str, map(chr, data)))
  
# get the number of 16-byte rows that this symbol spans
+
    n = len(data)
SYM_ROWS_SPANNED=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf("%d", ((v - (v % 16)) / 16))}')
+
  
# get the section's full data
+
    if n % 8 == 0:
SECTION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | tail -n +3 | head -n -1)
+
        t = 'q'
 +
        s = 8
 +
    elif len(data) % 4 == 0:
 +
        t = 'i'
 +
        s = 4
 +
    elif len(data) % 2 == 0:
 +
        t = 'h'
 +
        s = 2
 +
    elif len(data) == 1:
 +
        t = 'c'
 +
        s  = 1
 +
    else:
 +
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))
  
# get the alignment of the .rodata section with respect to the 16-byte boundary that we're working with
+
    n = (n - (n % s)) / s
SECTION_ALIGNMENT=$(echo "${SECTION_DATA}" 2>/dev/null | head -n 1 | awk '{v=strtonum($1); printf("%d", (v % 16))}')
+
  
# adjust the symbol's aligned offset with the section's alignment
+
    file_info = get_file_info(args.filename)
SYM_OFFSET_REALIGNED=$(echo "${SYM_OFFSET_ALIGNED} ${SECTION_ALIGNMENT}" | awk '{o=strtonum($1); a=strtonum($2); printf("0x%08x", o + a)}')
+
    if file_info['endianness'] == 'little':
 +
        e = '<'
 +
    elif file_info['endianness'] == 'big':
 +
        e = '>'
 +
    else:
 +
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))
  
# get the rows for the interesting region of the file
+
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)[0]
REGION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | grep "${SYM_OFFSET_REALIGNED}" -A${SYM_ROWS_SPANNED} | cut -b14-48 | tr -d ' \n')
+
    data = struct.pack('>%d%s' % ( n, t ), tmp)
  
# snip out the symbol's data
+
else:
DATA=$(echo -n "${REGION_DATA}" | cut -b $((SYM_OFFSET_INTO_ALIGNED_DATA + 1))-$((SYM_OFFSET_INTO_ALIGNED_DATA + SYM_SIZE_IN_NIBBLES)))
+
    data = ''.join(map(str, map(unichr, data)))
  
# print out the symbol's data...
+
sys.stdout.write(data)
## ... as hex digits
+
#echo ${DATA}
+
## ... as raw binary data
+
echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}'
+
## ... rendered by hexdump
+
#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}' | hexdump -C
+
## ... a nul-terminated C string (without the nul)
+
#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{v=strtonum("0x"$0); if (v==0) exit(0); printf "%c", v}'
+
 
</source>
 
</source>
  
Line 77: Line 278:
 
EOF
 
EOF
  
./get_bin_info.sh eg my_data
+
./get_bin_info.py eg my_data -S
 
</source>
 
</source>

Latest revision as of 11:30, 14 April 2016

This script helps you extract the data associated with a symbol in a binary.

#!/usr/bin/env python
 
import argparse
import os, sys
import subprocess
import struct
 
# Command line: the ELF file, the symbol to extract, and an optional output mode.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument('filename', type=str,
                help='the file to process')
ap.add_argument('symbol', type=str,
                help='the symbol to extract')
ap.add_argument('-S', '--str', action='store_true',
                help='print as a nul-terminated C string')
ap.add_argument('-C', '--cbo', action='store_true',
                help='correct the byte order (endianness)')
args = ap.parse_args()

# The input must be an existing regular file.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

# The two output modes cannot be combined.
if args.str and args.cbo:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# An optional toolchain prefix is taken from the environment (empty if unset).
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools used by the functions below.
bin = { 'readelf': '%sreadelf' % ( cross_compile ) }
 
def get_file_info(filename):
    """Collect selected ELF file-header fields reported by readelf.

    Runs ``readelf --file-header`` on *filename* and returns a dict with the
    keys 'class', 'data', 'type' and 'machine', each holding the text that
    follows the colon on the matching header line (or None when no such line
    was printed), plus 'endianness', which is derived from the 'data' text:
    'little', 'big', 'unknown', or None when there was no 'data' line.

    Raises subprocess.CalledProcessError if readelf exits with an error.
    """
    info = {
        'class': None,
        'data': None,
        'type': None,
        'machine': None,
        'endianness': None,
    }

    # universal_newlines=True makes check_output() return text on both
    # Python 2 and Python 3, so the string handling below works on either.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--file-header' ],
        universal_newlines=True)

    for line in output.split('\n'):
        # Header lines look like "  Key:   value"; split on the first colon only
        # because values may themselves contain colons.
        fields = [ x.strip() for x in line.split(':', 1) ]

        if len(fields) != 2:
            continue

        key = fields[0].lower()
        value = fields[1]
        if key in info:
            info[key] = value

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # The original tested for 'bit endian', which never matches.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
 
def get_section_list(filename):
    """Parse the section header table printed by readelf.

    Runs ``readelf --section-headers --wide`` on *filename* and returns a
    dict mapping each section name to a dict describing that section. The
    per-section keys are 'section_number', 'name', 'type', 'address',
    'offset', 'size', 'entsize', 'link', 'info', 'align' and, when the row
    has a flags column, 'flags'. Numeric columns are converted to int;
    'name', 'type' and 'flags' stay as strings.

    Rows that do not have 10 or 11 columns, or whose numeric columns do not
    parse, are skipped.

    Raises subprocess.CalledProcessError if readelf exits with an error.
    """
    # Column names keyed by the number of whitespace-separated fields in a
    # row; a row without any flags has one column fewer.
    field_layouts = {
        10: [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize',          'link', 'info', 'align' ],
        11: [ 'section_number', 'name', 'type', 'address', 'offset', 'size', 'entsize', 'flags', 'link', 'info', 'align' ],
    }

    # Numeric columns and the base each one is printed in.
    fields_int = {
        'section_number': 10,
        'address': 16,
        'offset': 16,
        'size': 16,
        'entsize': 16,
        'link': 10,
        'info': 10,
        'align': 10,
    }

    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--section-headers', '--wide' ],
        universal_newlines=True)

    sections = {}
    consume = False

    for line in output.split('\n'):
        if not consume:
            # The table starts after the column-title row. Match on its
            # leading tokens rather than the exact string, because the
            # spacing and the address column title differ between 32-bit
            # and 64-bit files.
            if line.split()[:3] == [ '[Nr]', 'Name', 'Type' ]:
                consume = True
            continue

        if line == 'Key to Flags:':
            # End of the table.
            consume = False
            continue

        fields = line.split()

        # Single-digit section numbers are printed as "[ 1]", which splits
        # into "[" and "1]"; drop the lone bracket.
        if '[' in fields:
            fields.remove('[')

        field_layout = field_layouts.get(len(fields))
        if field_layout is None:
            continue

        # str.strip() works on Python 2 and 3, unlike translate(None, ...).
        fields[0] = fields[0].strip('[]')

        info = {}
        try:
            for field_name, text in zip(field_layout, fields):
                if field_name in fields_int:
                    info[field_name] = int(text, fields_int[field_name])
                else:
                    info[field_name] = text
        except ValueError:
            # Not a section row after all (a numeric column did not parse).
            continue

        sections[info['name']] = info

    return sections
 
def get_section_info(filename, section):
    """Return the description of the section named *section* in *filename*.

    The description is the per-section dict built by get_section_list();
    None is returned when no section has that name.
    """
    return get_section_list(filename).get(section)
 
def get_section_container(filename, base_addr, length = 0):
    """Find a section of *filename* that contains an address range.

    A section matches when *base_addr* lies within
    [address, address + size) and, if *length* is greater than zero, the
    whole range base_addr .. base_addr + length fits inside the section.

    Returns the section's description dict (as produced by
    get_section_list()), or None when no section matches.
    """
    sections = get_section_list(filename)

    for section in sections.values():
        section_start = section['address']
        # One past the last address that belongs to the section.
        section_end = section_start + section['size']

        # section_end itself is outside the section, hence '>='. The
        # original used '>', which let an address at the very end of one
        # section match that section instead of the one that follows it.
        if base_addr < section_start or base_addr >= section_end:
            continue

        if length > 0 and base_addr + length > section_end:
            continue

        return section

    return None
 
def get_section_data(filename, section_info):
    """Read the bytes of one section via ``readelf --hex-dump``.

    *section_info* is a section description dict; its 'name', 'address'
    and 'size' entries are used.

    Returns a dict with:
      'base_offset' - the number printed at the start of the first dump
                      row (None if the dump had no data rows)
      'data'        - a list of ints, one per byte, at most 'size' long

    Raises Exception if the first dump row does not start at the section's
    address, and subprocess.CalledProcessError if readelf fails.
    """
    data = {
        'base_offset': None,
        'data': [],
    }

    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--hex-dump', section_info['name'] ],
        universal_newlines=True)

    for line in output.split('\n'):
        # Data rows look like "  0x<addr> <hex> <hex> <hex> <hex> <ascii>".
        if line[0:4] != '  0x':
            continue

        cols = line.split()

        # The original tested the string literal 'base_offset' against None,
        # which is never true, so this check never ran. readelf labels dump
        # rows with addresses, so compare against the section's address.
        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0][2:], 16)
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section_info['name'] ))

        # Each row carries up to 16 bytes in four groups of four; the last
        # row may be shorter, so never read past the section's size.
        remain = section_info['size'] - len(data['data'])

        for i in range(min(16, remain)):
            col = 1 + i // 4        # hex group holding byte i (integer division)
            o = (i % 4) * 2         # position of the byte's two hex digits
            data['data'].append(int(cols[col][o:o+2], 16))

    return data
 
def get_symbol_info(filename, symbol):
    """Look up *symbol* in the symbol tables printed by ``readelf --syms``.

    Returns a dict with the keys 'symbol_number', 'value', 'size' (ints)
    and 'type', 'bind', 'visibility', 'Ndx', 'name' (strings) for the first
    matching row.

    Raises Exception when the symbol is not found, or when it appears more
    than once with differing value, size, type or name, and
    subprocess.CalledProcessError if readelf fails.
    """
    # universal_newlines=True yields text on both Python 2 and Python 3.
    output = subprocess.check_output(
        [ bin['readelf'], filename, '--syms' ],
        universal_newlines=True)

    symbol_info = []
    for line in output.split('\n'):
        fields = line.split()

        # A symbol row has eight columns, the name being the eighth. The
        # original accepted seven and relied on a bare except to hide the
        # resulting IndexError.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            data = {
                'symbol_number': int(fields[0][:-1]),   # drop the trailing ':'
                'value': int(fields[1], 16),
                # Base 0 accepts plain decimal as well as a "0x"-prefixed
                # size, which readelf uses for large values.
                'size': int(fields[2], 0),
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a symbol row (e.g. the column-title line).
            continue

        symbol_info.append(data)

    if len(symbol_info) == 0:
        raise Exception('symbol "%s" is not defined...' % ( symbol ))

    # The same symbol may be listed in more than one symbol table; that is
    # fine as long as every listing agrees.
    if not validate_symbol_info_list(symbol_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return symbol_info[0]
 
def validate_symbol_info_list(symbol_list):
    """Check that every entry of *symbol_list* agrees with the first one.

    Entries are dicts; only their 'value', 'size', 'type' and 'name' items
    are compared. Returns True when the list has fewer than two entries or
    all entries match the first, and False otherwise.
    """
    if len(symbol_list) <= 1:
        return True

    compared_keys = ( 'value', 'size', 'type', 'name' )
    reference = symbol_list[0]

    return all(
        reference[key] == candidate[key]
        for candidate in symbol_list[1:]
        for key in compared_keys
    )
 
def get_symbol_data(filename, symbol):
    """Return the bytes that make up *symbol* in *filename*.

    The symbol is looked up with get_symbol_info(), the section holding its
    address range is located with get_section_container(), and that
    section's contents are read with get_section_data().

    Returns a list of ints, one per byte, 'size' entries long.

    Raises Exception when no section contains the symbol or when the
    section dump is too short to cover it, in addition to anything raised
    by the helpers.
    """
    symbol_info = get_symbol_info(filename, symbol)

    section_info = get_section_container(filename, symbol_info['value'], symbol_info['size'])
    if section_info is None:
        # Without this guard the None would be subscripted further down,
        # giving an unhelpful TypeError.
        raise Exception('symbol "%s" is not contained in any section...' % ( symbol ))

    section_data = get_section_data(filename, section_info)

    # Position of the symbol's first byte within the section's data.
    offset = symbol_info['value'] - section_info['address']
    data = section_data['data'][offset:offset + symbol_info['size']]

    # A slice silently truncates, so check that every byte was available.
    if len(data) != symbol_info['size']:
        raise Exception('section "%s" does not hold all of symbol "%s"...' % ( section_info['name'], symbol ))

    return data
 
# The symbol's contents, as a list of byte values.
data = get_symbol_data(args.filename, args.symbol)

if args.str:
    # C-string mode: the final byte must be the terminating nul, and it is
    # not printed. An empty symbol has no terminator either.
    if not data or data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    # bytes(bytearray(...)) gives a raw byte string on Python 2 and 3. The
    # original str(unichr(b)) conversion failed for byte values above 0x7f.
    data = bytes(bytearray(data[:-1]))

elif args.cbo:
    # Byte-order mode: treat the symbol as an array of equally sized
    # integers in the file's byte order and re-emit them big-endian.
    data = bytes(bytearray(data))

    n = len(data)

    # Use the widest element size that divides the symbol's length.
    if n % 8 == 0:
        t = 'q'
        s = 8
    elif n % 4 == 0:
        t = 'i'
        s = 4
    elif n % 2 == 0:
        t = 'h'
        s = 2
    elif n == 1:
        t = 'c'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    # Number of elements; integer division keeps this an int on Python 3.
    n = n // s

    file_info = get_file_info(args.filename)
    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Unpack every element and pack them all again. The original kept only
    # the first unpacked value, so packing failed whenever n was not 1.
    values = struct.unpack('%s%d%s' % ( e, n, t ), data)
    data = struct.pack('>%d%s' % ( n, t ), *values)

else:
    # Raw mode: emit the bytes unchanged.
    data = bytes(bytearray(data))

# Write raw bytes: Python 3 exposes the binary stream as sys.stdout.buffer,
# while on Python 2 sys.stdout itself accepts byte strings.
getattr(sys.stdout, 'buffer', sys.stdout).write(data)

[edit] Example Usage

cat <<EOF | gcc -x c - -o eg
const char my_data[] = "test string";
void main(void) {}
EOF
 
./get_bin_info.py eg my_data -S
Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox