Get bin info.py

Revision as of 13:14, 16 October 2015

This script helps you extract the data associated with a symbol in a binary.

#!/usr/bin/env python
 
import argparse
import os, sys
import subprocess
import struct
 
# Command-line interface: the ELF file and symbol name are positional, and
# two optional flags select how the extracted bytes are emitted.
ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
ap.add_argument(
    'filename',
    type=str,
    help='the file to process',
)
ap.add_argument(
    'symbol',
    type=str,
    help='the symbol to extract',
)
ap.add_argument(
    '-S', '--str',
    action='store_true',
    help='print as a nul-terminated C string',
)
ap.add_argument(
    '-C', '--cbo',
    action='store_true',
    help='correct the byte order (endianness)',
)
args = ap.parse_args()

# Validate the arguments up front; the file check comes first so that it
# takes precedence when both checks would fail.
if not os.path.isfile(args.filename):
    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))

if args.str and args.cbo:
    raise Exception('Arguments --cbo and --str are mutually exclusive...')

# An optional toolchain prefix taken from the environment is prepended to the
# name of the readelf executable.
cross_compile = os.environ.get('CROSS_COMPILE', '')

# Table of external tools used by the functions below (shadows the builtin
# ``bin``; the name is kept because the rest of the file refers to it).
bin = {'readelf': cross_compile + 'readelf'}
 
def get_file_info(filename):
    """Return selected ELF file-header fields as reported by readelf.

    Runs ``readelf <filename> --file-header`` and parses every
    ``Key: value`` line of its output.

    Args:
        filename: path of the file handed to readelf.

    Returns:
        A dict with the keys 'class', 'data', 'type', 'machine' and
        'endianness'.  The first four hold the text readelf printed for the
        header line of the same (case-insensitive) name, or None when no
        such line was seen.  'endianness' is 'little', 'big' or 'unknown',
        derived from the 'data' text, and stays None when 'data' is None.

    Raises:
        subprocess.CalledProcessError: if readelf exits with a non-zero
            status.
    """
    info = dict.fromkeys(('class', 'data', 'type', 'machine', 'endianness'))

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the string handling below works on Python 2 and Python 3.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--file-header'],
        universal_newlines=True)

    for line in output.splitlines():
        fields = [x.strip() for x in line.split(':', 1)]

        # Only "Key: value" lines are of interest.
        if len(fields) != 2:
            continue

        key = fields[0].lower()
        if key in info:
            info[key] = fields[1]

    if info['data'] is not None:
        if 'little endian' in info['data']:
            info['endianness'] = 'little'
        elif 'big endian' in info['data']:
            # The previous test looked for the misspelt 'bit endian', so
            # big-endian files were always reported as 'unknown'.
            info['endianness'] = 'big'
        else:
            info['endianness'] = 'unknown'

    return info
 
def get_section_info(filename, section):
    """Look up one section in the output of ``readelf --section-headers``.

    Only rows that split into exactly 11 whitespace-separated columns
    (number, name, type, address, offset, size, ES, Flg, Lk, Inf, Al) are
    considered; every other line is ignored.

    Args:
        filename: path of the file handed to readelf.
        section: exact section name to look for, e.g. '.rodata'.

    Returns:
        A dict describing the first matching row, or None when no row
        matches.  'section_number', 'address', 'offset' and 'size' are ints
        (the latter three parsed as hexadecimal); 'name', 'type', 'ES',
        'Flg', 'Lk', 'Inf' and 'Al' are the column text as printed.

    Raises:
        subprocess.CalledProcessError: if readelf exits with a non-zero
            status.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes, so the string handling below works on Python 2 and Python 3.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--section-headers'],
        universal_newlines=True)

    for line in output.splitlines():
        fields = line.split()

        # A bracketed number padded with a space, such as "[ 1]", splits
        # into "[" and "1]"; drop the stray bracket so the columns line up.
        if '[' in fields:
            fields.remove('[')
        if len(fields) != 11:
            continue

        try:
            # Parsed before the name comparison, as before, so rows whose
            # first column is not numeric (the table heading) are skipped.
            number = int(fields[0].replace('[', '').replace(']', ''))
            if fields[1] != section:
                continue

            return {
                'section_number': number,
                'name': fields[1],
                'type': fields[2],
                'address': int(fields[3], 16),
                'offset': int(fields[4], 16),
                'size': int(fields[5], 16),
                'ES': fields[6],
                'Flg': fields[7],
                'Lk': fields[8],
                'Inf': fields[9],
                'Al': fields[10],
            }
        except ValueError:
            # Not a well-formed section row; keep scanning.  Only number
            # parsing failures are tolerated here -- anything else is a
            # genuine error and is allowed to propagate.
            continue

    return None
 
def get_section_data(filename, section, section_info=None):
    """Read the bytes of a section from ``readelf --hex-dump`` output.

    Args:
        filename: path of the file handed to readelf.
        section: name of the section to dump.
        section_info: the dict returned by get_section_info() for this
            section; looked up here when omitted.

    Returns:
        A dict with two keys: 'base_offset', the value labelling the first
        dump row (None if the dump had no data rows), and 'data', a list of
        the section's bytes as ints, at most section_info['size'] long.

    Raises:
        Exception: if the section cannot be found, or if the first dump row
            does not start at the section's address.
        subprocess.CalledProcessError: if readelf exits with a non-zero
            status.
    """
    if section_info is None:
        # The original referenced the misspelt name "sectioN" here, which
        # raised NameError whenever section_info was left out.
        section_info = get_section_info(filename, section)
    if section_info is None:
        raise Exception('section "%s" was not found...' % ( section ))

    data = {}
    data['base_offset'] = None
    data['data'] = []

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the string handling below works on Python 2 and Python 3.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--hex-dump', section],
        universal_newlines=True)

    for line in output.splitlines():
        # Data rows look like "  0x<label> <word> <word> <word> <word> <text>".
        if line[0:4] != '  0x':
            continue

        cols = line.split()

        # The original tested the string literal 'base_offset' against None,
        # which is never true, so this sanity check never ran.
        if data['base_offset'] is None:
            data['base_offset'] = int(cols[0][2:], 16)
            # readelf labels hex-dump rows with addresses, so the first row
            # is compared with the section's address (not its file offset).
            if data['base_offset'] != section_info['address']:
                raise Exception('section "%s" has an unexpected starting address...' % ( section ))

        # Each row carries up to 16 bytes as four 8-hex-digit words; the
        # final row may be shorter, so never read past the section size.
        remain = section_info['size'] - len(data['data'])
        for i in range(min(remain, 16)):
            # Floor division keeps the column index an int on Python 3.
            word, byte = divmod(i, 4)
            start = byte * 2
            data['data'].append(int(cols[1 + word][start:start + 2], 16))

    return data
 
def get_sym_info(filename, symbol):
    """Find a symbol in the output of ``readelf --syms``.

    Every row whose eighth column equals ``symbol`` is collected; the same
    symbol may be listed more than once, and all such rows must agree (see
    validate_sym_info_list()).

    Args:
        filename: path of the file handed to readelf.
        symbol: exact symbol name to look for.

    Returns:
        A dict for the first matching row with the keys 'symbol_number',
        'value' and 'size' (ints; 'value' parsed as hexadecimal) and 'type',
        'bind', 'visibility', 'Ndx' and 'name' (column text as printed).

    Raises:
        Exception: if the symbol is not found, or if the matching rows
            disagree with each other.
        subprocess.CalledProcessError: if readelf exits with a non-zero
            status.
    """
    sym_info = []

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the string handling below works on Python 2 and Python 3.
    output = subprocess.check_output(
        [bin['readelf'], filename, '--syms'],
        universal_newlines=True)

    for line in output.splitlines():
        fields = line.split()

        # A symbol row has eight columns: Num, Value, Size, Type, Bind, Vis,
        # Ndx and Name.  The original only required seven and relied on a
        # bare "except" to hide the resulting IndexError.
        if len(fields) < 8 or fields[7] != symbol:
            continue

        try:
            size_text = fields[2]
            # readelf switches to a 0x-prefixed hexadecimal size for large
            # symbols; accept both spellings.
            if size_text.lower().startswith('0x'):
                size = int(size_text, 16)
            else:
                size = int(size_text, 10)

            data = {
                # The Num column is printed with a trailing ':'.
                'symbol_number': int(fields[0][:-1]),
                'value': int(fields[1], 16),
                'size': size,
                'type': fields[3],
                'bind': fields[4],
                'visibility': fields[5],
                'Ndx': fields[6],
                'name': fields[7],
            }
        except ValueError:
            # Not a well-formed symbol row (e.g. a heading); skip it.
            continue

        sym_info.append(data)

    if not sym_info:
        raise Exception('symbol "%s" was not found...' % ( symbol ))

    # Report the symbol this function was asked about rather than reaching
    # for the global command-line arguments.
    if not validate_sym_info_list(sym_info):
        raise Exception('symbol "%s" has multiple definitions...' % ( symbol ))

    return sym_info[0]
 
def validate_sym_info_list(symbol_list):
    """Check that every entry describes the same symbol as the first one.

    Entries are compared on their 'value', 'size', 'type' and 'name' keys
    only; any other keys are ignored.

    Args:
        symbol_list: list of symbol dicts, each providing the four keys
            above.

    Returns:
        True when the list is empty, has a single entry, or all entries
        agree with the first one; False as soon as one entry differs.
    """
    compared_keys = ('value', 'size', 'type', 'name')

    if not symbol_list:
        return True

    reference = symbol_list[0]
    for candidate in symbol_list[1:]:
        if any(reference[key] != candidate[key] for key in compared_keys):
            return False
    return True
 
# Gather everything readelf can tell us about the file, the requested symbol
# and the section the symbol's data is read from.
file_info = get_file_info(args.filename)
symbol_info = get_sym_info(args.filename, args.symbol)
section_info = get_section_info(args.filename, '.rodata')
if section_info is None:
    raise Exception('file "%s" has no usable ".rodata" section...' % ( args.filename ))
section_data = get_section_data(args.filename, '.rodata', section_info=section_info)

# A symbol's value is an address, so its position inside the section is
# measured from the section's address rather than from its file offset.
offset = symbol_info['value'] - section_info['address']
size = symbol_info['size']
if offset < 0 or offset + size > len(section_data['data']):
    raise Exception('symbol "%s" does not lie within the ".rodata" section...' % ( args.symbol ))

# Keep the data as raw bytes throughout.  Building the output with
# str(unichr(b)) failed on Python 2 for any byte >= 0x80 and does not exist
# on Python 3.
data = bytearray(section_data['data'][offset:offset + size])

if args.str:
    # "not data" guards the data[-1] lookup for a zero-sized symbol.
    if not data or data[-1] != 0:
        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
    data = bytes(data[:-1])

elif args.cbo:
    raw = bytes(data)
    n = len(raw)

    # Treat the data as an array of the widest element that divides its
    # length.  Unsigned integer codes are used so that swapping the byte
    # order never alters a bit pattern (a round trip through a float can).
    if n % 8 == 0:
        t = 'Q'
        s = 8
    elif n % 4 == 0:
        t = 'I'
        s = 4
    elif n % 2 == 0:
        t = 'H'
        s = 2
    elif n == 1:
        t = 'B'
        s = 1
    else:
        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))

    # Number of elements; floor division keeps it an int on Python 3.
    n = n // s

    if file_info['endianness'] == 'little':
        e = '<'
    elif file_info['endianness'] == 'big':
        e = '>'
    else:
        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))

    # Decode every element in the file's byte order and re-encode them all
    # as big-endian.  The original kept only the first decoded element and
    # then packed with a count of n, which raises struct.error for n > 1.
    tmp = struct.unpack('%s%d%s' % ( e, n, t ), raw)
    data = struct.pack('>%d%s' % ( n, t ), *tmp)

else:
    data = bytes(data)

# Emit the raw bytes: Python 3 exposes the binary stream as
# sys.stdout.buffer, while on Python 2 sys.stdout itself accepts them.
getattr(sys.stdout, 'buffer', sys.stdout).write(data)

Get bin info.py

Revision as of 13:14, 16 October 2015

Personal tools

Namespaces

Variants

Views

Actions

Search

Navigation

Toolbox

@@ Line 1: / Line 1: @@
 This script helps you extract the data associated with a symbol in a binary.
-Remember that the output here will not necessarily be correctly ordered (see [http://en.wikipedia.org/wiki/Endianness Endianness]).
-<source lang="bash">
+<source lang="python">
-#!/bin/bash -eu
+#!/usr/bin/env python
-if [ -z ${CROSS_COMPILE+x} ]; then
+import argparse
-	CROSS_COMPILE=""
+import os, sys
-fi
+import subprocess
+import struct
-FILE=$1; shift
+ap = argparse.ArgumentParser(description='Extract data from an ELF binary')
-SYMBOL=$1; shift
+ap.add_argument('filename', type=str, help='the file to process')
+ap.add_argument('symbol', type=str, help='the symbol to extract')
+ap.add_argument('-S', '--str', action='store_true', help='print as a nul-terminated C string')
+ap.add_argument('-C', '--cbo', action='store_true', help='correct the byte order (endianness)')
+args = ap.parse_args()
-if [ ! -e ${FILE} ]; then
+if not os.path.isfile(args.filename):
-	echo "File '${FILE}' does not exist..." >&2
+    raise Exception('"%s" is not a file or does not exist...' % ( args.filename ))
-	exit 1
-fi
-# get the symbol information
+if args.str and args.cbo:
-SYM_INFO=$(${CROSS_COMPILE}readelf ${FILE} --syms | grep "${SYMBOL}\$" || true)
+    raise Exception('Arguments --cbo and --str are mutually exclusive...')
-if [ "${SYM_INFO}" == "" ]; then
-	echo "Symbol '${SYMBOL}' does not exist..." >&2
-	exit 1
-fi
-# validate the symbols (it may have been present in more than one symbol table)
+if 'CROSS_COMPILE' in os.environ:
-echo "${SYM_INFO}" | awk 'BEGIN{a=-1}{v=strtonum("0x"$2); if (a==-1) a=v; else if (a!=v) exit(1)}' || (
+    cross_compile = os.environ['CROSS_COMPILE']
-	echo "Symbol '${SYMBOL}' has conflicting definitions..." >&2
+else:
-	echo "${SYM_INFO}" >&2
+    cross_compile = ''
-	exit 1
-)
-SYM_INFO=$(echo "${SYM_INFO}" | head -n 1)
-# get the offset of the symbol's data (in hex), rounded down to the closes 16-byte boundary
+bin = {}
-SYM_OFFSET_ALIGNED=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("0x%08x", v - (v % 16))}')
+bin['readelf'] = '%sreadelf' % ( cross_compile )
-# get the offset of the symbol's data (in decomal), into this 16-byte aligned data
+def get_file_info(filename):
-SYM_OFFSET_INTO_ALIGNED_DATA=$(echo "${SYM_INFO}" | awk '{v=strtonum("0x"$2); printf("%d", (v % 16) * 2)}')
+    info = {}
+    info['class'] = None
+    info['data'] = None
+    info['type'] = None
+    info['machine'] = None
+    info['endianness'] = None
-# get the size of the symbol's data (in decimal), as nibbles / hex digits
+    for line in subprocess.check_output([ bin['readelf'], filename, '--file-header' ]).split('\n'):
-SYM_SIZE_IN_NIBBLES=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf("%d",v * 2)}')
+        fields = [ x.strip() for x in line.split(':', 1) ]
-# get the number of 16-byte rows that this symbol spans
+        if len(fields) != 2:
-SYM_ROWS_SPANNED=$(echo "${SYM_INFO}" | awk '{v=strtonum($3); printf("%d", ((v - (v % 16)) / 16))}')
+            continue
-# get the section's full data
+        key = fields[0].lower()
-SECTION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | tail -n +3 | head -n -1)
+        value = fields[1]
+        if key in info:
+            info[key] = value
-# get the alignment of the .rodata section with respect to the 16-byte boundary that we're working with
+    if info['data'] != None:
-SECTION_ALIGNMENT=$(echo "${SECTION_DATA}" 2>/dev/null | head -n 1 | awk '{v=strtonum($1); printf("%d", (v % 16))}')
+        if 'little endian' in info['data']:
+            info['endianness'] = 'little'
+        elif 'bit endian' in info['data']:
+            info['endianness'] = 'big'
+        else:
+            info['endianness'] = 'unknown'
-# adjust the symbol's aligned offset with the section's alignment
+    return info
-SYM_OFFSET_REALIGNED=$(echo "${SYM_OFFSET_ALIGNED} ${SECTION_ALIGNMENT}" | awk '{o=strtonum($1); a=strtonum($2); printf("0x%08x", o + a)}')
-# get the rows for the interesting region of the file
+def get_section_info(filename, section):
-REGION_DATA=$(${CROSS_COMPILE}readelf ${FILE} --hex-dump .rodata | grep "${SYM_OFFSET_REALIGNED}" -A${SYM_ROWS_SPANNED} | cut -b14-48 | tr -d ' \n')
+    for line in subprocess.check_output([ bin['readelf'], filename, '--section-headers' ]).split('\n'):
+        fields = line.split()
+        if '[' in fields:
+            fields.remove('[')
+        if len(fields) != 11:
+            continue
+        try:
+            fields[0] = int(fields[0].translate(None, ''.join([ '[', ']' ])))
+            if fields[1] != section:
+                continue
-# snip out the symbol's data
+            info = {}
-DATA=$(echo -n "${REGION_DATA}" | cut -b $((SYM_OFFSET_INTO_ALIGNED_DATA + 1))-$((SYM_OFFSET_INTO_ALIGNED_DATA + SYM_SIZE_IN_NIBBLES)))
+            info['section_number'] = fields[0]
+            info['name'] = fields[1]
+            info['type'] = fields[2]
+            info['address'] = int(fields[3], 16)
+            info['offset'] = int(fields[4], 16)
+            info['size'] = int(fields[5], 16)
+            info['ES'] = fields[6]
+            info['Flg'] = fields[7]
+            info['Lk'] = fields[8]
+            info['Inf'] = fields[9]
+            info['Al'] = fields[10]
+            return info
+        except:
+            pass
+    return None
-# print out the symbol's data...
+def get_section_data(filename, section, section_info=None):
-## ... as hex digits
+    if section_info is None:
-#echo ${DATA}
+        section_info = get_section_info(filename, sectioN)
-## ... as raw binary data
-echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}'
-## ... rendered by hexdump
-#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{printf "%c", strtonum("0x"$0)}' | hexdump -C
-## ... a nul-terminated C string (without the nul)
-#echo -n ${DATA} | sed -e "s/.\{2\}/&\n/g" | awk -b '{v=strtonum("0x"$0); if (v==0) exit(0); printf "%c", v}'
-</source>
-==Example Usage==
+    data = {}
-<source lang="bash">
+    data['base_offset'] = None
-cat <<EOF | gcc -x c - -o eg
+    data['data'] = []
-const char my_data[] = "test string";
-void main(void) {}
-EOF
-./get_bin_info.sh eg my_data
+    for line in subprocess.check_output([ bin['readelf'], filename, '--hex-dump', section ]).split('\n'):
+        if line[0:4] != '  0x':
+            continue
+        cols = line.split()
+        if 'base_offset' is None:
+            data['base_offset'] = int(cols[0][2:], 16)
+            if data['base_offset'] != section_info['offset']:
+                raise Exception('section "%s" has an unexpected starting address...' % ( section ))
+        remain = section_info['size'] - len(data['data'])
+        if remain > 16:
+            r = range(16)
+        else:
+            r = range(remain)
+        for i in r:
+            byte = i % 4
+            col = 1 + ((i - byte) / 4)
+            o = byte * 2
+            data['data'].append(int(cols[col][o:o+2], 16))
+    return data
+def get_sym_info(filename, symbol):
+    sym_info = []
+    for line in subprocess.check_output([ bin['readelf'], filename, '--syms' ]).split('\n'):
+        fields = line.split()
+        if len(fields) >= 7:
+            try:
+                fields[0] = int(fields[0][:-1])
+                if fields[7] != symbol:
+                    continue
+                data = {}
+                data['symbol_number'] = fields[0]
+                data['value'] = int(fields[1], 16)
+                data['size'] = int(fields[2], 10)
+                data['type'] = fields[3]
+                data['bind'] = fields[4]
+                data['visibility'] = fields[5]
+                data['Ndx'] = fields[6]
+                data['name'] = fields[7]
+                sym_info.append(data)
+            except:
+                pass
+    if not validate_sym_info_list(sym_info):
+        raise Exception('symbol "%s" has multiple definitions...' % (args.symbol))
+    return sym_info[0]
+def validate_sym_info_list(symbol_list):
+    if len(symbol_list) > 1:
+        for i in range(1, len(symbol_list)):
+            for f in [ 'value', 'size', 'type', 'name']:
+                if symbol_list[0][f] != symbol_list[i][f]:
+                    return False
+    return True
+file_info = get_file_info(args.filename)
+symbol_info = get_sym_info(args.filename, args.symbol)
+section_info = get_section_info(args.filename, '.rodata')
+section_data = get_section_data(args.filename, '.rodata', section_info=section_info)
+offset = symbol_info['value'] - section_info['offset']
+data = [ section_data['data'][offset + i] for i in range(symbol_info['size']) ]
+if args.str:
+    if data[-1] != 0:
+        raise Exception('symbol "%s" is not a nul-terminated string...' % ( args.symbol ))
+    data = ''.join(map(str, map(unichr, data[:-1])))
+elif args.cbo:
+    data = ''.join(map(str, map(unichr, data)))
+    n = len(data)
+    if n % 8 == 0:
+        t = 'd'
+        s = 8
+    elif len(data) % 4 == 0:
+        t = 'i'
+        s = 4
+    elif len(data) % 2 == 0:
+        t = 'h'
+        s = 2
+    elif len(data) == 1:
+        t = 'c'
+        s  = 1
+    else:
+        raise Exception('symbol "%s" is not a suitable size to reorder...' % ( args.symbol ))
+    n = (n - (n % s)) / s
+    if file_info['endianness'] == 'little':
+        e = '<'
+    elif file_info['endianness'] == 'big':
+        e = '>'
+    else:
+        raise Exception('file "%s" has an unknown endianness...' % ( args.filename ))
+    tmp = struct.unpack('%s%d%s' % ( e, n, t ), data)[0]
+    data = struct.pack('>%d%s' % ( n, t ), tmp)
+else:
+    data = ''.join(map(str, map(unichr, data)))
+sys.stdout.write(data)
 </source>