forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract-section.py
executable file
·106 lines (98 loc) · 4.48 KB
/
extract-section.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
from __future__ import print_function
'''
Helper script to print out the raw content of an ELF section.
Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```
This is merely a wrapper around `llvm-readobj` that focuses on the binary
content as well as providing more formatting options.
'''
# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read everything available on standard input as raw binary data.

    Returns:
        The complete, undecoded content of stdin.
    """
    import sys
    if sys.version_info >= (3, 0):
        # Python 3 exposes the underlying binary stream as `buffer`.
        return sys.stdin.buffer.read()
    if sys.platform == 'win32':
        # Python 2 on Windows reads stdin as text, so the descriptor has to
        # be switched to binary mode first. The msvcrt function for this is
        # setmode(); msvcrt has no setformat().
        import os
        import msvcrt
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run the readobj tool and return its hex dump of one section as text.

    Args:
        readobj_path: Executable to invoke (expected to be llvm-readobj).
        section_name: Section passed to the tool's `--hex-dump=` option.
        input_file: Path of the object file, or '-' to forward this
            process's stdin to the tool.

    Returns:
        The tool's standard output as a text string.
    """
    import subprocess
    cmd = [readobj_path, '--elf-output-style=GNU',
           '--hex-dump={}'.format(section_name), input_file]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Only forward our own stdin when the tool was asked to read from '-';
    # otherwise communicate() just closes the child's stdin pipe.
    payload = read_raw_stdin() if input_file == '-' else None
    out, _ = proc.communicate(input=payload)
    if isinstance(out, str):
        # Python 2: the pipe already yields `str`.
        return out
    # The caller only consumes the hexadecimal words, so a stray non-UTF-8
    # byte elsewhere in the output should not abort the whole run.
    return out.decode('utf-8', errors='replace')
def _render_section_dump(raw_section, fmt='bits', bits_endian='big',
                         byte_indicator=False, hex_width=4):
    """Turn the text of a hex dump into the list of tokens to print.

    Lines starting with 'Hex dump' are skipped. On every other line the first
    space-separated column is dropped and up to four following columns are
    parsed as hexadecimal words; the first column that is not valid hex
    (including the empty strings produced by consecutive spaces) ends the
    line.

    Args:
        raw_section: Text output of the readobj hex dump.
        fmt: 'bits' to emit one token per bit, 'hex' to emit hex elements.
        bits_endian: 'little' emits the lowest byte of each word first in
            bits mode; any other value emits the highest byte first.
        byte_indicator: In bits mode, append a '.' token after every 8 bits.
        hex_width: Width in bytes (1 to 4) of each element in hex mode.
            NOTE(review): a width of 3 does not divide the 32-bit word
            evenly, so the lowest byte of each word is not emitted.

    Returns:
        A list of string tokens.

    Raises:
        ValueError: If `fmt` is 'hex' and `hex_width` is outside 1..4.
    """
    if fmt == 'hex':
        if not 0 < hex_width <= 4:
            raise ValueError('hex width must be between 1 and 4 bytes')
        # All of these are loop invariant, so compute them once.
        width_bits = hex_width * 8
        hex_offsets = range(32 - width_bits, -1, -width_bits)
        mask = (1 << width_bits) - 1
        format_str = "{:0" + str(hex_width * 2) + "x}"
    if bits_endian == 'little':
        byte_offsets = (0, 8, 16, 24)
    else:
        byte_offsets = (24, 16, 8, 0)

    results = []
    for line in raw_section.splitlines(False):
        if line.startswith('Hex dump'):
            continue
        # Split on single spaces on purpose: runs of spaces yield empty
        # strings, which fail the hex parse below and stop the scan.
        for part in line.strip().split(' ')[1:5]:
            try:
                val = int(part, 16)
            except ValueError:
                # exclude any non-hex dump string
                break
            if fmt == 'bits':
                # divided into bytes first
                for off in byte_offsets:
                    byte = (val >> off) & 0xFF
                    results.extend(str((byte >> bit) & 1)
                                   for bit in range(7, -1, -1))
                    if byte_indicator:
                        results.append('.')
            elif fmt == 'hex':
                results.extend(format_str.format((val >> off) & mask)
                               for off in hex_offsets)
    return results


if __name__ == '__main__':
    import argparse
    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument('--readobj-path', metavar='<executable path>', type=str,
                            help='Path to llvm-readobj')
    arg_parser.add_argument('--input-file', metavar='<file>', type=str,
                            help='Input object file, or \'-\' to read from stdin')
    arg_parser.add_argument('section', metavar='<name>', type=str,
                            help='Name of the section to extract')
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument('-b', dest='format', action='store_const', const='bits',
                              help='Print out in bits')
    arg_parser.add_argument('--byte-indicator', action='store_true',
                            help='Whether to print a \'.\' every 8 bits in bits printing mode')
    arg_parser.add_argument('--bits-endian', metavar='<little/big>', type=str,
                            choices=['little', 'big'],
                            help='Print out bits in specified endianness (little or big); defaults to big')
    format_group.add_argument('-h', dest='format', action='store_const', const='hex',
                              help='Print out in hexadecimal')
    arg_parser.add_argument('--hex-width', metavar='<# of bytes>', type=int,
                            help='The width (in byte) of every element in hex printing mode')
    arg_parser.add_argument('--help', action='help')
    # The default tool name is attached to `readobj_path`, the attribute that
    # --readobj-path actually populates, so the option is honoured.
    arg_parser.set_defaults(format='bits', readobj_path='llvm-readobj', input_file='-',
                            byte_indicator=False, hex_width=4, bits_endian='big')
    args = arg_parser.parse_args()

    # Reject a bad width up front with a usage error instead of silently
    # printing nothing. Only hex mode uses the width.
    if args.format == 'hex' and not 0 < args.hex_width <= 4:
        arg_parser.error('--hex-width must be between 1 and 4')

    raw_section = get_raw_section_dump(args.readobj_path, args.section, args.input_file)
    results = _render_section_dump(raw_section, fmt=args.format,
                                   bits_endian=args.bits_endian,
                                   byte_indicator=args.byte_indicator,
                                   hex_width=args.hex_width)
    print(' '.join(results), end='')