forked from airbus-seclab/elfesteem
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcstruct.py
419 lines (372 loc) · 14.4 KB
/
cstruct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
#! /usr/bin/env python
import struct, re
# Byte-string constants and name conversions that work the same way
# under python 2 and python 3.
data_empty = struct.pack("")
data_null = struct.pack("B",0)
import sys
if sys.version_info[0] >= 3:
    # python 3: names are text, the binary content is bytes; latin1 maps
    # every byte value to one character, in both directions.
    def bytes_to_name(s):
        return s.decode(encoding="latin1")
    def name_to_bytes(s):
        return s.encode(encoding="latin1")
else:
    # python 2: str is already a byte string, nothing to convert.
    def bytes_to_name(s):
        return s
    def name_to_bytes(s):
        return s
class CBase(object):
    """
    This is the base class, used to define CString, CStruct, CArray

    Functions to manipulate a CBase object (the ones not defined here
    are provided by the subclasses)
        unpack(): two args (c, o) the bytestring and the starting offset
        pack(): creates a byte string from the object content
        bytelen: length of this byte string
        pprint(): representation of the object, that can be used by pprint
        update(): named args, that change the object content

    Parameters used to create a CBase object from a bytestring:
        parent: parent object (mandatory); with the old API it is given
                as the first positional argument instead
        content: binary stream to initialize the object
        start: offset where to start parsing the content (default 0)
        count: only looked at when 'content' is given; stored on the
               object as a method returning that value
        sex and wsize: endianess and wordsize, taken from the parent
                       when they are not given
    All the other named args are forwarded to update().
    """
    def __init__(self, *args, **kargs):
        if 'parent' not in kargs:
            # Old API of elfesteem
            # e.g. used by miasm2's example/jitter/unpack_upx.py
            kargs['parent'] = args[0]
        self._parent_parse(kargs)
        self._initialize()
        if 'content' in kargs:
            if 'count' in kargs:
                count = kargs.pop('count')
                self.count = lambda c=count: c
            content = kargs.pop('content')
            start = kargs.pop('start', 0)
            self.unpack(content, start)
        # What remains in kargs are the values for update()
        self.update(**kargs)
    def _parent_parse(self, kargs):
        """
        Store parent, sex and wsize on the object and remove them from
        'kargs'; sex and wsize default to the values of the parent.
        """
        self.parent = kargs.pop('parent')
        if 'sex' in kargs:
            self.sex = kargs.pop('sex')
        else:
            self.sex = self.parent.sex
        if 'wsize' in kargs:
            self.wsize = kargs.pop('wsize')
        else:
            self.wsize = self.parent.wsize
    def _initialize(self):
        # For default values
        pass
    def update(self, **kargs):
        pass
    def __len__(self):
        # We don't use __len__ for the length in bytes, because we want to be able
        # to use it for the number of elements of a CArray
        raise AttributeError("__len__ not defined for '%s'"%self.__class__.__name__)
    def bytelen(self):
        return self._size
    bytelen = property(bytelen)
    def _size_align(self, o):
        """
        Size in bytes of the object 'o', rounded up to a multiple of
        self._align when this attribute exists.
        """
        s = o._size
        if hasattr(self, '_align'):
            s += ((self._align - s % self._align) % self._align)
        return s
    def _pack_align(self, o):
        """
        Packed form of the object 'o', padded with null bytes up to a
        multiple of self._align when this attribute exists.
        """
        s = o.pack()
        if hasattr(self, '_align'):
            # The padding has to be a byte string like the result of
            # pack(); with python 3 the text literal '\0' cannot be
            # concatenated to bytes.
            s += data_null * ((self._align - o._size % self._align) % self._align)
        return s
class CString(CBase):
    """
    Null-terminated byte string.

    The value (without its terminator) is kept in 'X'; the size in bytes
    counts the terminator, which pack() adds back.
    """
    def set_value(self, s):
        self.X = s
        self._size = len(self.X) + 1
    def unpack(self, c, o):
        end = c.find(data_null, o)
        if end < 0:
            # No terminator after 'o': the string goes up to the end of
            # the content. Using -1 as the end of the slice would drop
            # the last byte.
            value = c[o:]
        else:
            value = c[o:end]
        self.set_value(value)
        self._off = o
    def update(self, **kargs):
        # If 's' is an argument, then the string value is set to s
        if 's' in kargs:
            self.set_value(kargs['s'])
    def _initialize(self):
        # Default value: the empty string
        self.set_value(data_empty)
    def pack(self):
        return self.X + data_null
    def __str__(self):
        return bytes_to_name(self.X)
    def __repr__(self):
        return '<CString %r>' % self.X
    def pprint(self):
        return self.X
from elfesteem.strpatchwork import StrPatchwork
class CData(object):
    # Generic class to be used at the end of a CStruct, to implement common
    # cases implemented in C as struct s { ...; char data[]; }
    # We use StrPatchwork because the data may be very long, and we want to
    # be able to modify it very efficiently.
    #
    # CData(f) does not create a CData object: it returns a new subclass of
    # CBase, where the size of the data is f(parent).
    def __new__(cls, f):
        class CDataInstance(CBase):
            def _initialize(self):
                # The size is computed from the parent object
                self._size = f(self.parent)
                self._data = StrPatchwork()
            def unpack(self, c, o):
                end = o + self._size
                self._data[0] = c[o:end]
            def pack(self):
                return self._data.pack()
            def __str__(self):
                packed = self.pack()
                return packed.decode('latin1')
            def __getitem__(self, item):
                return self._data[item]
            def __setitem__(self, item, value):
                self._data[item] = value
        return CDataInstance
# Tables used to translate the field types of a CStruct:
#   size2type    size in bits -> struct code of the unsigned integer
#   size2type_s  size in bits -> struct code of the signed integer
#   type_size    struct code -> size in bits, and also
#                symbolic type (u08, ..., s64) -> struct code
size2type_s = {}
size2type = {}
type_size = {}
for t in 'BHIQ':
    s = struct.calcsize(t)
    size2type[s*8] = t
    type_size[t] = s*8
for t in 'bhiq':
    s = struct.calcsize(t)
    size2type_s[s*8] = t
    type_size[t] = s*8
type_size.update({
    'u08': size2type[8],
    'u16': size2type[16],
    'u32': size2type[32],
    'u64': size2type[64],
    's08': size2type_s[8],
    's16': size2type_s[16],
    's32': size2type_s[32],
    's64': size2type_s[64],
})
def convert_size2type(ftype, wsize):
    """
    Translate the type of a CStruct field into a format for struct.

    ftype: type found in '_fields'
    wsize: word size, used as key of size2type for the type "ptr"

    Returns '' if ftype is not a string, ftype itself if it starts with
    digits followed by 's', else the value found in size2type (for
    "ptr") or in type_size.
    Raises ValueError for any other string.
    """
    if not isinstance(ftype, str):
        # Not a string: no contribution to the format
        return ''
    if re.match(r'\d+s', ftype):
        return ftype
    if ftype == "ptr":
        return size2type[wsize]
    if ftype in type_size:
        return type_size[ftype]
    raise ValueError("unkown CStruct type", ftype)
class CStruct_metaclass(type):
    """
    metaclass, with a syntax compatible with python2 and python3

    For each field name listed in '_fields', the class gets a property
    of that name, which reads and writes the field with getf and setf.
    """
    _prefix = "_field_" # To avoid namespace collisions
    def __new__(cls, name, bases, dct):
        def field_property(fname):
            # One property per field; 'fname' is bound by this call
            def fget(self):
                return self.getf(fname)
            def fset(self, v):
                return self.setf(fname, v)
            return property(fget, fset, None)
        for fname, _ in dct.get('_fields', ()):
            dct[fname] = field_property(fname)
        return type.__new__(cls, name, bases, dct)
CStruct_base = CStruct_metaclass('CStruct_base', (CBase,), {})
class CStruct(CStruct_base):
    """
    The class CStruct is inherited by classes that simply
    represent a concatenation of typed fields

    How to create a CStruct class:
        _fields list the pairs (field_name, field_type)
            if the last fields are (field_name, class), they are optional
        _align: an optional integer value for alignment of optional fields
    How to create a CStruct object:
        the keywords not used by CBase initialise the object fields
    How to use a CStruct object:
        in addition to the CBase interface, the fields can be modified
    Field types:
        basic types with fixed size (u08, ..., 16s)
        wsize-dependent type (ptr)
    """
    _packformat = ""
    def getf(self, fname):
        # The value of the field 'fname' is stored as attribute '_0fname'
        return getattr(self, '_0' + fname)
    def setf(self, fname, v):
        return setattr(self, '_0' + fname, v)
    def _parent_parse(self, kargs):
        CBase._parent_parse(self, kargs)
        if self._packformat:
            # The byte order is then the one found in _packformat
            self.sex = ""
        converted = [(fname, convert_size2type(ftype, self.wsize))
                     for fname, ftype in self._fields]
        self._format = dict(converted)
        self._packstring = (self.sex + self._packformat
                            + "".join([code for _, code in converted]))
        # Fields with a type given as a string are handled by struct,
        # the other ones are the optional fields, with a class as type
        self._names = []
        self._opt = []
        for field in self._fields:
            if isinstance(field[1], str):
                self._names.append(field[0])
            else:
                self._opt.append(field)
    def unpack(self, c, o):
        fixed = struct.calcsize(self._packstring)
        self._size = fixed
        raw = c[o:o+fixed]
        # Content that is too short is completed with null bytes
        raw += data_null*(fixed-len(raw))
        values = struct.unpack(self._packstring, raw)
        for fname, value in zip(self._names, values):
            setattr(self, fname, value)
        # If the last fields are optional data, their types are a class
        for fname, fclass in self._opt:
            opt = fclass(parent=self, content=c, start=o+self._size)
            self._size += self._size_align(opt)
            self.setf(fname, opt)
    def _initialize(self):
        self._size = struct.calcsize(self._packstring)
        # Default values
        for fname in self._names:
            if self._format[fname].endswith('s'):
                default = data_empty
            else:
                default = 0
            self.setf(fname, default)
        for fname, fclass in self._opt:
            opt = fclass(parent=self)
            self._size += self._size_align(opt)
            self.setf(fname, opt)
    def update(self, **kargs):
        for fname, value in kargs.items():
            if fname in self._names:
                self.setf(fname, value)
        # The optional fields receive all the named args, and their
        # size may change
        for fname, fclass in self._opt:
            opt = self.getf(fname)
            self._size -= self._size_align(opt)
            opt.update(**kargs)
            self._size += self._size_align(opt)
    def pack(self):
        values = [getattr(self, fname) for fname in self._names]
        chunks = [struct.pack(self._packstring, *values)]
        for fname, fclass in self._opt:
            chunks.append(self._pack_align(self.getf(fname)))
        s = data_empty.join(chunks)
        if self.bytelen != len(s):
            raise ValueError("Inconsistent size %d != %d for %r"
                % (self.bytelen,len(s), self.__class__.__name__))
        return s
    def __str__(self):
        raise AttributeError("Use pack() instead of str()")
    def pprint(self):
        rep = { }
        for fname, _ in self._fields:
            value = getattr(self, fname)
            if hasattr(value, 'pprint'):
                value = value.pprint()
            rep[fname] = value
        return ( "<%s>" % self.__class__.__name__, rep )
    def __repr__(self):
        values = [repr(getattr(self, field[0])) for field in self._fields]
        return "<%s=%s>" % (self.__class__.__name__, "/".join(values))
    def __getitem__(self, item): # to work with format strings
        return getattr(self, item)
class CStructWithStrTable(CStruct):
    # The attribute 'name' is computed from an integer index 'name_idx'
    # and a link to the string table 'strtab'
    def get_name(self):
        idx = self.name_idx
        return self.strtab.get_name(idx)
    def set_name(self, name):
        if self.name_idx != 0:
            # There is already an entry in the string table: modify it
            self.strtab.mod_name(self.name_idx, name)
            return
        # Index 0: a new entry is added to the string table
        self.name_idx = self.strtab.add_name(name)
    name = property(get_name, set_name)
    def update(self, **kargs):
        CStruct.update(self, **kargs)
        # 'name' is not a field of the structure, it is set through
        # the property, if the structure has a field 'name_idx'
        if 'name' not in kargs:
            return
        if 'name_idx' in self._names:
            self.name = kargs['name']
class CArray_metaclass(type):
    """
    metaclass, with a syntax compatible with python2 and python3

    Checks that a class defines '_cls', or gets it from one of its
    bases; no check is made if the class name starts with 'CArray'.
    Raises ValueError if '_cls' is missing.
    """
    def __new__(cls, name, bases, dct):
        class_defined = '_cls' in dct
        for base in bases:
            # hasattr also finds a '_cls' that the base has itself
            # inherited; looking only in base.__dict__ rejects a class
            # that is two levels below the one defining '_cls'.
            class_defined = class_defined or hasattr(base, '_cls')
        if not name.startswith('CArray') and not class_defined:
            raise ValueError("Class %r should define '_cls'"%name)
        return type.__new__(cls, name, bases, dct)
CArray_base = CArray_metaclass('CArray_base', (CBase,), {})
class CArray(CArray_base):
    """
    The class CArray is inherited by classes that represent
    a variable length array of objects of variable length.

    How to create a CArray subclass:
        _cls: the class of the array elements
        count (optional): method that returns the number of elements
    How to use a CArray object:
        in addition to the CBase interface,
        [item] gives access to an element of the array
        len gives the number of elements
        append adds an element to the array
        _array is the whole array
        _last is the terminating element, if count is not defined
    """
    def _initialize(self):
        self._array = [] # Elements of the array
        self._size = 0
        if hasattr(self, 'count'):
            return
        # Array end is decided by a terminating element
        # which is detected by 'stop', or by default by
        # comparing with the default value of an object
        # of class _cls
        self._last = self._cls(parent=self)
        self._size += self._size_align(self._last)
    def pack(self):
        chunks = [self._pack_align(elt) for elt in self._array]
        if hasattr(self, '_last'):
            chunks.append(self._pack_align(self._last))
        s = data_empty.join(chunks)
        if self._size != len(s):
            raise ValueError("Inconsistent size %d != %d for %r"
                % (self._size,len(s), self.__class__.__name__))
        return s
    def stop(self, elt):
        # By default, the terminating element is the one that is packed
        # the same way as _last
        return elt.pack() == self._last.pack()
    def unpack(self, c, o):
        if o is None:
            return
        self._off = o
        if hasattr(self, 'count'):
            # self.count() is recomputed each time
            # This enables complicated conditions for array termination
            # Parsing also ends when the end of the content is reached
            idx = 0
            while idx < self.count() and o+self._size < len(c):
                elt = self._cls(parent=self, content=c, start=o+self._size)
                self._array.append(elt)
                self._size += self._size_align(elt)
                idx += 1
        else:
            # Parse until the terminating element or the end of the content
            pos = 0
            while o+pos < len(c):
                elt = self._cls(parent=self, content=c, start=o+pos)
                if self.stop(elt):
                    break
                self._array.append(elt)
                pos += self._size_align(elt)
            self._size += pos
    def __getitem__(self, item):
        return self._array[item]
    def __len__(self):
        return len(self._array)
    def append(self, obj):
        self._array.append(obj)
        self._size += self._size_align(obj)
        return obj
    def pprint(self):
        name = "<%s>"%self.__class__.__name__
        return (name, [elt.pprint() for elt in self._array])
    def __repr__(self):
        return "<%s of length %d>" % (self.__class__.__name__, len(self))
# Method that defines constants (as in .h headers) and tables that
# can recover the constant's name from its value.
def Constants(globs = None, table = None,
        name = None, prefix = None,
        no_name = (), **kargs):
    """
    Define the constants given as named args.

    globs: dictionary where each constant is stored (name -> value);
        nothing is stored if it is None
    table: dictionary where table[name] maps each value to the constant
        name without its prefix; no table is filled if it is None
    name: key used in 'table'; by default the prefix, without its
        trailing '_'; no table is filled if it is ''
    prefix: by default, the longest prefix common to all the constants
    no_name: constants that are not put in the table
    Only the constants whose name starts with the prefix are put in
    the table. A message is printed when a value is already in the table.
    """
    if prefix is None:
        # Use the prefix common to all value names
        for k in kargs:
            if prefix is None:
                prefix = k
            else:
                while not k.startswith(prefix):
                    prefix = prefix[:-1]
        if prefix is None:
            # No constant has been given, there is nothing to name
            prefix = ''
    if name is None:
        if prefix.endswith('_'): name = prefix[:-1]
        else:                    name = prefix
    # globs and table default to None: each of them is then ignored
    use_table = table is not None and name != ''
    if use_table and name not in table:
        table[name] = {}
    for k in kargs:
        if globs is not None:
            globs[k] = kargs[k]
        if use_table and k.startswith(prefix) and k not in no_name:
            if kargs[k] in table[name]:
                print("Duplicate at %s[%s]=%s; %s"%(name,kargs[k],table[name][kargs[k]],k))
            table[name][kargs[k]] = k[len(prefix):]