1 /**
2  * Utilities to disassemble some X86/ X86_64 byte code.
3  */
4 module disassembler;
5 
6 import beaengine;
7 import std.format;
8 import std.algorithm;
9 
// Text placed in front of the digits of a formatted address (see addressPrefix).
private string _prefix;
// Text placed after the digits of a formatted address (see addressSuffix).
private string _suffix;
// Format specifier built from _prefix and _suffix by updateAddressFormatSpecifier().
private string _addrFmtSpec;
// End-of-line sequence selected by the eolMode setter.
private string _eol;
// Mode most recently passed to the eolMode setter.
private EolMode _eolMode;
15 
/// Module constructor: selects the system line ending and the "h" address suffix.
static this()
{
    // the two settings are independent of each other
    eolMode = EolMode.sys;
    addressSuffix = "h";
}
21 
/// Line terminators that can be selected with eolMode.
enum EolMode
{
    /// carriage return only
    cr,
    /// line feed only
    lf,
    /// carriage return followed by a line feed
    crlf,
    /// the newline sequence of the host system
    sys,
}
27 
/**
 * The end-of-line sequence used to format a disassembly.
 * Useful under Windows when the result of prettyDisasm() is written to stdout.
 *
 * Returns:
 * the mode that was last selected.
 */
@property EolMode eolMode()
{
    return _eolMode;
}

/// ditto
@property void eolMode(EolMode mode)
{
    import std.ascii : newline;

    _eolMode = mode;
    // translates the mode to the character sequence appended to each line
    final switch (mode)
    {
        case EolMode.cr:
            _eol = "\r";
            break;
        case EolMode.lf:
            _eol = "\n";
            break;
        case EolMode.crlf:
            _eol = "\r\n";
            break;
        case EolMode.sys:
            _eol = newline;
            break;
    }
}
46 
/**
 * The text written in front of the digits of a formatted address.
 *
 * Setting the prefix rebuilds the address format specifier. An Exception
 * is thrown, and prefix and suffix are reset, when their cumulated length
 * is greater than 6.
 */
@property string addressPrefix()
{
    return _prefix;
}

/// ditto
@property void addressPrefix(string p)
{
    _prefix = p;
    updateAddressFormatSpecifier();
}
55 
56 
/**
 * The text written after the digits of a formatted address.
 *
 * Setting the suffix rebuilds the address format specifier. An Exception
 * is thrown, and prefix and suffix are reset, when their cumulated length
 * is greater than 6.
 */
@property string addressSuffix()
{
    return _suffix;
}

/// ditto
@property void addressSuffix(string s)
{
    _suffix = s;
    updateAddressFormatSpecifier();
}
65 
66 
/**
 * Rebuilds the format specifier used to print an address from the current
 * prefix and suffix.
 *
 * The address itself is always printed as zero-padded upper case hexadecimal,
 * 8 digits when size_t is 4 bytes wide and 16 digits when it is 8 bytes wide.
 *
 * Throws:
 * Exception when the cumulated length of the prefix and of the suffix is
 * greater than 6. In this case the prefix, the suffix and the format
 * specifier are all reset to their defaults (no prefix, "h" suffix) before
 * the exception is thrown, so the module state stays coherent.
 */
private void updateAddressFormatSpecifier()
{
    import std.array : replace;

    static if (size_t.sizeof == 4)
        enum digits = "%.8X";
    else static if (size_t.sizeof == 8)
        enum digits = "%.16X";
    else static assert(0, "unsupported pointer size");

    const bool tooLong = _prefix.length + _suffix.length > 6;
    if (tooLong)
    {
        _prefix = "";
        _suffix = "h";
    }

    // The prefix and the suffix end up inside a format string: a literal '%'
    // has to be doubled, otherwise it would be read as a format specifier.
    _addrFmtSpec = replace(_prefix, "%", "%%") ~ digits ~ replace(_suffix, "%", "%%");

    // thrown only once the specifier matches the defaults that were restored
    if (tooLong)
        throw new Exception("too long prefix and suffix, default value reset");
}
81 
82 
/**
 * Wraps a raw pointer so that it is printed with the address format
 * specifier of the module.
 */
private struct Address
{
    void* _addr;
    alias _addr this;

    /// Returns the wrapped pointer formatted with the current address format specifier.
    string toString() const
    {
        return _addrFmtSpec.format(_addr);
    }
}
95 
/// Array of pointers to DisasmParams, one per instruction. Usually used to store a full function.
alias Sub = DisasmParams* [];

/// Associates a Sub (an array of DisasmParams pointers) to a particular address.
alias Subs = Sub[void*];

/// Associates an address to an array of addresses. Used to store the cross references.
alias SubsCrossRefs = void*[][void*];
104 
/**
 * Handles the translation of the disassembler addresses to a symbol.
 *
 * All the members are static: the type is used as a namespace around a single
 * table that maps an address to a name.
 */
struct symbolTable
{
    private static string[const(void*)] _symbols;
    private import std.traits: isImplicitlyConvertible, isSomeFunction, PointerTarget;
    private import std.meta: Alias;

    /// Toggles on or off address translation. Works only for CALL and JMP
    static bool enable;

    /**
     * Associates the string symbol to address.
     *
     * Params:
     * addType = when true, the type pointed by address is appended to the name.
     * symbol = the name to display for address.
     * address = a pointer, a function pointer or a delegate. For a delegate
     * the address of its function is used.
     */
    static void add(bool addType = false, T)(string symbol, const T address) @safe nothrow
    if (isImplicitlyConvertible!(T,void*) || is(T==delegate) || is(PointerTarget!T==function))
    {
        enable = true;
        static if (isImplicitlyConvertible!(T,void*) || is(T==function))
        {
            _symbols[address] = symbol;
            static if (addType)
                _symbols[address] ~= " (" ~ PointerTarget!T.stringof ~ ")";
        }
        else static if (is(T DgFun == delegate))
        {
            // The function pointer of the delegate is only used as an opaque
            // key and is never called, hence the @trusted wrapper that keeps
            // this function @safe.
            const(void)* key = (() @trusted => cast(const(void)*) address.funcptr)();
            _symbols[key] = symbol;
            // DgFun is the function type of the delegate; PointerTarget
            // cannot be instantiated with a delegate type.
            static if (addType)
                _symbols[key] ~= " (" ~ DgFun.stringof ~ ")";
        }
        else static assert(0, "unsupported argument type in "
            ~ __PRETTY_FUNCTION__ ~ " : " ~ T.stringof);
    }

    /**
     * Adds a free or a static function to the symbol table.
     *
     * Params:
     * symbol = the function to add.
     * name = the name to display. When empty the identifier of the function
     * is used, prefixed with the name of its parent if this is an aggregate.
     * addType = when true, the type of the function is appended to the name.
     */
    static void add(alias symbol, string name = "", bool addType = false)() @safe nothrow
    if (is(typeof(symbol)) && isSomeFunction!(typeof(symbol)))
    {
        enable = true;
        auto funPtr = &symbol;
        static if (name.length)
            _symbols[funPtr] = name;
        else
        {
            alias P = Alias!(__traits(parent, symbol));
            static if (is(P == struct) || is(P == union) || is(P == class))
                _symbols[funPtr] = __traits(parent, symbol).stringof ~ '.' ~ __traits(identifier, symbol);
            else
                _symbols[funPtr] = __traits(identifier, symbol);
        }
        // applies to a custom name too, like in the other overload
        static if (addType)
            _symbols[funPtr] ~= " (" ~ PointerTarget!(typeof(funPtr)).stringof ~ ")";
    }

    /// Removes the symbol matching to address.
    static void remove(const void* address) @safe nothrow
    {
        _symbols.remove(address);
    }

    /// Clears the internal container.
    static void clear() nothrow
    {
        // Removing the keys while iterating the container is not reliable,
        // a new empty container is used instead.
        _symbols = null;
    }

    /// Indicates if a symbol is stored for address.
    deprecated ("use the in operator instead")
    static bool canFind(const void* address) nothrow
    {
        return (address in _symbols) !is null;
    }

    /// Returns a pointer to the symbol name if address is stored, otherwise null.
    static const(string)* opBinaryRight(string op : "in")(const void* address)
    nothrow
    {
        return address in _symbols;
    }

    /// Returns the symbol associated to address or an empty string if address is not stored.
    deprecated ("use the in operator instead")
    static string symbol(const void* address) nothrow
    {
        // Always looks the address up: a cached result would become wrong
        // after a call to add(), remove() or clear().
        if (auto found = address in _symbols)
            return *found;
        return "";
    }

    /**
     * Scans an entire module and adds its functions to the table.
     *
     * Only the functions whose protection is package, public or export are
     * added. Each overload is registered, under the name of the member.
     */
    static void addModule(alias mod)() @safe nothrow
    {
        import std.traits: isSomeFunction;
        import std.algorithm: canFind;
        enable = true;
        foreach(memb;__traits(allMembers,mod))
        static if (is(typeof(__traits(getMember, mod, memb))))
            static if (isSomeFunction! (__traits(getMember,mod,memb) ))
                foreach(ov;__traits(getOverloads,mod,memb))
                    static if (canFind(["package","public","export"],__traits(getProtection,ov)))
                        // address of this overload, not of the first one of the set
                        _symbols[&ov] = memb;
    }
}
215 
// Exercises symbolTable: adding by address and by alias, the in operator,
// the deprecated symbol() accessor and clear().
nothrow unittest
{

    // local function used to test the overload of add() that takes an alias
    static void foo(uint a){}

    symbolTable.add("a", cast(void*) 0xF);
    symbolTable.add("b", cast(void*) 0xFF);
    symbolTable.add("c", cast(void*) 0xFFF);
    // empty name and addType set: the identifier and the type are stored
    symbolTable.add!(foo, "", true);

    // the addresses that were added are found
    assert((cast(void*) 0xF) in symbolTable);
    assert((cast(void*) 0xFF) in symbolTable);
    assert((cast(void*) 0xFFF) in symbolTable);
    assert((cast(void*) &foo) in symbolTable);

    // the addresses that were never added are not found
    assert((cast(void*) 0xE) !in symbolTable);
    assert((cast(void*) 0xEE) !in symbolTable);
    assert((cast(void*) 0xEEE) !in symbolTable);
    assert((cast(void*) 0x0) !in symbolTable);

    // the deprecated accessor returns the name stored for an address
    assert(symbolTable.symbol(cast(void*) 0xF) == "a");
    assert(symbolTable.symbol(cast(void*) 0xFF) == "b");
    assert(symbolTable.symbol(cast(void*) 0xFFF) == "c");

    // an unknown address gives an empty string
    assert(symbolTable.symbol(cast(void*) 0xEDD) != "c");
    assert(symbolTable.symbol(cast(void*) 0xEDD) == "");

    assert(symbolTable.symbol(cast(void*) &foo) == "foo (void(uint))");

    // nothing remains after clear()
    symbolTable.clear;
    assert(symbolTable._symbols.length == 0);
}
248 
249 
/**
 * Formats an array of Disasm as a string.
 *
 * The result is made of a header line that contains the address of the first
 * instruction, an optional line for the name of the sub (when the symbol
 * table is enabled and knows the address), an optional line for the cross
 * references, one line per instruction and a closing separator.
 *
 * Params:
 * sub = an array of Disasm.
 * crossRefs = a pointer to the array of the addresses from where the first
 * sub instruction is called. Can be null.
 *
 * Returns:
 * a readable representation of the instructions contained in sub or an
 * empty string if sub is empty.
 */
string formatSub(const ref Sub sub, const ref void*[]* crossRefs)
{
    static immutable lastLine = ";--------------------------------------------";
    import std.array: Appender;
    import std.string: fromStringz;

    // an empty sub has no first instruction to build the header from
    if (sub.length == 0)
        return "";

    Appender!string result;
    // (numLine + 2 separators) * (address: 2*size_t*sizeof + instruction: 24 chars)
    result.reserve((sub.length + 2) * (size_t.sizeof * 2 + 24));

    result ~= format(";------- SUB " ~ _addrFmtSpec ~ " -------" ~ _eol, sub[0].eip);
    // The closing separator is as wide as the header, end-of-line excluded
    // whatever its length is, and never wider than the separator template.
    const size_t footerWidth = min(result.data.length - _eol.length, lastLine.length);
    if (symbolTable.enable) if (const(string)* s = sub[0].eip in symbolTable)
        result ~= "; NAMED: " ~ *s ~ _eol;
    if (crossRefs !is null && (*crossRefs).length)
        result ~= format("; XREFS: %s " ~ _eol, cast(Address[])*crossRefs);

    foreach(instr; sub)
    {
        auto instrText = fromStringz(instr.asString.ptr);
        // pads with zeroes the operand of a CALL that is shorter than 16 chars
        version(X86_64) if ((instr.instruction.opcode & 0xFF) == 0xE8
            && instr.instruction.addrValue != 0)
        {
            import std.string: rightJustify, split;
            auto splt = split(instrText);
            // the text is only rewritten when there is an operand to pad
            if (splt.length > 1 && splt[1].length < 16)
            {
                auto len = splt[1].length + 8;
                instrText = format("%s %s", splt[0], rightJustify(splt[1], len, '0'));
            }
        }
        result ~= format(_addrFmtSpec ~ "  %s", instr.eip, instrText);
        // appends the name of the target when it is in the symbol table
        if (symbolTable.enable && instr.instruction.addrValue)
        {
            void* ptr = cast(void*) instr.instruction.addrValue;
            if (const(string)* s = ptr in symbolTable)
                result ~= format(" ; (%s)", *s);
        }
        result ~= _eol;
    }
    result ~= lastLine[0 .. footerWidth];
    result ~= _eol;
    result ~= _eol;
    return result.data;
}
305 
306 
/**
 * Disassembles a function and returns its string representation.
 *
 * The functions that are found are formatted in the order of their
 * addresses, each one with formatSub().
 *
 * Params:
 * eip = the entry point, a pointer to a function.
 * maxNesting = indicates how many consecutive sub functions can be
 * disassembled. A value less than or equal to 0 is replaced by 1.
 *
 * Returns:
 * a string representing the function(s).
 *
 * Examples:
 * ---
 * import std.stdio;
 * import disassembler;
 *
 * void main(string[] args)
 * {
 *     // prints the code generated for main().
 *     writeln(prettyDisasm(&main, 1));
 * }
 * ---
 */
string prettyDisasm(void* eip, short maxNesting = 1)
{
    if (maxNesting <= 0)
        maxNesting = 1;

    Subs subs;
    SubsCrossRefs xrefs;
    // the kernel expects the negated maximal depth
    short nesting = cast(short) (-maxNesting);
    disasmSub(eip, subs, xrefs, nesting);

    string result;
    foreach (subAddr; sort(subs.keys))
    {
        // null when nothing refers to this sub
        auto callers = subAddr in xrefs;
        result ~= formatSub(subs[subAddr], callers);
    }
    return result;
}
347 
/// ditto
string prettyDisasm(T)(T eip, short maxNesting = 1)
if (is(T==delegate))
{
    // only the function of the delegate is disassembled
    auto entry = eip.funcptr;
    return prettyDisasm(entry, maxNesting);
}
354 
355 
/**
 * Disassembling kernel.
 *
 * Disassembles recursively from an address. The instructions are decoded one
 * after the other until a RET is found while no forward jump is pending,
 * until an INT3 ends the function or until the disassembler reports that
 * nothing more can be decoded. The target of a CALL, and the target of a
 * control transfer directly followed by an INT3, are disassembled too.
 *
 * Params:
 * eip = entry point of the function to disassemble. This must point to some
 * byte-code. Nothing is done if it is null.
 * subs = the associative array filled with the functions instructions.
 * xrefs = the associative array filled with the function callers.
 * nesting = must initially match the inverse of the maximum nested function
 * call the kernel will disassemble. It is incremented when the function is
 * entered, decremented when it is left, and nothing is disassembled when the
 * incremented value is greater than 0.
 */
void disasmSub(void * eip, ref Subs subs, ref SubsCrossRefs xrefs, ref short nesting)
{
    // there is nothing to decode at a null address
    if (eip is null)
        return;

    void*[] forward_jumps;

    scope(exit) --nesting;
    if (++nesting > 0)
        return;

    auto loc = eip;
    Sub sub = new Sub(0);
    scope(success) subs[eip] = sub;

    // Disassembles target if it is not known yet and stores, only once,
    // that it is referenced from origin.
    void follow(void* target, void* origin)
    {
        if (!(target in subs)) disasmSub(target, subs, xrefs, nesting);
        auto refList = target in xrefs;
        if (refList is null || !canFind(*refList, origin))
            xrefs[target] ~= origin;
    }

    while(true)
    {
        DisasmParams * cur = new DisasmParams;

        // info for the disassembling
        cur.eip = loc;
        version(X86) cur.archi = Archi.ia32;
        version(X86_64) cur.archi = Archi.intel64;

        // disassembles
        const int len = disassemble(cur);
        if (len <= SpecialInfo.OUT_OF_BLOCK)
            break;

        // CALL: disassembles any valid target, stores the cross-reference
        if (cur.instruction.opcode == 0xE8 && cur.instruction.addrValue != 0)
            follow(cast(void*) cur.instruction.addrValue, loc);

        // JX/JNX/JMP, stores destination, it may be located after a RET (still in the same SUB)
        if (cur.instruction.category == InstrCat.CONTROL_TRANSFER
            && cur.instruction.branch != BranchType.RET
            && cur.instruction.branch != BranchType.CALL
            && cur.instruction.addrValue != 0)
        {
            void* forward_loc = cast(void*) cur.instruction.addrValue;
            if (forward_loc > loc && !canFind(forward_jumps, forward_loc))
                forward_jumps ~= forward_loc;
        }

        sub ~= cur;

        // check INT3 after JMP, "tail CALL", real RET is in the destination
        if (cur.instruction.opcode == 0xCC)
        {
            // cur has just been appended: the instruction that precedes the
            // INT3 is the second to last entry, not the last one.
            const bool afterTransfer = sub.length > 1
                && sub[$-2].instruction.category == InstrCat.CONTROL_TRANSFER;
            if (afterTransfer && sub[$-2].instruction.addrValue != 0)
            {
                // the reference comes from the transfer, not from the INT3
                follow(cast(void*) sub[$-2].instruction.addrValue,
                    cast(void*) sub[$-2].eip);
            }
            // An INT3 that is itself reported as a control transfer still
            // ends the SUB, as it did before the previous instruction was
            // inspected.
            if (afterTransfer
                || cur.instruction.category == InstrCat.CONTROL_TRANSFER)
                break;
        }

        // RET: end of SUB if no more "forward" location.
        if (cur.instruction.category == InstrCat.CONTROL_TRANSFER
            && cur.instruction.branch == BranchType.RET
            && forward_jumps.length == 0) break;

        loc += len;

        // removes a forward reference if location is reached
        auto i = countUntil(forward_jumps, loc);
        if (i != -1)
            forward_jumps = remove(forward_jumps, i);
    }
}
444 
/// ditto
void disasmSub(T)(T eip, ref Subs subs, ref SubsCrossRefs xrefs, ref short nesting)
if (is(T==delegate))
{
    // only the function of the delegate is disassembled
    auto entry = eip.funcptr;
    disasmSub(entry, subs, xrefs, nesting);
}
451