/**
 * Helpers used to disassemble X86 / X86_64 byte code into readable text.
 */
module disassembler;

import beaengine;
import std.format;
import std.algorithm;

// Module-level formatting state.
private string _prefix;       // text written before a formatted address
private string _suffix;       // text written after a formatted address
private string _addrFmtSpec;  // format specifier used to write an address
private string _eol;          // end-of-line sequence matching _eolMode
private EolMode _eolMode;     // end-of-line mode currently selected

static this()
{
    // Defaults: addresses get the "h" suffix, lines end with the system newline.
    addressSuffix = "h";
    eolMode = EolMode.sys;
}

/// The end-of-line modes.
enum EolMode
{
    cr,     /// "\r"
    lf,     /// "\n"
    crlf,   /// "\r\n"
    sys     /// std.ascii.newline
}

/**
 * The end-of-line sequence used when the disassembly is formatted.
 * Useful under Windows when prettyDisasm() is written to stdout.
 */
@property EolMode eolMode()
{
    return _eolMode;
}

/// ditto
@property void eolMode(EolMode mode)
{
    import std.ascii : newline;

    _eolMode = mode;
    // final switch: every EolMode member must be handled.
    final switch (mode)
    {
        case EolMode.cr:
            _eol = "\r";
            break;
        case EolMode.lf:
            _eol = "\n";
            break;
        case EolMode.crlf:
            _eol = "\r\n";
            break;
        case EolMode.sys:
            _eol = newline;
            break;
    }
}

/// The prefix used to format an address.
@property string addressPrefix()
{
    return _prefix;
}

/// ditto
@property void addressPrefix(string p)
{
    _prefix = p;
    // the address format specifier depends on the prefix
    updateAddressFormatSpecifier();
}


/// The suffix used to format an address.
@property string addressSuffix(){return _suffix;}
/// ditto
@property void addressSuffix(string s)
{
    _suffix = s;
    updateAddressFormatSpecifier;
}


/**
 * Rebuilds the format specifier used to write an address from the current
 * prefix and suffix.
 *
 * Throws:
 *      An Exception when prefix and suffix are together longer than 6 chars.
 *      In this case the prefix is reset to "", the suffix to "h" and the
 *      specifier is rebuilt from these defaults before throwing.
 */
private void updateAddressFormatSpecifier()
{
    const bool tooLong = _prefix.length + _suffix.length > 6;
    if (tooLong)
    {
        _prefix = "";
        _suffix = "h";
    }
    // Always rebuilt, even on error, so that the specifier never disagrees
    // with the prefix and the suffix reported by the properties.
    static if (size_t.sizeof == 4)
        _addrFmtSpec = _prefix ~ "%.8X" ~ _suffix;
    else static if (size_t.sizeof == 8)
        _addrFmtSpec = _prefix ~ "%.16X" ~ _suffix;
    else static assert(0, "unsupported pointer size");
    if (tooLong)
        throw new Exception("too long prefix and suffix, default value reset");
}


/**
 * Utility used to format an address.
 *
 * Wraps a pointer so that toString() writes it with the module format specifier.
 */
private struct Address
{
    void* _addr;
    alias _addr this;
    string toString() const
    {
        return format(_addrFmtSpec, _addr);
    }
}

/// Array of pointers to DisasmParams. Usually used to store a full function.
alias Sub = DisasmParams* [];

/// Associates a Sub to a particular address.
alias Subs = Sub[void*];

/// Associates an address to an array of addresses. Used to store the cross references.
alias SubsCrossRefs = void*[][void*];

/// Handles the translation of the disassembler addresses to a symbol.
struct symbolTable
{
    private static string[const(void*)] _symbols;
    // Result and address of the last query. Both are refreshed together by
    // lookup(), so that they cannot disagree.
    private static string* _lastSymbol;
    private static void* _lastAddress;
    private import std.traits: isImplicitlyConvertible, isSomeFunction, PointerTarget;
    private import std.meta: Alias;

    /// Toggles on or off address translation. Works only for CALL and JMP
    static bool enable;

    // Looks address up in the table and records the query and its result.
    private static string* lookup(const void* address) nothrow
    {
        _lastAddress = cast(void*) address;
        _lastSymbol = _lastAddress in _symbols;
        return _lastSymbol;
    }

    /**
     * Associates the string symbol to address.
     *
     * Params:
     *      addType = when true the pointed type is appended to the symbol.
     *      symbol = the text displayed for the address.
     *      address = a pointer, a function pointer or a delegate. For a
     *      delegate, the address of its function is used.
     */
    static void add(bool addType = false, T)(string symbol, const T address) @safe nothrow
    if (isImplicitlyConvertible!(T,void*) || is(T==delegate) || is(PointerTarget!T==function))
    {
        enable = true;
        static if (isImplicitlyConvertible!(T,void*) || is(T==function))
        {
            _symbols[address] = symbol;
            static if (addType)
                _symbols[address] ~= " (" ~ PointerTarget!T.stringof ~ ")";
        }
        else static if (is(T==delegate))
        {
            _symbols[address.funcptr] = symbol;
            // A delegate is not a pointer type: the target type is taken
            // from its function pointer.
            static if (addType)
                _symbols[address.funcptr] ~= " ("
                    ~ PointerTarget!(typeof(T.init.funcptr)).stringof ~ ")";
        }
        else static assert(0, "unsupported argument type in "
            ~ __PRETTY_FUNCTION__ ~ " : " ~ T.stringof);
    }

    /**
     * Adds a free or a static function to the symbol table.
     *
     * Params:
     *      symbol = the function to add.
     *      name = the text displayed for the function. When empty the
     *      identifier is used, prefixed with the parent name when the parent
     *      is a struct, a union or a class.
     *      addType = when true, and only when name is empty, the function
     *      type is appended.
     */
    static void add(alias symbol, string name = "", bool addType = false)() @safe nothrow
    if (is(typeof(symbol)) && isSomeFunction!(typeof(symbol)))
    {
        enable = true;
        auto funPtr = &symbol;
        static if (name.length)
            _symbols[funPtr] = name;
        else
        {
            alias P = Alias!(__traits(parent, symbol));
            static if (is(P == struct) || is(P == union) || is(P == class))
                _symbols[funPtr] = __traits(parent, symbol).stringof ~ '.' ~ __traits(identifier, symbol);
            else
                _symbols[funPtr] = __traits(identifier, symbol);
            static if (addType)
                _symbols[funPtr] ~= " (" ~ PointerTarget!(typeof(funPtr)).stringof ~ ")";
        }
    }

    /// Removes the symbol matching to address.
    static void remove(const void* address) @safe nothrow
    {
        _symbols.remove(address);
    }

    /// Clears the internal container.
    static void clear() nothrow
    {
        // The table is dropped as a whole: removing the entries one by one
        // while iterating byKey would modify the container during iteration.
        _symbols = null;
        _lastSymbol = null;
        _lastAddress = null;
    }

    /// Indicates if a symbol is stored for address.
    deprecated ("use the in operator instead")
    static bool canFind(const void* address) nothrow
    {
        return lookup(address) != null;
    }

    /// Returns a pointer to the symbol name if address is stored, otherwise null.
    static const(string)* opBinaryRight(string op : "in")(const void* address)
    nothrow
    {
        return lookup(address);
    }

    /// Returns the symbol associated to address or an empty string.
    deprecated ("use the in operator instead")
    static string symbol(const void* address) nothrow
    {
        // Always a fresh query: a result cached before add(), remove() or
        // the in operator were used would not be reliable.
        if (auto s = lookup(address))
            return *s;
        else
            return "";
    }

    /**
     * Scans an entire module and adds its functions to the table.
     *
     * Only the functions whose protection is package, public or export are
     * added. Each overload is stored under the name of the member.
     */
    static void addModule(alias mod)() @safe nothrow
    {
        import std.traits: isSomeFunction;
        import std.algorithm: canFind;
        enable = true;
        foreach(memb;__traits(allMembers,mod))
            static if (is(typeof(__traits(getMember, mod, memb))))
                static if (isSomeFunction!(__traits(getMember,mod,memb) ))
                    foreach(ov;__traits(getOverloads,mod,memb))
                        static if (canFind(["package","public","export"],__traits(getProtection,ov)))
                            // address of this very overload, not of the first one
                            _symbols[&ov] = memb;
    }
}

nothrow unittest
{

    static void foo(uint a){}

    symbolTable.add("a", cast(void*) 0xF);
    symbolTable.add("b", cast(void*) 0xFF);
    symbolTable.add("c", cast(void*) 0xFFF);
    symbolTable.add!(foo, "", true);

    assert((cast(void*) 0xF) in symbolTable);
    assert((cast(void*) 0xFF) in symbolTable);
    assert((cast(void*) 0xFFF) in symbolTable);
    assert((cast(void*) &foo) in symbolTable);

    assert((cast(void*) 0xE) !in symbolTable);
    assert((cast(void*) 0xEE) !in symbolTable);
    assert((cast(void*) 0xEEE) !in symbolTable);
    assert((cast(void*) 0x0) !in symbolTable);

    assert(symbolTable.symbol(cast(void*) 0xF) == "a");
    assert(symbolTable.symbol(cast(void*) 0xFF) == "b");
    assert(symbolTable.symbol(cast(void*) 0xFFF) == "c");

    assert(symbolTable.symbol(cast(void*) 0xEDD) != "c");
    assert(symbolTable.symbol(cast(void*) 0xEDD) == "");

    assert(symbolTable.symbol(cast(void*) &foo) == "foo (void(uint))");

    // symbol() must agree with a query made just before with the in operator
    assert((cast(void*) 0xF) in symbolTable);
    assert(symbolTable.symbol(cast(void*) 0xF) == "a");

    // symbol() must see an entry removed after it has been queried
    symbolTable.remove(cast(void*) 0xF);
    assert(symbolTable.symbol(cast(void*) 0xF) == "");

    symbolTable.clear;
    assert(symbolTable._symbols.length == 0);
}


/**
 * Formats a Sub as a string.
 *
 * Params:
 *      sub = an array of pointers to DisasmParams.
 *      crossRefs = an array of address from where the first sub instruction is called.
 *
 * Returns:
 *      a readable representation of the instructions contained in sub.
*/
string formatSub(const ref Sub sub, const ref void*[]* crossRefs)
{
    static immutable lastLine = ";--------------------------------------------";
    import std.algorithm.comparison: min;
    import std.array: Appender;
    import std.string: fromStringz;

    // Nothing to format. This also protects the accesses to sub[0] below.
    if (sub.length == 0)
        return "";

    Appender!string result;
    // (numLine + 2 separators) * (address: 2*size_t*sizeof + instruction: 24 chars)
    result.reserve((sub.length + 2) * (size_t.sizeof * 2 + 24));

    // header
    result ~= format(";------- SUB " ~ _addrFmtSpec ~ " -------" ~ _eol, sub[0].eip);
    // Width of the header without its end-of-line, which can be made of one
    // or of two chars. Used to give the same width to the footer.
    const size_t headerWidth = result.data.length - _eol.length;
    if (symbolTable.enable) if (const(string)* s = sub[0].eip in symbolTable)
        result ~= "; NAMED: " ~ *s ~ _eol;
    if (crossRefs !is null && (*crossRefs).length)
        result ~= format("; XREFS: %s " ~ _eol, cast(Address[])*crossRefs);

    // one line per instruction: address, text, optional symbol of the target
    foreach(instr; sub)
    {
        auto instrText = fromStringz(instr.asString.ptr);
        // CALL with a known target: the operand is left-padded with zeros
        version(X86_64) if ((instr.instruction.opcode & 0xFF) == 0xE8
            && instr.instruction.addrValue != 0)
        {
            import std.string: rightJustify, split;
            auto splt = split(instrText);
            // the text is expected to be made of a mnemonic and an operand
            if (splt.length > 1 && splt[1].length < 16)
            {
                auto len = splt[1].length + 8;
                instrText = format("%s %s", splt[0], rightJustify(splt[1], len, '0'));
            }
        }
        result ~= format(_addrFmtSpec ~ "  %s", instr.eip, instrText);
        if (symbolTable.enable && instr.instruction.addrValue)
        {
            void* ptr = cast(void*) instr.instruction.addrValue;
            if (const(string)* s = ptr in symbolTable)
                result ~= format(" ; (%s)", *s);
        }
        result ~= _eol;
    }

    // footer
    result ~= lastLine[0 .. min(headerWidth, lastLine.length)];
    result ~= _eol;
    result ~= _eol;
    return result.data;
}


/**
 * Disassembles a function and returns its string representation.
 *
 * Params:
 *      eip = the entry point, a pointer to a function.
 *      maxNesting = indicates how many consecutive sub functions can be disassembled.
 *
 * Returns:
 *      a string representing the function(s).
*
 * Examples:
 * ---
 * import std.stdio;
 * import disassembler;
 *
 * void main(string[] args)
 * {
 *     // prints the code generated for main().
 *     writeln(prettyDisasm(&main, 1));
 * }
 * ---
 */
string prettyDisasm(void* eip, short maxNesting = 1)
{
    Subs subs;
    SubsCrossRefs xrefs;
    string result;
    if (maxNesting <= 0) maxNesting = 1;
    // disasmSub() expects the negated maximum and counts the levels upward
    short nesting = cast(short) (maxNesting * -1);
    //
    disasmSub(eip, subs, xrefs, nesting);
    // the subs are written by ascending address
    foreach(subAddr; sort(subs.keys))
    {
        // disasmSub() stores an empty Sub when the first instruction of a
        // target cannot be decoded. formatSub() reads sub[0], so such a
        // Sub must not be passed.
        if (subs[subAddr].length == 0)
            continue;
        auto x = (subAddr in xrefs);
        result ~= formatSub(subs[subAddr], x);
    }
    return result;
}

/// ditto
string prettyDisasm(T)(T eip, short maxNesting = 1)
if (is(T==delegate))
{
    // only the function of the delegate is disassembled
    return prettyDisasm(eip.funcptr, maxNesting);
}


/**
 * Disassembling kernel.
 *
 * Disassembles recursively from an address.
 *
 * Params:
 *      eip = entry point of the function to disassemble. This must point to some byte-code.
 *      subs = the associative array filled with the functions instructions.
 *      xrefs = the associative array filled with the function callers.
 *      nesting = must initially match the negation of the maximum nested function call the kernel will disassemble.
*/
void disasmSub(void * eip, ref Subs subs, ref SubsCrossRefs xrefs, ref short nesting)
{
    void*[] forward_jumps;

    // nesting is incremented on entry and restored on exit. A positive
    // value means that the maximum depth is exceeded.
    scope(exit) --nesting;
    if (++nesting > 0)
        return;

    auto loc = eip;
    Sub sub = new Sub(0);
    scope(success) subs[eip] = sub;

    // Disassembles target if not already done, then stores from as one of its callers.
    void followTarget(void* target, void* from)
    {
        if (!(target in subs)) disasmSub(target, subs, xrefs, nesting);
        auto refList = target in xrefs;
        if (refList is null || !canFind(*refList, from))
            xrefs[target] ~= from;
    }

    // the instruction decoded just before the current one, and its address
    DisasmParams * prev = null;
    void* prev_loc = null;

    while(true)
    {
        DisasmParams * cur = new DisasmParams;

        // info for the disassembling
        cur.eip = loc;
        version(X86) cur.archi = Archi.ia32;
        version(X86_64) cur.archi = Archi.intel64;

        // disassembles
        const int len = disassemble(cur);
        // A length that is not positive cannot be used to advance: whatever
        // the value of the special codes, stopping here prevents an endless
        // or a backward scan.
        if (len <= SpecialInfo.OUT_OF_BLOCK || len <= 0)
            break;

        // CALL: disassembles any valid target, stores the cross-reference
        if (cur.instruction.opcode == 0xE8 && cur.instruction.addrValue != 0)
            followTarget(cast(void*) cur.instruction.addrValue, loc);

        // JX/JNX/JMP, stores destination, it may be located after a RET (still in the same SUB)
        if (cur.instruction.category == InstrCat.CONTROL_TRANSFER
            && cur.instruction.branch != BranchType.RET
            && cur.instruction.branch != BranchType.CALL
            && cur.instruction.addrValue != 0)
        {
            void* forward_loc = cast(void*) cur.instruction.addrValue;
            if (forward_loc > loc && !canFind(forward_jumps, forward_loc))
                forward_jumps ~= forward_loc;
        }

        sub ~= cur;

        // check INT3 after JMP, "tail CALL", real RET is in the destination.
        // The test is made on the previous instruction: cur, which is now
        // the last item of sub, is the INT3 itself.
        if (cur.instruction.opcode == 0xCC && prev !is null
            && prev.instruction.category == InstrCat.CONTROL_TRANSFER)
        {
            // the cross-reference is the location of the transfer, not of the INT3
            if (prev.instruction.addrValue != 0)
                followTarget(cast(void*) prev.instruction.addrValue, prev_loc);
            break;
        }

        // RET: end of SUB if no more "forward" location.
        if (cur.instruction.category == InstrCat.CONTROL_TRANSFER
            && cur.instruction.branch == BranchType.RET
            && forward_jumps.length == 0) break;

        prev = cur;
        prev_loc = loc;
        loc += len;

        // removes a forward reference if location is reached
        auto i = countUntil(forward_jumps, loc);
        if (i != -1)
            forward_jumps = remove(forward_jumps, i);
    }
}

/// ditto
void disasmSub(T)(T eip, ref Subs subs, ref SubsCrossRefs xrefs, ref short nesting)
if (is(T==delegate))
{
    // only the function of the delegate is disassembled
    disasmSub(eip.funcptr, subs, xrefs, nesting);
}