module beaengine;

//TODO: integrate documentation

// Instruction ----------------------------------------------------------------+

/// Effects an instruction can have on a single EFLAGS bit.
enum FlagState : ubyte
{
    /// the flag is tested
    TE_ = 1 << 0,
    /// the flag is modified
    MO_ = 1 << 1,
    /// the flag is reset
    RE_ = 1 << 2,
    /// the flag is set
    SE_ = 1 << 3,
    /// undefined behaviour
    UN_ = 1 << 4,
    /// the flag is restored to its prior state
    PR_ = 1 << 5
}

/// Per-flag description of what an instruction does to the EFLAGS register.
/// Each member holds a FlagState value.
struct Eflags
{
align(1):
const:
    FlagState OF_;
    FlagState SF_;
    FlagState ZF_;
    FlagState AF_;
    FlagState PF_;
    FlagState CF_;
    FlagState TF_;
    FlagState IF_;
    FlagState DF_;
    FlagState NT_;
    FlagState RF_;
    // padding byte, not part of the public interface
    private ubyte alignment;
}

/// Instruction sets an instruction can belong to (one bit per set).
enum InstrSet : ushort
{
    GENERAL_PURPOSE_INSTRUCTION = 1 << 0,
    FPU_INSTRUCTION             = 1 << 1,
    MMX_INSTRUCTION             = 1 << 2,
    SSE_INSTRUCTION             = 1 << 3,
    SSE2_INSTRUCTION            = 1 << 4,
    SSE3_INSTRUCTION            = 1 << 5,
    SSSE3_INSTRUCTION           = 1 << 6,
    SSE41_INSTRUCTION           = 1 << 7,
    SSE42_INSTRUCTION           = 1 << 8,
    SYSTEM_INSTRUCTION          = 1 << 9,
    VM_INSTRUCTION              = 1 << 10,
    UNDOCUMENTED_INSTRUCTION    = 1 << 11,
    AMD_INSTRUCTION             = 1 << 12,
    ILLEGAL_INSTRUCTION         = 1 << 13,
    AES_INSTRUCTION             = 1 << 14,
    CLMUL_INSTRUCTION           = 1 << 15,
}

/// Instruction categories. Values are sequential, starting at 1.
enum InstrCat : ushort
{
    DATA_TRANSFER = 0x1,
    ARITHMETIC_INSTRUCTION,
    LOGICAL_INSTRUCTION,
    SHIFT_ROTATE,
    BIT_UInt8,
    CONTROL_TRANSFER,
    STRING_INSTRUCTION,
    InOutINSTRUCTION,
    ENTER_LEAVE_INSTRUCTION,
    FLAG_CONTROL_INSTRUCTION,
    SEGMENT_REGISTER,
    MISCELLANEOUS_INSTRUCTION,
    COMPARISON_INSTRUCTION,
    LOGARITHMIC_INSTRUCTION,
    TRIGONOMETRIC_INSTRUCTION,
    UNSUPPORTED_INSTRUCTION,
    LOAD_CONSTANTS,
    FPUCONTROL,
    STATE_MANAGEMENT,
    CONVERSION_INSTRUCTION,
    SHUFFLE_UNPACK,
    PACKED_SINGLE_PRECISION,
    SIMD128bits,
    SIMD64bits,
    CACHEABILITY_CONTROL,
    FP_INTEGER_CONVERSION,
    SPECIALIZED_128bits,
    SIMD_FP_PACKED,
    SIMD_FP_HORIZONTAL,
    AGENT_SYNCHRONISATION,
    PACKED_ALIGN_RIGHT,
    PACKED_SIGN,
    PACKED_BLENDING_INSTRUCTION,
    PACKED_TEST,
    PACKED_MINMAX,
    HORIZONTAL_SEARCH,
    PACKED_EQUALITY,
    STREAMING_LOAD,
    INSERTION_EXTRACTION,
    DOT_PRODUCT,
    SAD_INSTRUCTION,
    ACCELERATOR_INSTRUCTION, /* crc32, popcnt (sse4.2) */
    ROUND_INSTRUCTION
}

/// Branch types. Each negated condition is the negative of its positive form.
enum BranchType : int
{
    JO    = 1,
    JC    = 2,
    JE    = 3,
    JA    = 4,
    JS    = 5,
    JP    = 6,
    JL    = 7,
    JG    = 8,
    JB    = JC,  /* JC == JB */
    JECXZ = 10,
    JMP   = 11,
    CALL  = 12,
    RET   = 13,
    JNO   = -1,
    JNC   = -2,
    JNE   = -3,
    JNA   = -4,
    JNS   = -5,
    JNP   = -6,
    JNL   = -7,
    JNG   = -8,
    JNB   = JNC  /* JNC == JNB */
}

/// Output structure describing a decoded instruction.
struct InstrType
{
align(1):
const:
    /// instruction category, see the InstrCat enum.
    InstrCat category;
    /// instruction set, see the InstrSet enum.
    InstrSet set;
    /// the instruction opcode, up to 3 bytes.
    int opcode;
    /// the instruction as text, up to 16 chars but null terminated.
    char[16] mnemonic;
    /// the instruction branch type, only valid when the category is equal to CONTROL_TRANSFER.
    BranchType branch;
    /// the eflags modifications, see struct Eflags and enum FlagState.
    Eflags flags;
    /// destination address of a branch instruction if <> 0.
    ulong addrValue;
    /// If the instruction uses a constant, this immediate value is stored here.
    long immediat;
    /// can be interpreted using bit masking according to the ArgType enum
    ArgType implicitModifiedRegs;
}

// -----------------------------------------------------------------------------
// Argument -------------------------------------------------------------------+

/// Segment register identifiers, as stored in Argument.segmentReg.
enum SegmentReg
{
    ESReg = 1,
    DSReg,
    FSReg,
    GSReg,
    CSReg,
    SSReg
}

/// Bit flags describing an argument (see Argument.type); the same encoding is
/// used by InstrType.implicitModifiedRegs.
enum ArgType : uint
{
    // kind of argument
    NO_ARGUMENT   = 0x10000000,
    REGISTER_TYPE = 0x20000000,
    MEMORY_TYPE   = 0x40000000,
    CONSTANT_TYPE = 0x80000000,

    // register class
    MMX_REG               = 0x10000,
    GENERAL_REG           = 0x20000,
    FPU_REG               = 0x40000,
    SSE_REG               = 0x80000,
    CR_REG                = 0x100000,
    DR_REG                = 0x200000,
    SPECIAL_REG           = 0x400000,
    MEMORY_MANAGEMENT_REG = 0x800000,
    SEGMENT_REG           = 0x1000000,

    RELATIVE_ = 0x4000000,
    ABSOLUTE_ = 0x8000000,

    // NOTE: READ and WRITE have the same numeric values as REG0 and REG1.
    READ  = 0x1,
    WRITE = 0x2,

    // When the high word designates a register (e.g. REGISTER_TYPE + FPU_REG),
    // the low word indicates REGx, one bit per register.
    // NOTE(review): the original comment gave the mask as 0xF000F0000, which
    // does not fit in 32 bits - presumably 0xFFFF0000; confirm against the
    // BeaEngine C header.
    REG0  = 1 << 0,  //( RAX / MM0 / ST0 / XMM0  / CR0  / DR0  / GDTR / ES )
    REG1  = 1 << 1,  //( RCX / MM1 / ST1 / XMM1  / CR1  / DR1  / LDTR / CS )
    REG2  = 1 << 2,  //( RDX / MM2 / ST2 / XMM2  / CR2  / DR2  / IDTR / SS )
    REG3  = 1 << 3,  //( RBX / MM3 / ST3 / XMM3  / CR3  / DR3  / TR   / DS )
    REG4  = 1 << 4,  //( RSP / MM4 / ST4 / XMM4  / CR4  / DR4  / ---- / FS )
    REG5  = 1 << 5,  //( RBP / MM5 / ST5 / XMM5  / CR5  / DR5  / ---- / GS )
    REG6  = 1 << 6,  //( RSI / MM6 / ST6 / XMM6  / CR6  / DR6  / ---- / -- )
    REG7  = 1 << 7,  //( RDI / MM7 / ST7 / XMM7  / CR7  / DR7  / ---- / -- )
    REG8  = 1 << 8,  //( R8  / --- / --- / XMM8  / CR8  / DR8  / ---- / -- )
    REG9  = 1 << 9,  //( R9  / --- / --- / XMM9  / CR9  / DR9  / ---- / -- )
    REG10 = 1 << 10, //( R10 / --- / --- / XMM10 / CR10 / DR10 / ---- / -- )
    REG11 = 1 << 11, //( R11 / --- / --- / XMM11 / CR11 / DR11 / ---- / -- )
    REG12 = 1 << 12, //( R12 / --- / --- / XMM12 / CR12 / DR12 / ---- / -- )
    REG13 = 1 << 13, //( R13 / --- / --- / XMM13 / CR13 / DR13 / ---- / -- )
    REG14 = 1 << 14, //( R14 / --- / --- / XMM14 / CR14 / DR14 / ---- / -- )
    REG15 = 1 << 15, //( R15 / --- / --- / XMM15 / CR15 / DR15 / ---- / -- )
}

/// Describes a memory access according to the formula
/// [BaseRegister + IndexRegister*Scale + Displacement].
struct MemType
{
align(4):
const:
    int BaseRegister;
    int IndexRegister;
    // 1, 2, 4 or 8
    int Scale;
    long Displacement;
}

/// Describes an instruction argument.
struct Argument
{
align(1):
const:
    /// the argument as text, up to 64 chars but null terminated
    char[64] mnemonic;
    /// see the ArgType enum
    ArgType type;
    int size;
    // presumably LowPosition or HighPosition - confirm against the engine docs
    int position;
    // presumably a combination of ArgType.READ / ArgType.WRITE - confirm
    uint accessMode;
    /// memory operand description, see struct MemType
    MemType memory;
    SegmentReg segmentReg; // only if arg1 or arg2
}

// -----------------------------------------------------------------------------
// High end -------------------------------------------------------------------+

/**
 * Specifies the architecture used for the decoding.
 */
enum Archi : uint
{
    ia32    = 0,
    a8086   = 16,
    intel64 = 64
}

/**
 * Display options.
 * You can specify the syntax: masm, nasm, goasm or AT&T.
 * You can specify the number format you want to use: prefixed numbers or suffixed ones.
 * You can even add a tabulation between the mnemonic and the first operand or
 * display the segment registers used by the memory addressing.
 */
enum DisasmOpts : ulong
{
    noTabs          = 0x0,
    Tabs            = 0x1,
    synMasm         = 0x000,
    synGoAsm        = 0x100,
    synNasm         = 0x200,
    synAT           = 0x400,
    prefixedNumeral = 0x10000,
    suffixedNumeral = 0x00000,
    showSegmentRegs = 0x01000000
}

/// State values for the lock prefix (see PrefixInfo.lock).
enum LockPrefix : ubyte
{
    NotUsedPrefix     = 0,
    InUsePrefix       = 1,
    SuperfluousPrefix = 2,
    InvalidPrefix     = 4,
    MandatoryPrefix   = 8
}

/// REX prefix description, embedded in PrefixInfo.
struct Rex
{
align(1):
    ubyte W_;
    ubyte R_;
    ubyte X_;
    ubyte B_;
    ubyte state;
}

/// Prefixes used by the decoded instruction (see DisasmParams.prefix).
struct PrefixInfo
{
align(1):
const:
    int Number;
    int NbUndefined;
    LockPrefix lock;
    // NOTE(review): the ubyte fields below presumably hold the same state
    // values as LockPrefix - confirm against the BeaEngine C header.
    ubyte OperandSize;
    ubyte AddressSize;
    ubyte RepnePrefix;
    ubyte RepPrefix;
    ubyte FSPrefix;
    ubyte SSPrefix;
    ubyte GSPrefix;
    ubyte ESPrefix;
    ubyte CSPrefix;
    ubyte DSPrefix;
    ubyte BranchTaken;
    ubyte BranchNotTaken;
    Rex rex;
    // padding
    char[2] alignment;
}

/**
 * This structure is used to store the mnemonic, source and destination operands.
 * You just have to specify the address where the engine has to make the analysis.
 */
struct DisasmParams
{
align(1):
public:
    /// input, the entry point
    void* eip;
    /// input, when set CALL - JMP - JX/JNX - LOOP are based on this value, not eip
    ulong virtualAddress;
    /// input, limits the possible instruction length. The default value, 15, is also the longest possible.
    uint securityBlock = 15;
    /// output, instruction and argument as text, up to 64 chars but null terminated.
    const char[64] asString;
    /// input, specifies the target architecture, according to the enum Archi.
    Archi archi;
    /// input, specifies the result format.
    DisasmOpts options;
const:
    /// output, describes the instruction.
    InstrType instruction;
    /// output, optional, describes the first argument.
    Argument arg1;
    /// output, optional, describes the second argument.
    Argument arg2;
    /// output, optional, describes the third argument.
    Argument arg3;
    /// output, PrefixInfo containing an exhaustive list of used prefixes.
    PrefixInfo prefix;
private:
    uint[40] Reserved_;
}

// -----------------------------------------------------------------------------
// Functions ------------------------------------------------------------------+

// C entry points. Disasm is kept private and is reached through disassemble().
private extern(C) int Disasm(DisasmParams* params);
extern(C) const(ubyte*) BeaEngineVersion();
extern(C) const(ubyte*) BeaEngineRevision();

/**
 * The Disasm function allows you to decode all instructions coded according to
 * the rules of IA-32 and Intel 64 architectures. It makes a precise analysis of
 * the focused instruction and sends back a complete structure that is usable to
 * make data-flow and control-flow studies. Disasm is able to decode all the
 * documented intel instructions (standard instructions, FPU, MMX, SSE, SSE2,
 * SSE3, SSSE3, SSE4.1, SSE4.2, VMX, CLMUL and AES technologies) and undocumented
 * ones like SALC, FEMMS (AMD instruction), HINT_NOP, ICEBP and aliases.
 *
 * Params:
 *      params = the parameters describing what to disassemble at a particular
 *      address. The struct contains the result after the call.
 *
 * Returns:
 *      If the operation is successful then the result is equal to the length of
 *      the instruction, so a value between 1 and 15.
 *      If the operation fails then the result is either equal to
 *      SpecialInfo.UNKNOWN_OPCODE or to SpecialInfo.OUT_OF_BLOCK.
 *
 * Examples:
 * ---
 * DisasmParams p;
 * p.eip = &myFunction;
 * disassemble(&p);
 * writeln(p.asString);
 * ---
 */
int disassemble(DisasmParams* params)
{
    // Thin forwarder: params is handed to the C engine unchanged.
    const decodedLength = Disasm(params);
    return decodedLength;
}

// -----------------------------------------------------------------------------
// Other ----------------------------------------------------------------------+

enum LowPosition = 0;
enum HighPosition = 1;

/// Special return values of disassemble() plus display-option constants.
/// The display-option members carry the same values as the DisasmOpts members.
enum SpecialInfo
{
    UNKNOWN_OPCODE = -1,
    OUT_OF_BLOCK   = 0,

    /* === mask = 0xff */
    NoTabulation = 0x00000000,
    Tabulation   = 0x00000001,

    /* === mask = 0xff00 */
    MasmSyntax  = 0x00000000,
    GoAsmSyntax = 0x00000100,
    NasmSyntax  = 0x00000200,
    ATSyntax    = 0x00000400,

    /* === mask = 0xff0000 */
    PrefixedNumeral = 0x00010000,
    SuffixedNumeral = 0x00000000,

    /* === mask = 0xff000000 */
    ShowSegmentRegs = 0x01000000
}

// -----------------------------------------------------------------------------