1 module beaengine;
2 
3 //TODO: integrate documentation
4 
5 // Instruction ----------------------------------------------------------------+
6 
/**
 * Effect an instruction has on one EFLAGS bit.
 *
 * Each member occupies its own bit of the underlying ubyte.
 */
enum FlagState : ubyte {
    TE_ = 1 << 0, /// the flag is tested
    MO_ = 1 << 1, /// the flag is modified
    RE_ = 1 << 2, /// the flag is reset
    SE_ = 1 << 3, /// the flag is set
    UN_ = 1 << 4, /// undefined behaviour
    PR_ = 1 << 5, /// the flag is restored to its prior state
}
22 
/**
 * Describes what the decoded instruction does to the EFLAGS register.
 *
 * Holds one FlagState byte per flag, laid out back to back (align(1)).
 * All fields are const from the D side.
 */
struct Eflags {
align(1):
const:
    /// One entry per flag, in declaration order.
    FlagState OF_, SF_, ZF_, AF_, PF_, CF_;
    /// ditto
    FlagState TF_, IF_, DF_, NT_, RF_;
    // Trailing filler byte after the eleven flag entries; not meant for callers.
    private ubyte alignment;
}
39 
/**
 * Instruction sets an instruction can belong to.
 *
 * Each member is a single bit of the underlying ushort.
 */
enum InstrSet : ushort {
    GENERAL_PURPOSE_INSTRUCTION = 1 << 0,
    FPU_INSTRUCTION             = 1 << 1,
    MMX_INSTRUCTION             = 1 << 2,
    SSE_INSTRUCTION             = 1 << 3,
    SSE2_INSTRUCTION            = 1 << 4,
    SSE3_INSTRUCTION            = 1 << 5,
    SSSE3_INSTRUCTION           = 1 << 6,
    SSE41_INSTRUCTION           = 1 << 7,
    SSE42_INSTRUCTION           = 1 << 8,
    SYSTEM_INSTRUCTION          = 1 << 9,
    VM_INSTRUCTION              = 1 << 10,
    UNDOCUMENTED_INSTRUCTION    = 1 << 11,
    AMD_INSTRUCTION             = 1 << 12,
    ILLEGAL_INSTRUCTION         = 1 << 13,
    AES_INSTRUCTION             = 1 << 14,
    CLMUL_INSTRUCTION           = 1 << 15,
}
59 
/**
 * Instruction categories.
 *
 * Values are consecutive, starting at 1; they are spelled out here so the
 * numeric value of every member is visible at a glance.
 */
enum InstrCat : ushort {
    DATA_TRANSFER               = 1,
    ARITHMETIC_INSTRUCTION      = 2,
    LOGICAL_INSTRUCTION         = 3,
    SHIFT_ROTATE                = 4,
    BIT_UInt8                   = 5,
    CONTROL_TRANSFER            = 6,
    STRING_INSTRUCTION          = 7,
    InOutINSTRUCTION            = 8,
    ENTER_LEAVE_INSTRUCTION     = 9,
    FLAG_CONTROL_INSTRUCTION    = 10,
    SEGMENT_REGISTER            = 11,
    MISCELLANEOUS_INSTRUCTION   = 12,
    COMPARISON_INSTRUCTION      = 13,
    LOGARITHMIC_INSTRUCTION     = 14,
    TRIGONOMETRIC_INSTRUCTION   = 15,
    UNSUPPORTED_INSTRUCTION     = 16,
    LOAD_CONSTANTS              = 17,
    FPUCONTROL                  = 18,
    STATE_MANAGEMENT            = 19,
    CONVERSION_INSTRUCTION      = 20,
    SHUFFLE_UNPACK              = 21,
    PACKED_SINGLE_PRECISION     = 22,
    SIMD128bits                 = 23,
    SIMD64bits                  = 24,
    CACHEABILITY_CONTROL        = 25,
    FP_INTEGER_CONVERSION       = 26,
    SPECIALIZED_128bits         = 27,
    SIMD_FP_PACKED              = 28,
    SIMD_FP_HORIZONTAL          = 29,
    AGENT_SYNCHRONISATION       = 30,
    PACKED_ALIGN_RIGHT          = 31,
    PACKED_SIGN                 = 32,
    PACKED_BLENDING_INSTRUCTION = 33,
    PACKED_TEST                 = 34,
    PACKED_MINMAX               = 35,
    HORIZONTAL_SEARCH           = 36,
    PACKED_EQUALITY             = 37,
    STREAMING_LOAD              = 38,
    INSERTION_EXTRACTION        = 39,
    DOT_PRODUCT                 = 40,
    SAD_INSTRUCTION             = 41,
    ACCELERATOR_INSTRUCTION     = 42, // crc32, popcnt (sse4.2)
    ROUND_INSTRUCTION           = 43
}
106 
/**
 * Possible branch types.
 *
 * Each negative J* member carries the negated value of its positive
 * counterpart (JNO == -JO, JNC == -JC, ...).
 */
enum BranchType : int {
    JO    =  1,
    JC    =  2,
    JE    =  3,
    JA    =  4,
    JS    =  5,
    JP    =  6,
    JL    =  7,
    JG    =  8,
    JB    = JC,   // second name for the same value as JC
    JECXZ = 10,
    JMP   = 11,
    CALL  = 12,
    RET   = 13,
    JNO   = -1,
    JNC   = -2,
    JNE   = -3,
    JNA   = -4,
    JNS   = -5,
    JNP   = -6,
    JNL   = -7,
    JNG   = -8,
    JNB   = JNC   // second name for the same value as JNC
}
132 
/**
 * Output structure describing the decoded instruction.
 *
 * Fields are byte-packed (align(1)) and const from the D side.
 */
struct InstrType {
align(1):
const:
    InstrCat   category;             /// Instruction category, see the InstrCat enum.
    InstrSet   set;                  /// Instruction set, see the InstrSet enum.
    int        opcode;               /// The instruction opcode, up to 3 bytes.
    char[16]   mnemonic;             /// The instruction as text: up to 16 chars, null terminated.
    BranchType branch;               /// Branch type; only meaningful when category is CONTROL_TRANSFER.
    Eflags     flags;                /// EFLAGS modifications, see struct Eflags and enum FlagState.
    ulong      addrValue;            /// Destination address of a branch instruction when non-zero.
    long       immediat;             /// The immediate value, when the instruction uses a constant.
    ArgType    implicitModifiedRegs; /// Bit mask to be interpreted with the ArgType enum.
}
155 
156 // -----------------------------------------------------------------------------
157 // Argument -------------------------------------------------------------------+
158 
/// Segment register identifiers, numbered consecutively from 1 (used by Argument.segmentReg).
enum SegmentReg {
    ESReg = 1,
    DSReg,      // 2
    FSReg,      // 3
    GSReg,      // 4
    CSReg,      // 5
    SSReg       // 6
}
167 
/**
 * Bit flags describing an instruction argument.
 *
 * The members fall into groups that use different bit ranges of the uint:
 * bits 28-31 hold the *_TYPE members, bits 16-24 the *_REG members,
 * bits 26-27 RELATIVE_/ABSOLUTE_, and the low word holds READ/WRITE or
 * REG0..REG15 (these two groups share the same values).
 */
enum ArgType : uint {
    // bits 28..31
    NO_ARGUMENT   = 1u << 28,
    REGISTER_TYPE = 1u << 29,
    MEMORY_TYPE   = 1u << 30,
    CONSTANT_TYPE = 1u << 31,

    // bits 16..24
    MMX_REG               = 1u << 16,
    GENERAL_REG           = 1u << 17,
    FPU_REG               = 1u << 18,
    SSE_REG               = 1u << 19,
    CR_REG                = 1u << 20,
    DR_REG                = 1u << 21,
    SPECIAL_REG           = 1u << 22,
    MEMORY_MANAGEMENT_REG = 1u << 23,
    SEGMENT_REG           = 1u << 24,

    // bits 26..27
    RELATIVE_ = 1u << 26,
    ABSOLUTE_ = 1u << 27,

    // bits 0..1
    READ  = 1u << 0,
    WRITE = 1u << 1,

    // When the high word of a value (value & 0xFFFF_0000) equals
    // REGISTER_TYPE + one of the *_REG flags (e.g. REGISTER_TYPE + FPU_REG),
    // the low word indicates REGx.
    // NOTE(review): the mask was previously written as 0xF000F0000, which
    // does not fit in 32 bits; 0xFFFF_0000 is the presumed intent - confirm.
    REG0  = 1u << 0,   //( RAX / MM0 / ST0 / XMM0  / CR0  / DR0  / GDTR / ES )
    REG1  = 1u << 1,   //( RCX / MM1 / ST1 / XMM1  / CR1  / DR1  / LDTR / CS )
    REG2  = 1u << 2,   //( RDX / MM2 / ST2 / XMM2  / CR2  / DR2  / IDTR / SS )
    REG3  = 1u << 3,   //( RBX / MM3 / ST3 / XMM3  / CR3  / DR3  / TR   / DS )
    REG4  = 1u << 4,   //( RSP / MM4 / ST4 / XMM4  / CR4  / DR4  / ---- / FS )
    REG5  = 1u << 5,   //( RBP / MM5 / ST5 / XMM5  / CR5  / DR5  / ---- / GS )
    REG6  = 1u << 6,   //( RSI / MM6 / ST6 / XMM6  / CR6  / DR6  / ---- / -- )
    REG7  = 1u << 7,   //( RDI / MM7 / ST7 / XMM7  / CR7  / DR7  / ---- / -- )
    REG8  = 1u << 8,   //( R8  / --- / --- / XMM8  / CR8  / DR8  / ---- / -- )
    REG9  = 1u << 9,   //( R9  / --- / --- / XMM9  / CR9  / DR9  / ---- / -- )
    REG10 = 1u << 10,  //( R10 / --- / --- / XMM10 / CR10 / DR10 / ---- / -- )
    REG11 = 1u << 11,  //( R11 / --- / --- / XMM11 / CR11 / DR11 / ---- / -- )
    REG12 = 1u << 12,  //( R12 / --- / --- / XMM12 / CR12 / DR12 / ---- / -- )
    REG13 = 1u << 13,  //( R13 / --- / --- / XMM13 / CR13 / DR13 / ---- / -- )
    REG14 = 1u << 14,  //( R14 / --- / --- / XMM14 / CR14 / DR14 / ---- / -- )
    REG15 = 1u << 15,  //( R15 / --- / --- / XMM15 / CR15 / DR15 / ---- / -- )
}
208 
/**
 * Describes a memory operand according to the formula
 * [BaseRegister + IndexRegister*Scale + Displacement].
 *
 * Every field is const and placed with align(4).
 */
struct MemType {
align(4):
const:
    int  BaseRegister;
    int  IndexRegister;
    int  Scale;          // 1, 2, 4 or 8
    long Displacement;
}
218 
/**
 * Describes one argument (operand) of the decoded instruction.
 *
 * Fields are byte-packed (align(1)) and const from the D side.
 */
struct Argument {
align(1):
const:
    char[64]   mnemonic;    /// The argument as text: up to 64 chars, null terminated.
    ArgType    type;        /// Argument description flags, see the ArgType enum.
    int        size;
    int        position;    // presumably LowPosition / HighPosition - TODO confirm
    uint       accessMode;  // presumably ArgType.READ / ArgType.WRITE - TODO confirm
    MemType    memory;      /// Memory operand description, see struct MemType.
    SegmentReg segmentReg;  // only if arg1 or arg2
}
231 
232 // -----------------------------------------------------------------------------
233 // High end -------------------------------------------------------------------+
234 
/// Selects the architecture used for the decoding (see DisasmParams.archi).
enum Archi : uint {
    ia32    = 0x00, /// value 0
    a8086   = 0x10, /// value 16
    intel64 = 0x40  /// value 64
}
243 
/**
 * Display options for the textual output.
 *
 * The options cover four independent choices:
 * $(UL
 *   $(LI a tabulation between the mnemonic and the first operand, or none;)
 *   $(LI the syntax: masm, goasm, nasm or AT&T;)
 *   $(LI the number format: prefixed or suffixed numerals;)
 *   $(LI whether the segment registers used by the memory addressing are shown.)
 * )
 * The members named noTabs, synMasm and suffixedNumeral are all zero.
 */
enum DisasmOpts : ulong {
    noTabs          = 0,
    Tabs            = 1 << 0,
    synMasm         = 0,
    synGoAsm        = 1 << 8,
    synNasm         = 1 << 9,
    synAT           = 1 << 10,
    prefixedNumeral = 1 << 16,
    suffixedNumeral = 0,
    showSegmentRegs = 1 << 24
}
262 
/// State of a prefix; zero means not used, every other member is a single bit.
enum LockPrefix : ubyte {
    NotUsedPrefix     = 0,
    InUsePrefix       = 1 << 0,
    SuperfluousPrefix = 1 << 1,
    InvalidPrefix     = 1 << 2,
    MandatoryPrefix   = 1 << 3
}
270 
/// Five byte-sized fields describing the REX prefix, packed with align(1).
struct Rex {
align(1):
    ubyte W_, R_, X_, B_;
    ubyte state;
}
279 
/**
 * Information about the prefixes of the decoded instruction.
 *
 * Fields are byte-packed (align(1)) and const from the D side.
 */
struct PrefixInfo {
align(1):
const:
    int        Number, NbUndefined;
    LockPrefix lock;
    // One byte per prefix below.
    // NOTE(review): presumably these hold LockPrefix-style state values - confirm.
    ubyte      OperandSize, AddressSize;
    ubyte      RepnePrefix, RepPrefix;
    ubyte      FSPrefix, SSPrefix, GSPrefix, ESPrefix, CSPrefix, DSPrefix;
    ubyte      BranchTaken, BranchNotTaken;
    Rex        rex;
    // Two trailing filler bytes.
    char[2]    alignment;
}
300 
/**
 * Input and output record of the disassembler.
 *
 * The caller fills in the input fields (at least eip, the address to
 * analyse) and passes the struct to disassemble(); the output fields then
 * hold the mnemonic and the description of the instruction and its operands.
 * Fields are byte-packed (align(1)).
 */
struct DisasmParams {
align(1):
    /// Input: the entry point.
    void*           eip;
    /// Input: when set, CALL - JMP - JX/JNX - LOOP are based on this value, not eip.
    ulong           virtualAddress;
    /// Input: limits the possible instruction length. The default, 15, is also the longest possible.
    uint            securityBlock = 15;
    /// Output: instruction and arguments as text, up to 64 chars, null terminated.
    const(char[64]) asString;
    /// Input: the target architecture, see the Archi enum.
    Archi           archi;
    /// Input: the result format, see the DisasmOpts enum.
    DisasmOpts      options;
const:
    /// Output: describes the instruction.
    InstrType       instruction;
    /// Output, optional: describes the first argument.
    Argument        arg1;
    /// Output, optional: describes the second argument.
    Argument        arg2;
    /// Output, optional: describes the third argument.
    Argument        arg3;
    /// Output: exhaustive list of the used prefixes.
    PrefixInfo      prefix;
private:
    // Reserved storage; still covered by the const: label above.
    uint[40]        Reserved_;
}
334 
335 // -----------------------------------------------------------------------------
336 // Functions ------------------------------------------------------------------+
337 
// C-linkage entry points; the implementations are not part of this module.

// Raw decoder entry point; module-private, wrapped by disassemble().
private extern(C) int Disasm(DisasmParams* params);

// NOTE(review): presumably these return null-terminated version/revision strings - confirm.
extern(C) const(ubyte*) BeaEngineVersion();
extern(C) const(ubyte*) BeaEngineRevision();
343 
/**
 * Decodes one instruction coded according to the rules of the IA-32 and
 * Intel 64 architectures.
 *
 * The focused instruction is analysed precisely and a complete structure is
 * sent back, usable for data-flow and control-flow studies. All documented
 * Intel instructions are decoded (standard instructions, FPU, MMX, SSE, SSE2,
 * SSE3, SSSE3, SSE4.1, SSE4.2, VMX, CLMUL and AES technologies), as well as
 * undocumented ones such as SALC, FEMMS (AMD instruction), HINT_NOP, ICEBP
 * and aliases.
 *
 * Params:
 * params = describes what to disassemble and at which address. The struct
 * contains the result after the call.
 *
 * Returns:
 * If the operation is successful, the length of the instruction, so a value
 * between 1 and 15. If the operation fails, either SpecialInfo.UNKNOWN_OPCODE
 * or SpecialInfo.OUT_OF_BLOCK.
 *
 * Examples:
 * ---
 * DisasmParams p;
 * p.eip = &myFunction;
 * disassemble(&p);
 * writeln(p.asString);
 * ---
 */
int disassemble(DisasmParams* params)
{
    // Thin forwarder to the C entry point.
    immutable decodedLength = Disasm(params);
    return decodedLength;
}
373 
374 // -----------------------------------------------------------------------------
375 // Other ----------------------------------------------------------------------+
376 
// NOTE(review): presumably the values taken by Argument.position - confirm.
enum {
    LowPosition  = 0,
    HighPosition = 1
}
379 
/**
 * Special return values of the decoder, plus display-option constants with
 * the same numeric values as the members of DisasmOpts.
 */
enum SpecialInfo
{
    UNKNOWN_OPCODE  = -1,
    OUT_OF_BLOCK    = 0,

    // mask = 0xff
    NoTabulation    = 0,
    Tabulation      = 1 << 0,

    // mask = 0xff00
    MasmSyntax      = 0,
    GoAsmSyntax     = 1 << 8,
    NasmSyntax      = 1 << 9,
    ATSyntax        = 1 << 10,

    // mask = 0xff0000
    PrefixedNumeral = 1 << 16,
    SuffixedNumeral = 0,

    // mask = 0xff000000
    ShowSegmentRegs = 1 << 24
}
402 
403 // -----------------------------------------------------------------------------