/*
 * Source: GitHub gist jmiserez/797ed1929543e1c4a860
 * X86 (subset) interpreter, prototype. Reads instructions from a plaintext file,
 * instructions separated by spaces.
 */
/* | |
* vm.c - Simple X86 interpreter | |
* | |
* Copyright (c) 2013, Jeremie Miserez <[email protected]> | |
*/ | |
/* | |
* Highlights/Features not specified in assignment | |
* =============================================== | |
* - Memory simulation of all 4GB with paging | |
* - Endianness is ensured by correctly reading and writing all bytes | |
* in the correct order (little endian) byte-by-byte. | |
* | |
* - Program is loaded into simulated memory and executed from there | |
* -> The interpreter terminates only once the instruction at %eip is no | |
* longer readable/valid (e.g. all zeros). Thus, self-modifying code | |
* is possible! | |
* | |
* - Graphical visualization of memory/registers/instructions: | |
* -> The memory around EBP and ESP is shown as in textbooks. | |
* -> The individual bytes can be seen in a similar fashion as in a HEX-editor, | |
* with interpretations as bytes, 16bit integers, 32bit integers. | |
* | |
* - Disassembler supports decoding of all addressing modes and possible bytes. | |
 *   -> New opcodes can be easily added to the decoder without having to change any of
* the parts not directly concerned with the opcode itself. | |
* | |
* - Dispatcher/Interpreter supports generalized interpretation of addresses and | |
* registers: | |
* -> Supports all addressing modes. Although not all opcodes are implemented for | |
* addressing modes, the LEA instruction is implemented fully (32/16bits) as an | |
* example. Note that the functions used for LEA could also be reused for | |
* all other operations. | |
 *   -> Concept of a vm_effective_location, which can be a register or an actual
* memory address. This makes it easy to specify a "source" and a "target", | |
* similar to the descriptions in the Intel x86 manual. | |
* -> This makes the interpreter very general, new opcodes can more or less | |
* be implemented without modification of the framework. | |
* | |
* - Support for JNZ, NOP, and additional XOR functions. | |
*/ | |
/* | |
* Memory simulation: 4GB of virtual memory using on-demand paging | |
* =============================================================== | |
* General idea: | |
* - Provide a 4GiB 32bit address space while only allocating the memory actually needed. | |
* - Use a form of paging with 64KiB pages and a fixed page table with 65536 entries. | |
* - Whenever a memory address is accessed, look up the appropriate memory address and if | |
* necessary malloc the appropriate space. | |
* | |
* Pagetable size: | |
* - 65536 entries of 64bits each = 512KiB pagetable | |
* | |
* Reading and writing is done byte by byte in order to ensure correct access across page | |
* boundaries. A virtual address (between 0 and 4GB) as used by the X86 program is used | |
* to access the contents of the simulated memory. | |
* | |
* | |
* | |
* | |
* | |
* A note about segments selectors/descriptor tables: | |
* - As the segment descriptor tables are not available and need not be implemented, we can | |
 *   (implicitly) assume that our segments look as follows (same as usermode/Linux)
* - Code segment: Starts at 0x0, limit 4GB, (Permissions (unchecked): Read/Execute) | |
* -> CS register | |
* - Data segment: Starts at 0x0, limit 4GB, (Permissions (unchecked): Read/Write) | |
* -> SS, DS, ES registers. | |
* - Unused/Unimplemented: Segments pointed to by: | |
* -> FS, GS registers | |
* Also, we assume that the D/B flags are set to 1 (32bit addressing mode) | |
* | |
* Therefore, no translation between logical addresses and effective addresses is necessary, | |
* as the base address is always 0. Also, no check is necessary to determine whether the | |
* address is inside the correct segment, as they all have a 32bit address space. | |
* | |
*/ | |
#include <stdlib.h> | |
#include <stdint.h> // data types | |
#include <stdio.h> // IO etc. | |
#include <ctype.h> //characters | |
#include <stdarg.h> //variadic functions | |
#include <inttypes.h> //PRI macros | |
#include <string.h> | |
#include <errno.h> | |
#include <unistd.h> | |
/*
 * Constants and macros
 */

/* Decoder: size of the per-instruction raw byte buffer (vm_op.text) and the
 * states of the decode() state machine. */
#define MAX_INSTRUCTION_LENGTH 15
#define DECODE_STATE_END 0
#define DECODE_STATE_PREFIX 1
#define DECODE_STATE_OPCODE 2
#define DECODE_STATE_CODE_OFFSET 3
#define DECODE_STATE_MODRM 4
#define DECODE_STATE_SIB 5
#define DECODE_STATE_DISPLACEMENT 6
#define DECODE_STATE_IMMEDIATE 7

/* Decoded operations (stored in vm_op.op). The suffix names the opcode byte
 * and, where present, the ModRM reg-field extension that selects the operation. */
#define OP_NOP 0
#define OP_LEA_8D 1
#define OP_AND_83_4 2
#define OP_PUSH_50 3
#define OP_SAR_C1_7 4
#define OP_XOR_34 5
#define OP_MOV_89 6
#define OP_INC_40 7
#define OP_SUB_83_5 8
#define OP_CMP_80_7 9
#define OP_POP_58 10
#define OP_JNZ_75 11
#define OP_XOR_31 12

/* Opcode bytes shared by several operations; decode() stores these first and
 * (for 83/C1/80) replaces them once the ModRM reg field has been read. */
#define OP_0F 100
#define OP_83 101
#define OP_C1 102
#define OP_80 103

/* General purpose register numbers (indices into vm_registerfile.gp) */
#define EAX 0
#define ECX 1
#define EDX 2
#define EBX 3
#define ESP 4
#define EBP 5
#define ESI 6
#define EDI 7

/* Byte register numbers.
 * NOTE(review): the original comment read "For 16bit addressing"; the users of
 * these constants are not visible in this section -- confirm intended meaning. */
#define AL 0
#define CL 1
#define DL 2
#define BL 3
#define AH 4
#define CH 5
#define DH 6
#define BH 7

/* Segment register numbers (indices into vm_registerfile.seg) */
#define CS 0
#define SS 1
#define DS 2
#define ES 3
#define FS 4
#define GS 5

/* EFLAGS bit positions */
#define CF 0 /* Carry */
#define PF 2 /* Parity */
#define AF 4 /* Adjust flag, not implemented/simulated */
#define ZF 6 /* Zero */
#define SF 7 /* Sign */
#define OF 11 /* Overflow */

/* Stack printing: number of rows shown outside / inside the stack frame */
#define PRINT_MEM_STACK_OUTSIDE_LINES 4
#define PRINT_MEM_STACK_INSIDE_LINES 4

/* Virtual memory constants */
#define MMU_PAGESIZE 0x10000 /* = 64KiB */
#define MMU_PAGES 0x10000 /* = (4GiB / 64KiB) = 65536 pages = 0x100000000 / 0x10000 */
/* | |
* Types | |
*/ | |
/* Singleton: options parsed from the command line by main() */
typedef struct {
    int loglevel;    /* internal_log() prints messages whose level is <= this value */
    int interactive; /* non-zero: interactive_wait() pauses until ENTER is pressed */
    char *inputfile; /* path given with -f; NULL means input is read from stdin */
} vm_cmdline;
/* Singleton: bytes parsed from the input (not used after loading into virtual memory) */
typedef struct {
    int length;      /* number of valid bytes in buffer */
    uint8_t *buffer; /* heap buffer filled by read_input() */
} vm_input;
/* Singleton: simulated register state */
typedef struct {
    uint32_t gp[8];  /* general purpose registers, indexed by EAX..EDI */
    uint32_t eip;    /* instruction pointer */
    uint32_t eflags; /* flags register; bit positions are CF, PF, AF, ZF, SF, OF */
    uint32_t seg[6]; /* segment registers, indexed by CS..GS */
} vm_registerfile;
/* Singleton: virtual memory state */
typedef struct {
    uint8_t **pagetable; /* array of page pointers; a NULL entry is an unallocated page */
} vm_mmu;
//Singleton: Current instruction being executed | |
typedef struct { | |
int halt; | |
int op; | |
uint8_t text[MAX_INSTRUCTION_LENGTH]; | |
uint32_t length; | |
uint8_t operandsizeoverride; | |
uint8_t addrsizeoverride; | |
uint16_t code_offset_1; | |
uint32_t code_offset_2; | |
uint8_t rbrwrd; //hardcoded register as part of opcode (e.g. PUSH 50-57) | |
uint8_t mod; | |
uint8_t rm; | |
uint8_t reg; | |
uint32_t ss; | |
uint8_t index; | |
uint8_t base; | |
uint32_t displacement; | |
uint32_t immediate; | |
int has_modrm; | |
int has_sib; | |
int disp_size; | |
int immed_size; | |
int code_offset_size; | |
int modrm_contains_opcode_extension; | |
} vm_op; | |
/* An operand location: either a general purpose register or a memory address */
typedef struct {
    int is_gp_register; /* non-zero: use reg; zero: use address */
    int reg;            /* general purpose register number */
    uint32_t address;   /* virtual address */
} vm_effective_location;
/******************** | |
* Global variables | |
*******************/ | |
vm_cmdline g_cmdline; | |
vm_input g_input; | |
vm_op g_op; | |
vm_registerfile g_regs; | |
vm_mmu g_mmu; | |
int print_counter= 0; | |
/********************* | |
* Function prototypes | |
*********************/ | |
void run_interpreter(); | |
void usage(); | |
void read_input(); | |
void load_program_into_memory(); | |
void decode(); | |
void dispatch(); | |
void set_subtract_eflags32(uint32_t old, uint32_t new, uint32_t value); | |
void set_SF_ZF_PF_eflags32(uint32_t new); | |
vm_effective_location null_location(); | |
vm_effective_location effective_loc32(); | |
vm_effective_location effective_loc16(); | |
uint32_t sib_addr32(); | |
uint16_t rm_addr16(); | |
vm_effective_location register_location(int reg); | |
vm_effective_location memory_location(uint32_t address); | |
uint32_t read_effective_loc32(vm_effective_location src); | |
uint32_t read_effective_loc16(vm_effective_location src); | |
uint32_t read_effective_loc8(vm_effective_location src); | |
void write_effective_loc32(vm_effective_location dest, uint32_t value); | |
void write_effective_loc16(vm_effective_location dest, uint32_t value); | |
void write_effective8(vm_effective_location dest, uint32_t value); | |
void init_g_regs(); | |
void init_g_op(); | |
void print_g_regs(); | |
void print_g_op(); | |
void print_memory(); | |
void print_memory_location(uint32_t address); | |
void print_input(); | |
uint8_t extract_bits8(uint8_t input, int lsb, int num); | |
uint32_t extract_bits32(uint32_t input, int lsb, int num); | |
uint32_t sign_extend8(uint32_t input); | |
uint32_t sign_extend16(uint32_t input); | |
uint32_t zero_extend8(uint32_t input); | |
uint32_t zero_extend16(uint32_t input); | |
uint8_t get_eflag(int flag); | |
uint32_t get_eflag_bit(int flag); | |
void set_eflag(int flag); | |
void write_eflag(int flag, int value); | |
void clear_eflag(int flag); | |
void toggle_eflag(int flag); | |
int parity_lsb(uint32_t value); | |
void init_g_mmu(); | |
void mmu_allocate_page(int pagenumber); | |
uint32_t mmu_read(uint32_t addr, int bytes); | |
uint8_t mmu_readbyte(int pagenumber, uint32_t offset); | |
void mmu_writebyte(int pagenumber, uint32_t offset, uint8_t value); | |
void mmu_write(uint32_t addr, int bytes, uint32_t value); | |
void internal_log(char *tag, char *template, va_list variadic_args, int level); | |
void log_fatal_error(char *template, ...); | |
void log_debug(char *template, ...); | |
/* | |
* Main function, handles cmdline arguments | |
*/ | |
int main(int argc, char **argv) { | |
char opt; | |
g_cmdline.loglevel= 0; | |
g_cmdline.interactive= 0; | |
g_cmdline.inputfile= NULL; | |
int show_usage= 1; | |
// Read and interpret the command line arguments | |
while ((opt= getopt(argc, argv, "f:si")) != EOF) { | |
switch (opt) { | |
case 'f': // input file | |
show_usage= 0; | |
g_cmdline.inputfile= optarg; | |
log_debug("Input file is %s", g_cmdline.inputfile); | |
break; | |
case 's': // stdin | |
show_usage= 0; | |
log_debug("Input is stdin"); | |
break; | |
case 'i': // interactive mode | |
g_cmdline.interactive= 1; | |
log_debug("Interactive mode"); | |
break; | |
default: | |
break; | |
} | |
} | |
if (show_usage) { | |
usage(); | |
} else { | |
run_interpreter(); | |
} | |
exit(0); | |
} | |
/*
 * Prints the usage text to stderr.
 */
void usage() {
    /* The interactive switch is "-i" (see the getopt string in main), so the
     * synopsis must show it with its dash. */
    fprintf(stderr, "Usage: vm [-i] [-f <file>] [-s] < input \n");
    fprintf(stderr, "Options\n");
    fprintf(stderr, "\t-i Interactive mode (wait for ENTER after every step)\n");
    fprintf(stderr, "\t-f <file> Use file for input\n");
    fprintf(stderr, "\t-s Use stdin for input\n");
    fprintf(stderr, "Example: vm -f myinput.txt\n");
    fprintf(stderr, "Example: vm -s < myinput.txt\n");
}
/* | |
* Waits for the user to press enter if in interactive mode. | |
*/ | |
void interactive_wait() { | |
if (g_cmdline.interactive) { | |
printf("(Press ENTER to continue...)"); | |
getchar(); //wait for user input | |
} | |
} | |
/* | |
* Allocate memory for page and add entry to pagetable | |
*/ | |
void mmu_allocate_page(int pagenumber) { | |
//allocate the page | |
void* page= (void*) calloc(1, MMU_PAGESIZE); //cleared to zeros | |
if (page == NULL ) { | |
log_fatal_error("Out of memory"); | |
} | |
//add pagetable entry | |
g_mmu.pagetable[pagenumber]= page; | |
} | |
/* | |
* Read from virtual memory | |
*/ | |
uint32_t mmu_read(uint32_t addr, int bytes) { //bytes must be 0..4 | |
int pagenumber; | |
uint32_t offset; | |
uint32_t value= 0; | |
uint32_t tmp; | |
int total= bytes; | |
int remaining= bytes; | |
while (remaining > 0) { | |
//might need to read across page boundaries, therefore read 1 byte at a time | |
pagenumber= addr / MMU_PAGESIZE; | |
offset= addr % MMU_PAGESIZE; | |
tmp= (uint32_t) mmu_readbyte(pagenumber, offset); | |
// Endianness! | |
// uint32_t: 0xAABBCCDD 0xDEADBEEF | |
// uint16_t: 0xCCDD 0xAABB 0xBEEF 0xDEAD | |
// in memory; DD CC BB AA EF BE AD DE | |
value= value | (tmp << (8 * (total - remaining))); | |
remaining--; | |
addr++; | |
} | |
return value; | |
} | |
/* | |
* Read byte from virtual memory | |
*/ | |
uint8_t mmu_readbyte(int pagenumber, uint32_t offset) { | |
uint8_t* page= g_mmu.pagetable[pagenumber]; | |
if (page == NULL ) { | |
return 0; //read 0x0 if page is unallocated | |
} else { | |
uint8_t* mem= page + offset; | |
return *mem; | |
} | |
} | |
/* | |
* Write byte to virtual memory | |
*/ | |
void mmu_writebyte(int pagenumber, uint32_t offset, uint8_t value) { | |
uint8_t* page= g_mmu.pagetable[pagenumber]; | |
if (page == NULL ) { | |
mmu_allocate_page(pagenumber); | |
page= g_mmu.pagetable[pagenumber]; | |
} | |
uint8_t* mem= page + offset; | |
*mem= value; | |
} | |
/* | |
* Write to virtual memory | |
*/ | |
void mmu_write(uint32_t addr, int bytes, uint32_t value) { | |
int pagenumber; | |
uint32_t offset; | |
int remaining= bytes; | |
int tmp= value; | |
while (remaining > 0) { | |
pagenumber= addr / MMU_PAGESIZE; | |
offset= addr % MMU_PAGESIZE; | |
mmu_writebyte(pagenumber, offset, tmp & ((1 << 8) - 1)); //write only lsb | |
tmp= tmp >> 8; | |
remaining--; | |
addr++; | |
} | |
} | |
/* | |
* Logger using variadic functions, checks loglevel before printing | |
*/ | |
void internal_log(char *tag, char *template, va_list variadic_args, int level) { | |
if (level <= g_cmdline.loglevel) { | |
printf("%s: ", tag); | |
vprintf(template, variadic_args); | |
printf("\n"); | |
} | |
} | |
/*
 * Logs a printf-style message with the tag "Fatal error" at level 0, then
 * terminates the process with exit status 1. Does not return.
 */
void log_fatal_error(char *template, ...) {
    va_list ap;

    va_start(ap, template);
    internal_log("Fatal error", template, ap, 0);
    va_end(ap);

    exit(1);
}
/*
 * Logs a printf-style message with the tag "DEBUG" at level 1; it is only
 * printed when g_cmdline.loglevel is at least 1.
 */
void log_debug(char *template, ...) {
    va_list ap;

    va_start(ap, template);
    internal_log("DEBUG", template, ap, 1);
    va_end(ap);
}
/* | |
* Reset global singleton | |
*/ | |
void init_g_regs() { | |
g_regs.gp[EAX]= 0xbf8db144; | |
g_regs.gp[ECX]= 0x88c5cffb; | |
g_regs.gp[EDX]= 0x1; | |
g_regs.gp[EBX]= 0xae5ff4; | |
g_regs.gp[ESP]= 0xbf8db0bc; | |
g_regs.gp[EBP]= 0xbf8db118; | |
g_regs.gp[ESI]= 0x9a0ca0; | |
g_regs.gp[EDI]= 0x0; | |
g_regs.eip= 0x8048354; | |
g_regs.eflags= 0x246; | |
g_regs.seg[CS]= 0x73; | |
g_regs.seg[SS]= 0x7b; | |
g_regs.seg[DS]= 0x7b; | |
g_regs.seg[ES]= 0x7b; | |
g_regs.seg[FS]= 0x0; | |
g_regs.seg[GS]= 0x33; | |
} | |
/* | |
* Reset global singleton | |
*/ | |
void init_g_op() { | |
g_op.halt= 0; | |
g_op.op= 0; | |
g_op.length= 0; | |
g_op.operandsizeoverride= 0; | |
g_op.addrsizeoverride= 0; | |
g_op.code_offset_1= 0; | |
g_op.code_offset_2= 0; | |
g_op.rbrwrd= 0; | |
g_op.mod= 0; | |
g_op.rm= 0; | |
g_op.reg= 0; | |
g_op.ss= 0; | |
g_op.index= 0; | |
g_op.base= 0; | |
g_op.displacement= 0; | |
g_op.immediate= 0; | |
g_op.has_modrm= 0; | |
g_op.has_sib= 0; | |
g_op.disp_size= 0; | |
g_op.immed_size= 0; | |
g_op.code_offset_size= 0; | |
g_op.modrm_contains_opcode_extension= 0; | |
} | |
/* | |
* Initialize virtual memory, allocate memory for pagetable | |
*/ | |
void init_g_mmu() { | |
//allocate the pagetable with 4096 pointers. | |
g_mmu.pagetable= (uint8_t**) calloc(MMU_PAGES, sizeof(uint8_t*)); //cleared to zeros | |
if (g_mmu.pagetable == NULL ) { | |
log_fatal_error("Out of memory"); | |
} | |
} | |
/*
 * Formats x as 8 binary digits, most significant bit first.
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
const char *to_binary8(uint8_t x) {
    static char bin[9]; /* 8 digits + terminating NUL */
    int pos;

    for (pos = 0; pos < 8; pos++) {
        bin[pos] = ((x >> (7 - pos)) & 1) ? '1' : '0';
    }
    bin[8] = '\0';
    return bin;
}
/*
 * Formats x as 32 binary digits, most significant bit first, by formatting
 * its four bytes with to_binary8().
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
const char *to_binary32(uint32_t x) {
    static char bin[33]; /* 32 digits + terminating NUL */
    char *out = bin;
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        memcpy(out, to_binary8((uint8_t) (x >> shift)), 8);
        out += 8;
    }
    *out = '\0';
    return bin;
}
/* | |
* Print registers | |
* | |
* |31..16|15-8|7-0| | |
* |AH.|AL.| | |
* |AX.....| | |
* |EAX............| | |
*/ | |
void print_g_regs() { | |
printf("+---------------------------------------------------------------------------------------+\n"); | |
printf("| EAX_AHAL | ECX_CHCL | EDX_DHDL | EBX_BHBL | ESP_SP__ | EBP_BP__ | ESI_SI__ | EDI_DI__ |\n"); | |
printf("| %8x | %8x | %8x | %8x | %8x | %8x | %8x | %8x |\n", g_regs.gp[EAX], g_regs.gp[ECX], g_regs.gp[EDX], g_regs.gp[EBX], g_regs.gp[ESP], g_regs.gp[EBP], g_regs.gp[ESI], g_regs.gp[EDI]); | |
printf("+---------------------------------------------------------------------------------------+\n"); | |
printf("| EFLAGS__ | ____________________O___SZ_A_P_C |"); | |
printf(" C: carry P: parity (A: adjust) |\n"); | |
printf("| %8x | %s |", g_regs.eflags, to_binary32(g_regs.eflags)); | |
printf(" Z: zero S: sign O: overflow |\n"); | |
printf("+---------------------------------------------------------------------------------------+\n"); | |
printf("| CS______ | SS______ | DS______ | ES______ | FS______ | GS______ | | EIP_____ |\n"); | |
printf("| %8x | %8x | %8x | %8x | %8x | %8x | | %8x |\n", | |
g_regs.seg[CS], g_regs.seg[SS], g_regs.seg[DS], g_regs.seg[ES], g_regs.seg[FS], g_regs.seg[GS], g_regs.eip); | |
printf("+---------------------------------------------------------------------------------------+\n"); | |
} | |
/* | |
* Print stack | |
* | |
* ebp+8 esp+40 (memory contents) | |
* ebp+4 esp+44 | |
* ebp+0 esp+48 | |
* ebp-4 esp+4C | |
* ebp-8 esp+50 | |
* ebp-12 esp+54 | |
* ... ... | |
* ebp-80 esp+12 | |
* ebp-84 esp+8 | |
* ebp-88 esp+4 | |
* ebp-9C esp+0 | |
* ebp-90 esp-4 | |
* ebp-94 esp-8 | |
* | |
*/ | |
void print_memory() { | |
printf("+--------------------------------------------------------------------------------+\n"); | |
printf("| Memory | ADDRESS_ | +/- %%esp | +/- %%ebp | Bytes______ | as_int32 | as int16s |\n"); | |
printf("+--------------------------------------------------------------------------------+\n"); | |
printf("| (%%eax) "); | |
print_memory_location(g_regs.gp[EAX]); | |
printf("| (%%ecx) "); | |
print_memory_location(g_regs.gp[ECX]); | |
printf("| (%%edx) "); | |
print_memory_location(g_regs.gp[EDX]); | |
printf("| (%%ebx) "); | |
print_memory_location(g_regs.gp[EBX]); | |
printf("| (%%esp) "); | |
print_memory_location(g_regs.gp[ESP]); | |
printf("| (%%ebp) "); | |
print_memory_location(g_regs.gp[EBP]); | |
printf("| (%%esi) "); | |
print_memory_location(g_regs.gp[ESI]); | |
printf("| (%%edi) "); | |
print_memory_location(g_regs.gp[ESI]); | |
printf("| (%%eip) "); | |
print_memory_location(g_regs.eip); | |
printf("| (%%cs) "); | |
print_memory_location(g_regs.seg[CS]); | |
printf("| (%%ss) "); | |
print_memory_location(g_regs.seg[SS]); | |
printf("| (%%ds) "); | |
print_memory_location(g_regs.seg[DS]); | |
printf("| (%%es) "); | |
print_memory_location(g_regs.seg[ES]); | |
printf("| (%%fs) "); | |
print_memory_location(g_regs.seg[FS]); | |
printf("| (%%gs) "); | |
print_memory_location(g_regs.seg[GS]); | |
printf("|--------------------------------------------------------------------------------|\n"); | |
//start with highest address, work our way down. | |
uint32_t esp= g_regs.gp[ESP]; | |
uint32_t ebp= g_regs.gp[EBP]; | |
uint32_t higher; | |
uint32_t lower; | |
if (ebp >= esp) { | |
higher= ebp; | |
lower= esp; | |
} else { | |
higher= esp; | |
lower= ebp; | |
} | |
int i; | |
for (i= PRINT_MEM_STACK_OUTSIDE_LINES; i > 0; i--) { | |
printf("| Stack "); | |
print_memory_location(higher + 4 * i); | |
} | |
uint32_t curr= higher; | |
while (curr > lower) { | |
printf("| Stack "); | |
print_memory_location(curr); | |
curr= curr - 4; | |
//if there is a gap, print "..." and skip ahead | |
if ((higher - curr) / 4 > PRINT_MEM_STACK_INSIDE_LINES && ((curr - lower) / 4 > PRINT_MEM_STACK_INSIDE_LINES)) { | |
printf("| ... "); | |
printf("| ... | ... | ... | ... | ... | ... |\n"); | |
curr= lower + 4 * PRINT_MEM_STACK_INSIDE_LINES; | |
} | |
} | |
printf("| Stack "); | |
print_memory_location(lower); | |
for (i= 1; i <= PRINT_MEM_STACK_OUTSIDE_LINES; i++) { | |
printf("| Stack "); | |
print_memory_location(lower - 4 * i); | |
} | |
printf("+--------------------------------------------------------------------------------+\n"); | |
} | |
void print_memory_location(uint32_t address) { | |
char sign_espoff= ((int32_t) (address - g_regs.gp[ESP])) >= 0 ? (address == g_regs.gp[ESP] ? ' ' : '+') : '-'; | |
char sign_ebpoff= ((int32_t) (address - g_regs.gp[EBP])) >= 0 ? (address == g_regs.gp[EBP] ? ' ' : '+') : '-'; | |
uint32_t abs_espoff= abs((int32_t) (address - g_regs.gp[ESP])); | |
uint32_t abs_ebpoff= abs((int32_t) (address - g_regs.gp[EBP])); | |
//Read multiple times, do not convert after read to enable spotting memory read bugs. | |
uint32_t byte0= mmu_read(address, 1); | |
uint32_t byte1= mmu_read(address + 1, 1); | |
uint32_t byte2= mmu_read(address + 2, 1); | |
uint32_t byte3= mmu_read(address + 3, 1); | |
uint32_t asint32= mmu_read(address, 4); | |
uint32_t asint16_0= mmu_read(address, 2); | |
uint32_t asint16_1= mmu_read(address + 2, 2); | |
printf("| %8x | %c%8x | %c%8x | %02x %02x %02x %02x | %8x | %4x %4x |\n", address, sign_espoff, abs_espoff, sign_ebpoff, abs_ebpoff, byte0, byte1, byte2, byte3, asint32, asint16_0, asint16_1); | |
} | |
/* | |
* Print decoded instruction | |
*/ | |
void print_g_op() { | |
printf("Instruction: "); | |
int i; | |
for (i= 0; i < g_op.length; i++) { | |
printf("%02x ", g_op.text[i]); | |
} | |
printf("\n"); | |
printf(" - Length: %x (decimal: %d)\n", g_op.length, g_op.length); | |
printf(" - Operand size override: "); | |
printf(g_op.operandsizeoverride ? "yes\n" : "no\n"); | |
printf(" - Address size override: "); | |
printf(g_op.addrsizeoverride ? "yes\n" : "no\n"); | |
printf(" - Code offset: %x:%x\n", g_op.code_offset_1, g_op.code_offset_2); | |
printf(" - rb/rw/rd register: %x\n", g_op.rbrwrd); | |
printf(" - modRM: "); | |
printf(g_op.has_modrm ? "yes\n" : "no\n"); | |
printf(" * mod: %x\n", g_op.mod); | |
printf(" * r/m: %x\n", g_op.rm); | |
printf(" * reg: %x\n", g_op.reg); | |
printf(" - is part of opcode: "); | |
printf(g_op.modrm_contains_opcode_extension ? "yes\n" : "no\n"); | |
printf(" - SIB: "); | |
printf(g_op.has_sib ? "yes\n" : "no\n"); | |
printf(" * ss: %x \n", g_op.ss); | |
printf(" * index: %x \n", g_op.index); | |
printf(" * base: %x \n", g_op.base); | |
printf(" - Displacement bytes: %x \n", g_op.disp_size); | |
printf(" * disp32/disp16: %x (decimal: %d)\n", (int32_t) g_op.displacement, (int32_t) g_op.displacement); | |
printf(" * disp8: %x (decimal: %d)\n", (int32_t) sign_extend8(g_op.displacement), (int32_t) sign_extend8(g_op.displacement)); | |
printf(" - Immediate bytes: %x (decimal: %d)\n", g_op.immed_size, g_op.immed_size); | |
printf(" * 32bit, unsigned: %x (decimal: %d)\n", g_op.immediate, g_op.immediate); | |
printf(" * 32bit, signed: %x (decimal: %d)\n", ((int32_t) g_op.immediate), ((int32_t) g_op.immediate)); | |
printf(" * 16bit, unsigned: %x (decimal: %d)\n", ((uint16_t) g_op.immediate), ((uint16_t) g_op.immediate)); | |
printf(" * 16bit, signed: %x (decimal: %d)\n", ((int16_t) g_op.immediate), ((int16_t) g_op.immediate)); | |
printf(" * 8bit, unsigned: %x (decimal: %d)\n", ((uint8_t) g_op.immediate), ((uint8_t) g_op.immediate)); | |
printf(" * 8bit, signed: %x (decimal: %d)\n", ((int8_t) g_op.immediate), ((int8_t) g_op.immediate)); | |
log_debug("op:\t%x", g_op.op); | |
} | |
/* | |
* Print input read from file | |
*/ | |
void print_input() { | |
int i; | |
for (i= 0; i < g_input.length; i++) { | |
if (i % 16 == 0) { | |
printf("\n"); | |
} | |
printf("%02x ", g_input.buffer[i]); | |
} | |
printf("\n"); | |
} | |
/* | |
* Read file | |
*/ | |
void read_input() { | |
g_input.buffer= NULL; | |
g_input.length= 0; | |
int buffer_length= MAX_INSTRUCTION_LENGTH; | |
g_input.buffer= (uint8_t*) malloc(sizeof(uint8_t) * buffer_length); | |
if (g_input.buffer == NULL ) { | |
log_fatal_error("Out of memory."); | |
} | |
FILE *in_file= NULL; | |
if (g_cmdline.inputfile != NULL ) { | |
in_file= fopen(g_cmdline.inputfile, "rt"); | |
if (in_file == NULL ) { | |
log_fatal_error("Error opening file."); | |
} else { | |
log_debug("Opened file for reading."); | |
} | |
} else { | |
log_debug("Reading from stdin."); | |
} | |
char c; | |
char tmp[3]; | |
int tmppos= 0; | |
tmp[2]= '\0'; | |
do { | |
if (in_file != NULL ) { | |
c= fgetc(in_file); | |
} else { | |
c= getchar(); | |
} | |
if (isxdigit(c)) { | |
tmp[tmppos++]= c; | |
if (tmppos == 2) { //2 characters read | |
tmppos= 0; | |
if (g_input.length >= buffer_length) { | |
buffer_length= buffer_length * 2; | |
g_input.buffer= (uint8_t*) realloc(g_input.buffer, sizeof(uint8_t) * buffer_length); | |
if (g_input.buffer == NULL ) { | |
log_fatal_error("Out of memory: Could not realloc buffer."); | |
} | |
} | |
g_input.buffer[g_input.length]= (uint8_t) strtol(&tmp[0], (char **) NULL, 16); | |
log_debug("Read: %x", g_input.buffer[g_input.length]); | |
g_input.length++; | |
} | |
} | |
} while (c != EOF); | |
log_debug("End of input reached"); | |
if (in_file != NULL ) { | |
fclose(in_file); | |
} | |
} | |
/*
 * Extracts `num` bits out of `input`, starting at bit position `lsb`
 * (0 = least significant bit), and returns them right-aligned.
 *
 * Returns 0 when num <= 0 or lsb is outside 0..31. A num of 32 or more
 * returns all bits from lsb upwards. The mask is built in an unsigned 32-bit
 * type, so num = 31 and num = 32 are well defined (shifting a signed 1 that
 * far is undefined behaviour).
 */
uint32_t extract_bits32(uint32_t input, int lsb, int num) {
    uint32_t shifted;

    if (num <= 0 || lsb < 0 || lsb >= 32) {
        return 0;
    }
    shifted = input >> lsb;
    if (num >= 32) {
        return shifted;
    }
    return shifted & ((UINT32_C(1) << num) - 1);
}
/*
 * 8-bit variant of extract_bits32(): returns `num` bits of `input`, starting
 * at bit position `lsb`, right-aligned.
 */
uint8_t extract_bits8(uint8_t input, int lsb, int num) {
    const int mask = (1 << num) - 1;
    const int shifted = input >> lsb;

    return (uint8_t) (shifted & mask);
}
/*
 * Sign extend the low 8 bits of input to 32 bits; higher input bits are ignored.
 * 0000 0000 1000 1000 -> 1111 1111 1000 1000
 */
uint32_t sign_extend8(uint32_t input) {
    const uint32_t low = input & 0xFFu;

    /* flipping the sign bit and subtracting its weight propagates it upwards */
    return (low ^ 0x80u) - 0x80u;
}
/*
 * Sign extend the low 16 bits of input to 32 bits; higher input bits are ignored.
 */
uint32_t sign_extend16(uint32_t input) {
    const uint32_t low = input & 0xFFFFu;

    /* flipping the sign bit and subtracting its weight propagates it upwards */
    return (low ^ 0x8000u) - 0x8000u;
}
/*
 * Zero extend: keeps the low 8 bits of input and clears all higher bits.
 */
uint32_t zero_extend8(uint32_t input) {
    return input & 0xFFu;
}
/*
 * Zero extend: keeps the low 16 bits of input and clears all higher bits.
 */
uint32_t zero_extend16(uint32_t input) {
    return input & 0xFFFFu;
}
/* | |
* EFLAG manipulation | |
*/ | |
uint8_t get_eflag(int flag) { | |
return extract_bits32(g_regs.eflags, flag, 1); | |
} | |
uint32_t get_eflag_bit(int flag) { | |
return g_regs.eflags & (1 << flag); | |
} | |
void set_eflag(int flag) { | |
g_regs.eflags|= (1 << flag); | |
} | |
void clear_eflag(int flag) { | |
g_regs.eflags&= ~(1 << flag); | |
} | |
void toggle_eflag(int flag) { | |
g_regs.eflags^= (1 << flag); | |
} | |
void write_eflag(int flag, int value) { | |
if (value) { | |
set_eflag(flag); | |
} else { | |
clear_eflag(flag); | |
} | |
} | |
/*
 * Calculates the parity of the least significant byte of `value`.
 * Returns 1 when that byte contains an odd number of 1 bits, 0 when the
 * number is even; all higher bits are ignored.
 *
 * Note:
 * - X86 processors only look at the least significant byte for the parity flag.
 * - NOTE(review): the x86 PF flag is set for an EVEN number of 1 bits; the
 *   caller is not visible in this section -- confirm that it inverts this result.
 */
int parity_lsb(uint32_t value) {
    int odd = 0;
    int bit;

    for (bit = 0; bit < 8; bit++) {
        odd ^= (int) ((value >> bit) & 1u); /* toggle for every 1 bit */
    }
    return odd;
}
/* | |
* Load program into virtual memory at %eip | |
*/ | |
void load_program_into_memory() { | |
int i; | |
for (i= 0; i < g_input.length; i++) { | |
mmu_write(g_regs.eip + i, 1, g_input.buffer[i]); | |
} | |
} | |
/* | |
* Decode and produce an abstract operation which contains all the information necessary to execute it. | |
*/ | |
void decode() { | |
int state= DECODE_STATE_PREFIX; | |
int numread_disp= 0; | |
int numread_immed= 0; | |
int numread_code_offset= 0; | |
int reread_byte= 0; | |
uint32_t current_address; | |
uint8_t current_byte; | |
init_g_op(); | |
current_address= g_regs.eip; | |
while (state != DECODE_STATE_END) { | |
if (!reread_byte) { | |
current_byte= mmu_read(current_address, 1); | |
g_op.text[g_op.length]= current_byte; //store original bytes | |
g_op.length++; | |
} | |
reread_byte= 0; | |
switch (state) { | |
case DECODE_STATE_PREFIX: | |
switch (current_byte) { | |
case 0xf0: //Group 1: Lock and repeat prefixes | |
case 0xf2: | |
case 0xf3: | |
case 0x2e: //Group 2: Segment override prefixes / branch hints | |
case 0x36: | |
case 0x3e: | |
case 0x26: | |
case 0x64: | |
case 0x65: | |
//(fallthrough of all cases above) | |
break; | |
case 0x66: //Group 3: Operand size override prefix | |
g_op.operandsizeoverride= 1; //use e.g. AX instead of EAX (reg) | |
break; | |
case 0x67: //Group 4: Address size override prefix | |
g_op.addrsizeoverride= 1; //use 16bit addressing table for mod/rm | |
break; | |
default: | |
//no more prefixes found, go to next mode | |
reread_byte= 1; | |
state= DECODE_STATE_OPCODE; | |
break; | |
} | |
break; | |
case DECODE_STATE_OPCODE: | |
switch (current_byte) { | |
case 0x0f: //2 or 3 byte opcode | |
g_op.op= OP_0F; | |
break; | |
case 0x90: //NOP | |
g_op.op= OP_NOP; | |
state= DECODE_STATE_END; | |
break; | |
case 0x8d: //LEA | |
g_op.op= OP_LEA_8D; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x83: //83 | |
g_op.op= OP_83; | |
g_op.modrm_contains_opcode_extension= 1; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x50: //PUSH | |
case 0x51: | |
case 0x52: | |
case 0x53: | |
case 0x54: | |
case 0x55: | |
case 0x56: | |
case 0x57: | |
g_op.op= OP_PUSH_50; | |
g_op.rbrwrd= current_byte - 0x50; //hardcoded register | |
state= DECODE_STATE_END; | |
break; | |
case 0xC1: //C1 | |
g_op.op= OP_C1; | |
g_op.modrm_contains_opcode_extension= 1; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x34: //XOR | |
g_op.op= OP_XOR_34; | |
g_op.immed_size= 1; | |
state= DECODE_STATE_IMMEDIATE; | |
break; | |
case 0x31: //XOR | |
g_op.op= OP_XOR_31; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x89: //MOV | |
g_op.op= OP_MOV_89; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x40: //INC | |
case 0x41: | |
case 0x42: | |
case 0x43: | |
case 0x44: | |
case 0x45: | |
case 0x46: | |
case 0x47: | |
g_op.op= OP_INC_40; | |
g_op.rbrwrd= current_byte - 0x40; | |
state= DECODE_STATE_END; | |
break; | |
case 0x80: //80 | |
g_op.op= OP_80; | |
g_op.modrm_contains_opcode_extension= 1; | |
state= DECODE_STATE_MODRM; | |
break; | |
case 0x58: //POP | |
case 0x59: | |
case 0x5A: | |
case 0x5B: | |
case 0x5C: | |
case 0x5D: | |
case 0x5E: | |
case 0x5F: | |
g_op.op= OP_POP_58; | |
g_op.rbrwrd= current_byte - 0x58; | |
state= DECODE_STATE_END; | |
break; | |
case 0x75: | |
g_op.op= OP_JNZ_75; | |
state= DECODE_STATE_CODE_OFFSET; | |
break; | |
default: | |
g_op.halt= 1; //invalid instruction | |
state= DECODE_STATE_END; | |
break; | |
} | |
break; | |
case DECODE_STATE_CODE_OFFSET: | |
if (numread_code_offset < 2) { | |
g_op.code_offset_1|= (current_byte << (8 * numread_code_offset)); | |
} else { | |
g_op.code_offset_2|= (current_byte << (8 * numread_code_offset)); | |
} | |
numread_code_offset++; | |
if (numread_code_offset < g_op.code_offset_size) { | |
state= DECODE_STATE_CODE_OFFSET; | |
} else { | |
switch (g_op.op) { | |
default: | |
state= DECODE_STATE_END; | |
} | |
} | |
break; | |
case DECODE_STATE_MODRM: | |
g_op.has_modrm= 1; | |
g_op.mod= extract_bits8(current_byte, 6, 2); | |
g_op.rm= extract_bits8(current_byte, 0, 3); | |
g_op.reg= extract_bits8(current_byte, 3, 3); | |
int general_opcode= g_op.op; | |
if (g_op.modrm_contains_opcode_extension) { | |
switch (g_op.op) { | |
case OP_80: | |
switch (g_op.reg) { | |
case 7: //CMP | |
g_op.op= OP_CMP_80_7; | |
g_op.immed_size= 1; | |
break; | |
} | |
break; | |
case OP_83: | |
switch (g_op.reg) { | |
case 4: //AND | |
g_op.op= OP_AND_83_4; | |
g_op.immed_size= 1; | |
break; | |
case 5: //SUB | |
g_op.op= OP_SUB_83_5; | |
g_op.immed_size= 1; | |
break; | |
} | |
break; | |
case OP_C1: | |
switch (g_op.reg) { | |
case 7: //SAR | |
g_op.op= OP_SAR_C1_7; | |
g_op.immed_size= 1; | |
break; | |
} | |
break; | |
} | |
if (g_op.op == general_opcode) { | |
g_op.halt= 1; //no instruction found, thus invalid op | |
} | |
} | |
//32bit addressing | |
//General case | |
switch (g_op.mod) { | |
case 0: | |
case 3: | |
g_op.disp_size= 0; | |
break; | |
case 1: | |
g_op.disp_size= 1; | |
break; | |
case 2: | |
if (g_op.addrsizeoverride) { | |
g_op.disp_size= 2; //16bits | |
} else { | |
g_op.disp_size= 4; //32bits | |
} | |
break; | |
} | |
// Special cases, exceptions to the rule | |
if (g_op.addrsizeoverride) { //16bit addressing | |
if (g_op.mod == 0 && g_op.rm == 6) { | |
g_op.disp_size= 2; | |
} | |
} else { //32bit addressing | |
if (g_op.rm == 4 && (g_op.mod == 0 || g_op.mod == 1 || g_op.mod == 2)) { | |
g_op.has_sib= 1; | |
} | |
if (g_op.mod == 0 && g_op.rm == 5) { | |
g_op.disp_size= 4; | |
} | |
} | |
if (g_op.has_sib) { | |
state= DECODE_STATE_SIB; | |
} else if (numread_disp < g_op.disp_size) { | |
state= DECODE_STATE_DISPLACEMENT; | |
} else if (numread_immed < g_op.immed_size) { | |
state= DECODE_STATE_IMMEDIATE; | |
} else { | |
state= DECODE_STATE_END; | |
} | |
break; | |
case DECODE_STATE_SIB: | |
g_op.ss= extract_bits8(current_byte, 6, 2); | |
g_op.index= extract_bits8(current_byte, 0, 3); | |
g_op.base= extract_bits8(current_byte, 3, 3); | |
if (numread_disp < g_op.disp_size) { | |
state= DECODE_STATE_DISPLACEMENT; | |
} else if (numread_immed < g_op.immed_size) { | |
state= DECODE_STATE_IMMEDIATE; | |
} else { | |
state= DECODE_STATE_END; | |
} | |
break; | |
case DECODE_STATE_DISPLACEMENT: | |
// in text: EF BE AD DE | |
// uint32_t: 0xAABBCCDD 0xDEADBEEF | |
// uint16_t: 0xCCDD 0xAABB 0xBEEF 0xDEAD | |
// i.e LSB is the first byte read | |
g_op.displacement= g_op.displacement | (current_byte << (8 * numread_disp)); | |
numread_disp++; | |
if (numread_disp < g_op.disp_size) { | |
state= DECODE_STATE_DISPLACEMENT; | |
} else if (numread_immed < g_op.immed_size) { | |
state= DECODE_STATE_IMMEDIATE; | |
} else { | |
state= DECODE_STATE_END; | |
} | |
break; | |
case DECODE_STATE_IMMEDIATE: | |
g_op.immediate= g_op.immediate | (current_byte << (8 * numread_immed)); | |
numread_immed++; | |
if (numread_immed < g_op.immed_size) { | |
state= DECODE_STATE_IMMEDIATE; | |
} else { | |
state= DECODE_STATE_END; | |
} | |
break; | |
} | |
if (!reread_byte) { | |
current_address++; //advance by 1 byte | |
} | |
} | |
if (!g_op.halt) { | |
log_debug("Decoded instruction."); | |
} else { | |
log_debug("Could not decode instruction."); | |
} | |
} | |
/* | |
* Return a new NULL location | |
*/ | |
vm_effective_location null_location() { | |
vm_effective_location e; | |
e.is_gp_register= 0; | |
e.reg= 0; | |
e.address= 0; | |
return e; | |
} | |
/* | |
* Calculate effective location in 32bit addressing mode | |
*/ | |
vm_effective_location effective_loc32() { | |
vm_effective_location effective= null_location(); | |
switch (g_op.mod) { | |
case 0: | |
switch (g_op.rm) { | |
case 4: | |
effective= memory_location(sib_addr32()); | |
break; | |
case 5: | |
effective= memory_location(g_op.displacement); | |
break; | |
default: | |
effective= memory_location(g_regs.gp[g_op.rm]); | |
break; | |
} | |
break; | |
case 1: | |
switch (g_op.rm) { | |
case 4: | |
effective= memory_location(sib_addr32() + sign_extend8(g_op.displacement)); | |
break; | |
default: | |
effective= memory_location(g_regs.gp[g_op.rm] + sign_extend8(g_op.displacement)); | |
break; | |
} | |
break; | |
case 2: | |
switch (g_op.rm) { | |
case 4: | |
effective= memory_location(sib_addr32() + g_op.displacement); | |
break; | |
default: | |
effective= memory_location(g_regs.gp[g_op.rm] + g_op.displacement); | |
break; | |
} | |
break; | |
case 3: | |
effective= register_location(g_op.rm); | |
break; | |
} | |
return effective; | |
} | |
/* | |
* Interpret SIB byte | |
*/ | |
uint32_t sib_addr32() { | |
uint32_t scaled_index= 0; | |
if (g_op.index != 4) { | |
scaled_index= g_regs.gp[g_op.index] << g_op.ss; | |
} | |
if (g_op.base == 5 && g_op.mod == 0) { | |
return scaled_index + g_op.displacement; | |
} else { | |
return g_regs.gp[g_op.base] + scaled_index; | |
} | |
} | |
/* | |
* Calculate effective location in 16bit addressing mode | |
*/ | |
vm_effective_location effective_loc16() { | |
vm_effective_location effective= null_location(); | |
switch (g_op.mod) { | |
case 0: | |
switch (g_op.rm) { | |
case 6: | |
effective= memory_location(g_op.displacement); | |
break; | |
default: | |
effective= memory_location(rm_addr16()); | |
break; | |
} | |
break; | |
case 1: | |
effective= memory_location(rm_addr16() + sign_extend8(g_op.displacement)); | |
break; | |
case 2: | |
effective= memory_location(rm_addr16() + g_op.displacement); | |
break; | |
case 3: | |
effective.is_gp_register= 1; | |
effective.reg= g_op.rm; | |
break; | |
} | |
effective= memory_location(zero_extend16(effective.address)); //clear 2 MSBs | |
return effective; | |
} | |
/* | |
* Calculate base addresses from ModRM byte in 16bit addressing mode | |
* | |
* 0: [BX+SI] | |
* 1: [BX+DI] | |
* 2: [BP+SI] | |
* 3: [BP+DI] | |
* 4: [SI] | |
* 5: [DI] | |
* 6: [BP] | |
* 7: [BX] | |
*/ | |
uint16_t rm_addr16() { | |
switch (g_op.rm) { | |
case 0: | |
return (uint16_t) (zero_extend16(g_regs.gp[EBX]) + zero_extend16(g_regs.gp[ESI])); | |
case 1: | |
return (uint16_t) (zero_extend16(g_regs.gp[EBX]) + zero_extend16(g_regs.gp[EDI])); | |
case 2: | |
return (uint16_t) (zero_extend16(g_regs.gp[EBP]) + zero_extend16(g_regs.gp[ESI])); | |
case 3: | |
return (uint16_t) (zero_extend16(g_regs.gp[EBP]) + zero_extend16(g_regs.gp[EDI])); | |
case 4: | |
return (uint16_t) zero_extend16(g_regs.gp[ESI]); | |
case 5: | |
return (uint16_t) zero_extend16(g_regs.gp[EDI]); | |
case 6: | |
return (uint16_t) zero_extend16(g_regs.gp[EBP]); | |
case 7: | |
return (uint16_t) zero_extend16(g_regs.gp[EBX]); | |
} | |
log_fatal_error("rm not in range 0-7"); | |
return 0; //get rid of compiler warnings | |
} | |
/* | |
* Return a register location | |
*/ | |
vm_effective_location register_location(int reg) { | |
vm_effective_location addr= null_location(); | |
addr.is_gp_register= 1; | |
addr.reg= reg; | |
return addr; | |
} | |
/* | |
* Return a memory location | |
*/ | |
vm_effective_location memory_location(uint32_t address) { | |
vm_effective_location addr= null_location(); | |
addr.address= address; | |
return addr; | |
} | |
/* | |
* Read from a location | |
*/ | |
uint32_t read_effective_loc32(vm_effective_location src) { | |
if (src.is_gp_register) { | |
return g_regs.gp[src.reg]; | |
} else { | |
return mmu_read(src.address, 4); | |
} | |
} | |
/*
 * Fetch 16 bits from a location: the low half of the register for a
 * register location, otherwise 2 bytes through mmu_read().
 */
uint32_t read_effective_loc16(vm_effective_location src) {
    if (!src.is_gp_register) {
        return mmu_read(src.address, 2);
    }
    return g_regs.gp[src.reg] & 0xFFFF;
}
/* | |
* Read only from 8bits. For 8-bit registers, the allocations are different: | |
* 0: AL 4: AH | |
* 1: CL 5: CH | |
* 2: DL 6: DH | |
* 3: BL 7: BH | |
*/ | |
uint32_t read_effective_loc8(vm_effective_location src) { | |
if (src.is_gp_register) { | |
if (src.reg < 4) { | |
return g_regs.gp[src.reg] & 0xFF; | |
} else { | |
return g_regs.gp[src.reg - 4] & 0xFF00; | |
} | |
} else { | |
return mmu_read(src.address, 1); | |
} | |
} | |
/* | |
* Write to a location. | |
*/ | |
void write_effective_loc32(vm_effective_location dest, uint32_t value) { | |
if (dest.is_gp_register) { | |
g_regs.gp[dest.reg]= value; | |
} else { | |
mmu_write(dest.address, 4, value); | |
} | |
} | |
/* | |
* Write only to lower 16bits | |
*/ | |
void write_effective_loc16(vm_effective_location dest, uint32_t value) { | |
if (dest.is_gp_register) { | |
g_regs.gp[dest.reg]= (g_regs.gp[dest.reg] & 0xFFFF0000) | (value & 0xFFFF); | |
} else { | |
mmu_write(dest.address, 2, value); | |
} | |
} | |
/* | |
* Write only to 8bits. For 8-bit registers, the allocations are different: | |
* 0: AL 4: AH | |
* 1: CL 5: CH | |
* 2: DL 6: DH | |
* 3: BL 7: BH | |
*/ | |
void write_effective8(vm_effective_location dest, uint32_t value) { | |
if (dest.is_gp_register) { | |
if (dest.reg < 4) { | |
g_regs.gp[dest.reg]= (g_regs.gp[dest.reg] & 0xFFFFFF00) | (value & 0xFF); | |
} else { | |
g_regs.gp[dest.reg - 4]= (g_regs.gp[dest.reg - 4] & 0xFFFF00FF) | (value & 0xFF00); | |
} | |
} else { | |
mmu_write(dest.address, 1, value); | |
} | |
} | |
void set_SF_ZF_PF_eflags32(uint32_t new) { | |
write_eflag(SF, ((int32_t) new) < 0); | |
write_eflag(ZF, new == 0); | |
write_eflag(PF, parity_lsb(new)); | |
} | |
void set_subtract_eflags32(uint32_t old, uint32_t new, uint32_t value) { | |
// CF for invalid unsigned | |
if (value > old) { //unsigned comparison | |
set_eflag(CF); //borrowed | |
} else { | |
clear_eflag(CF); | |
} | |
// OF for invalid signed | |
if (((((int32_t) new) > ((int32_t) old)) && ((int32_t) value) > 0) || ((((int32_t) new) > ((int32_t) old)) && ((int32_t) value) > 0)) { | |
set_eflag(OF); //result is impossible in signed | |
} else { | |
clear_eflag(OF); | |
} | |
set_SF_ZF_PF_eflags32(new); | |
} | |
void set_subtract_eflags8(uint8_t old, uint8_t new, uint8_t value) { | |
// CF for invalid unsigned | |
if (value > old) { //unsigned comparison | |
set_eflag(CF); //borrowed | |
} else { | |
clear_eflag(CF); | |
} | |
// OF for invalid signed | |
if (((((int8_t) new) > ((int8_t) old)) && ((int8_t) value) > 0) || ((((int8_t) new) > ((int8_t) old)) && ((int8_t) value) > 0)) { | |
set_eflag(OF); //result is impossible in signed | |
} else { | |
clear_eflag(OF); | |
} | |
set_SF_ZF_PF_eflags32(new); | |
} | |
/* | |
* Main dispatcher | |
*/ | |
void dispatch() { | |
//temporary variables for general usage | |
uint32_t old; | |
uint32_t new; | |
uint32_t value; | |
uint32_t cf; | |
vm_effective_location dest; | |
vm_effective_location src; | |
int i; | |
uint32_t times; | |
int no_increment_eip= 0; | |
switch (g_op.op) { | |
case OP_NOP: | |
printf("#### %d #### EXECUTE: 90 --> NOP <-- No operation ####\n", print_counter++); | |
//do nothing | |
break; | |
case OP_LEA_8D: | |
if (g_op.operandsizeoverride) { | |
//16bit operand | |
printf("#### %d #### EXECUTE: 8D /r --> LEA r16,m <-- Store effective address for m in register r16 ####\n", print_counter++); | |
if (g_op.addrsizeoverride) { | |
//16 bit address | |
src= effective_loc16(); | |
} else { | |
//32bit address | |
src= effective_loc32(); | |
} | |
if (src.is_gp_register) { | |
log_fatal_error("r/m not valid, need m"); | |
} | |
dest= register_location(g_op.reg); | |
write_effective_loc16(dest, src.address); //Load effective address into destination register | |
} else { | |
//32bit operand | |
printf("#### %d #### EXECUTE: 8D /r --> LEA r32,m <-- Store effective address for m in register r32 ####\n", print_counter++); | |
if (g_op.addrsizeoverride) { | |
//16 bit address | |
src= effective_loc16(); | |
} else { | |
//32bit address | |
src= effective_loc32(); | |
} | |
if (src.is_gp_register) { | |
log_fatal_error("r/m not valid, need m"); | |
} | |
dest= register_location(g_op.reg); | |
write_effective_loc32(dest, src.address); //Load effective address into destination register | |
} | |
break; | |
case OP_AND_83_4: | |
printf("#### %d #### EXECUTE: 83 /4 ib --> AND r/m32,imm8 <-- r/m16 AND imm8 (sign-extended) ####\n", print_counter++); | |
dest= effective_loc32(); | |
old= read_effective_loc32(dest); | |
new= old & sign_extend8(g_op.immediate); | |
write_effective_loc32(dest, new); //write to r/m | |
clear_eflag(OF); | |
clear_eflag(CF); | |
set_SF_ZF_PF_eflags32(new); | |
break; | |
case OP_PUSH_50: | |
printf("#### %d #### EXECUTE: 50+rd --> PUSH r32 <-- Push r32 ####\n", print_counter++); | |
src= register_location(g_op.rbrwrd); | |
new= read_effective_loc32(src); //value to be pushed onto the stack | |
g_regs.gp[ESP]= g_regs.gp[ESP] - 4; //decrement ESP | |
dest= memory_location(g_regs.gp[ESP]); | |
write_effective_loc32(dest, new); | |
break; | |
case OP_SAR_C1_7: | |
printf("#### %d #### EXECUTE: C1 /7 ib --> SAR r/m32,imm8 <-- Signed divide* r/m32 by 2, imm8 times ####\n", print_counter++); | |
dest= effective_loc32(); | |
old= read_effective_loc32(dest); | |
new= old; | |
times= g_op.immediate; | |
//simple implementation as in specification | |
i= times; | |
while (i > 0) { | |
cf= new & 0x1; | |
new= (((int32_t) new) >> 1); | |
i--; | |
} | |
write_effective_loc32(dest, new); //write to r/m | |
if (times != 0) { | |
if (times == 1) { | |
clear_eflag(OF); | |
} | |
write_eflag(CF, cf); | |
set_SF_ZF_PF_eflags32(new); | |
} | |
break; | |
case OP_XOR_34: | |
printf("#### %d #### EXECUTE: 34 ib --> XOR AL,imm8 <-- AL XOR imm8 ####\n", print_counter++); | |
src= register_location(AL); | |
old= read_effective_loc8(src); | |
new= old ^ g_op.immediate; | |
write_effective8(register_location(AL), new); | |
clear_eflag(CF); | |
clear_eflag(OF); | |
set_SF_ZF_PF_eflags32(new); | |
break; | |
case OP_XOR_31: | |
printf("#### %d #### EXECUTE: 31 /r --> XOR r/m32,r32 <-- r/m32 XOR r32 ####\n", print_counter++); | |
src= effective_loc32(); | |
old= read_effective_loc32(src); | |
value= read_effective_loc32(register_location(g_op.reg)); | |
new= old ^ value; | |
write_effective_loc32(src, new); | |
clear_eflag(CF); | |
clear_eflag(OF); | |
set_SF_ZF_PF_eflags32(new); | |
break; | |
case OP_MOV_89: | |
printf("#### %d #### EXECUTE: 89 /r --> MOV r/m32,r32 <-- Move r32 to r/m32 ####\n", print_counter++); | |
src= register_location(g_op.reg); | |
old= read_effective_loc32(src); | |
dest= effective_loc32(); | |
write_effective_loc32(dest, old); | |
break; | |
case OP_INC_40: | |
printf("#### %d #### EXECUTE: 40+ rd --> INC r32 <-- Increment doubleword register by 1 ####\n", print_counter++); | |
src= register_location(g_op.rbrwrd); | |
old= read_effective_loc32(src); | |
new= old + 1; | |
write_effective_loc32(src, new); | |
set_subtract_eflags32(old, new, 1); | |
break; | |
case OP_SUB_83_5: | |
printf("#### %d #### EXECUTE: 83 /5 ib --> SUB r/m32,imm8 <-- Subtract sign-extended imm8 from r/m32 ####\n", print_counter++); | |
src= effective_loc32(); | |
; | |
old= read_effective_loc32(src); | |
value= sign_extend8(g_op.immediate); | |
new= old - value; | |
write_effective_loc32(src, new); | |
set_subtract_eflags32(old, new, (value * -1)); | |
break; | |
case OP_CMP_80_7: | |
printf("#### %d #### EXECUTE: 80 /7 ib --> CMP r/m8, imm8 <-- Compare imm8 with r/m8 ####\n", print_counter++); | |
src= effective_loc32(); | |
; | |
old= read_effective_loc32(src); | |
value= sign_extend8(g_op.immediate); | |
new= (((uint8_t) old) - ((uint8_t) value)); | |
set_subtract_eflags8(((uint8_t) old), ((uint8_t) new), ((uint8_t) value)); | |
break; | |
case OP_POP_58: | |
printf("#### %d #### EXECUTE: 58+ rd --> POP r32 <-- Pop top of stack into r32; increment stack pointer ####\n", print_counter++); | |
dest= register_location(g_op.rbrwrd); | |
src= memory_location(g_regs.gp[ESP]); | |
new= read_effective_loc32(src); | |
write_effective_loc32(dest, new); | |
g_regs.gp[ESP]= g_regs.gp[ESP] + 4; //increment ESP | |
break; | |
case OP_JNZ_75: | |
printf("#### %d #### EXECUTE: 75 cb --> JNZ rel8 <-- Jump short if not zero (ZF=0) ####\n", print_counter++); | |
if (get_eflag(ZF) == 0) { | |
no_increment_eip= 1; | |
g_regs.eip= g_regs.eip + g_op.length + sign_extend8(g_op.code_offset_1); | |
} | |
break; | |
default: | |
log_fatal_error("Instruction unknown."); | |
break; | |
} | |
if (!no_increment_eip) { | |
g_regs.eip= g_regs.eip + g_op.length; | |
} | |
} | |
/* | |
* Main loop | |
*/ | |
void run_interpreter() { | |
init_g_regs(); | |
init_g_mmu(); | |
printf("#### %d #### ACTION: Reading program from input ####\n", print_counter++); | |
read_input(); | |
printf("#### %d #### PRINT: Complete program that was read: ####\n", print_counter++); | |
interactive_wait(); | |
print_input(); | |
printf("#### %d #### ACTION: Loading program into simulated memory ####\n", print_counter++); | |
load_program_into_memory(); | |
printf("#### %d #### PRINT: Initial register/memory/stack contents: ####\n", print_counter++); | |
interactive_wait(); | |
print_g_regs(); | |
print_memory(); | |
do { | |
printf("#### %d #### ACTION: Decoding next instruction at (%%eip) ####\n", print_counter++); | |
decode(); | |
if (!g_op.halt) { | |
printf("#### %d #### PRINT: Decoded instruction: ####\n", print_counter++); | |
interactive_wait(); | |
print_g_op(); | |
printf("#### %d #### ACTION: Dispatching instruction ####\n", print_counter++); | |
dispatch(); | |
printf("#### %d #### PRINT: Register/memory/stack contents: ####\n", print_counter++); | |
interactive_wait(); | |
print_g_regs(); | |
print_memory(); | |
} else { | |
printf("#### %d #### ACTION: Halting execution as next instruction is NULL or unknown ####\n", print_counter++); | |
} | |
} while (!g_op.halt); | |
log_debug("Finished execution of program without errors."); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment