Last active
March 8, 2019 21:53
-
-
Save easyaspi314/649f45698b0ba1b2c3e4469b94a7a254 to your computer and use it in GitHub Desktop.
A work-in-progress interpreter for a Thumb-like 8-bit assembly language whose programs are written as arrays of C structs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#if __STDC_VERSION__ >= 201112L | |
# include <stdnoreturn.h> | |
#elif defined(__GNUC__) | |
# define noreturn __attribute__((__noreturn__)) | |
#else | |
# define noreturn | |
#endif | |
// Opcodes understood by the interpreter. The value is stored in the 5-bit
// `code` field of an Instruction. RET has no handler of its own: it falls
// into the default branch of the decoder, which ends the program.
typedef enum {
    RET  = 0,
    MOV  = 1,
    ADD  = 2,
    SUB  = 3,
    MUL  = 4,
    DIV  = 5,
    LSL  = 6,
    LSR  = 7,
    ASR  = 8,
    AND  = 9,
    OR   = 10,
    XOR  = 11,
    LDR  = 12,
    STR  = 13,
    SWI  = 14,
    LBL  = 15,
    CMP  = 16,
    PUSH = 17,
    POP  = 18,
    JMP  = 19
} InstructionCode;
// Register names, used as indices into the register file. `sp` is index 0,
// which the operand decoders also treat as "no second register given".
typedef enum {
    sp = 0,
    r0 = 1,
    r1 = 2,
    r2 = 3,
    r3 = 4,
    r4 = 5,
    r5 = 6,
    r6 = 7
} Register;
// Condition codes carried in the `data` field of a JMP instruction.
// AL (0) means "always".
typedef enum {
    AL = 0,
    EQ = 1,
    NE = 2,
    CS = 3,
    CC = 4,
    MI = 5,
    PL = 6,
    VS = 7,
    VC = 8,
    HI = 9,
    LS = 10,
    GE = 11,
    LT = 12,
    GT = 13,
    LE = 14
} ConditionCode;
// System call numbers carried in the `data` field of an SWI instruction.
typedef enum {
    PUT_INT     = 0,
    PUT_CHAR    = 1,
    PUT_HEX     = 2,
    PUT_STR     = 3,
    PUT_NEWLINE = 4,
    GET_INT     = 5,
    GET_CHAR    = 6,
    GET_HEX     = 7,
    DUMP        = 8
} SysCall;
// Pseudo label number: a JMP whose reg1 field equals LR targets the most
// recently taken label instead of a numbered one.
enum { LR = 7 };
// One encoded instruction, packed into 16 bits of bit-fields:
//   code - opcode (InstructionCode)
//   reg1 - first / destination register (or label number for JMP)
//   reg2 - second register; 0 means "not given"
//   data - 5-bit unsigned immediate, syscall number or condition code
typedef struct {
    const unsigned short code : 5;
    unsigned short reg1 : 3, reg2 : 3;
    unsigned short data : 5;
} Instruction;
// Flags | |
static bool N, Z, C, V; | |
// Register data | |
static int8_t registers[8] = { 0 }; | |
// Stack memory. You get 127 bytes. A 16 or 32-bit core is coming soon. | |
static int8_t memory[128] = { 0 }; | |
typedef struct { | |
const Instruction *inst; | |
int line; | |
} Label; | |
// Current line | |
static int instruction_line = 0; | |
// The last label we used. | |
static Label last_label = { NULL, 0 }; | |
// Labels. TODO: Improve this mess. | |
static Label labels[7]; | |
// How many labels | |
static uint32_t label_count = 0; | |
// Dumps some information. | |
static void dump(void) | |
{ | |
fputs("registers: ", stderr); | |
for (int i = 0; i < 8; i++) { | |
fprintf(stderr, "%d ", registers[i]); | |
} | |
fputs("\nmemory: ", stderr); | |
for (size_t i = 0; i < sizeof(memory) / sizeof(memory[0]); i++) { | |
if (memory[i] != 0) { | |
fprintf(stderr, "memory[%zu] = %d\n", i, memory[i]); | |
} | |
} | |
} | |
// Prints an error message, dumps, and aborts. | |
noreturn static void crash(const char *error_msg) | |
{ | |
fprintf(stderr, "ERROR: line %d: %s\n", instruction_line, error_msg); | |
dump(); | |
abort(); | |
} | |
// Handles the SWI instruction: performs the system call selected by
// inst.data (a SysCall value). The register named by inst.reg1 is the value
// to print for the PUT_* calls and the destination for the GET_* calls.
//
// - PUT_STR treats the register value as an address into `memory` and prints
//   the NUL-terminated string stored there, followed by a newline. A negative
//   address crashes with "Segmentation fault". The read is bounded by the end
//   of `memory`, so an unterminated string cannot run off the array.
// - GET_* leave the register unchanged when no value could be read, instead
//   of storing an indeterminate one.
// - Unknown syscall numbers are ignored.
//
// stdout is flushed after every call so output interleaves correctly with
// the diagnostics written to stderr.
static void inst_swi(Instruction inst)
{
    int8_t *const reg = &registers[inst.reg1];

    switch (inst.data) {
    case PUT_CHAR:
        printf("%c", *reg);
        break;
    case PUT_INT:
        printf("%d", *reg);
        break;
    case PUT_HEX:
        printf("%x", (uint8_t)*reg);
        break;
    case PUT_STR: {
        if (*reg < 0) {
            crash("Segmentation fault");
        }
        // The precision caps the read at the last byte of `memory`.
        const int room = (int)sizeof(memory) - *reg;
        printf("%.*s\n", room, (const char *)&memory[*reg]);
        break;
    }
    case PUT_NEWLINE:
        putchar('\n');
        break;
    case GET_CHAR: {
        char ch;
        if (scanf(" %c", &ch) == 1) {
            *reg = (int8_t)ch;
        }
        break;
    }
    case GET_INT: {
        int value;
        if (scanf("%d", &value) == 1) {
            *reg = (int8_t)value;
        }
        break;
    }
    case GET_HEX: {
        // %x requires a pointer to unsigned int.
        unsigned int value;
        if (scanf("%x", &value) == 1) {
            *reg = (int8_t)value;
        }
        break;
    }
    case DUMP:
        dump();
        break;
    default:
        break;
    }
    fflush(stdout);
}
// MOV: copies a value into the register named by inst.reg1. The source is
// register inst.reg2 only when the immediate is zero and reg2 is non-zero;
// in every other case the 5-bit immediate inst.data is stored.
// TODO: negative values, larger immediates, reading sp.
static void inst_mov(Instruction inst)
{
    const bool from_register = (inst.data == 0) && (inst.reg2 != 0);

    registers[inst.reg1] = from_register ? registers[inst.reg2]
                                         : (int8_t)inst.data;
}
// Just basic operators. | |
#define INST_BASIC_OPERATOR(name, op) \ | |
static void inst_##name(Instruction inst) \ | |
{ \ | |
if (inst.data || inst.reg2 == 0) { \ | |
if (inst.reg2 != 0) \ | |
registers[inst.reg1] = registers[inst.reg2] op inst.data; \ | |
else \ | |
registers[inst.reg1] = registers[inst.reg1] op inst.data; \ | |
} else { \ | |
registers[inst.reg1] = registers[inst.reg1] op registers[inst.reg2]; \ | |
} \ | |
} | |
INST_BASIC_OPERATOR(add, +) | |
INST_BASIC_OPERATOR(sub, -) | |
INST_BASIC_OPERATOR(mul, *) | |
INST_BASIC_OPERATOR(lsl, <<) | |
INST_BASIC_OPERATOR(asr, >>) | |
INST_BASIC_OPERATOR(and, &) | |
INST_BASIC_OPERATOR(or, |) | |
INST_BASIC_OPERATOR(xor, ^) | |
// LSR: logical (zero-filling) shift right on the 8-bit register value.
// Operand selection:
//   - immediate form (inst.data != 0, or inst.reg2 == 0):
//       reg1 = (reg2 given ? reg2 : reg1) >> data
//   - register form: reg1 = reg1 >> reg2
// The value is reinterpreted as an unsigned byte before shifting, so zeros
// (not copies of the sign bit) enter from bit 7. A count taken from a
// register is read as an unsigned byte; any count of 8 or more yields 0.
static void inst_lsr(Instruction inst)
{
    uint8_t value;
    unsigned count;

    if (inst.data || inst.reg2 == 0) {
        value = (uint8_t)registers[inst.reg2 != 0 ? inst.reg2 : inst.reg1];
        count = inst.data;
    } else {
        value = (uint8_t)registers[inst.reg1];
        count = (uint8_t)registers[inst.reg2];
    }

    registers[inst.reg1] = (count < 8) ? (int8_t)(value >> count) : 0;
}
// DIV: integer division into the register named by inst.reg1.
//   - immediate form (inst.data != 0, or inst.reg2 == sp):
//       reg1 = (reg2 given ? reg2 : reg1) / data
//   - register form: reg1 = reg1 / reg2
// A zero divisor crashes with "Floating point exception".
static void inst_div(Instruction inst)
{
    const bool immediate = (inst.data != 0) || (inst.reg2 == sp);
    int dividend;
    int divisor;

    if (immediate) {
        divisor = inst.data;
        dividend = registers[(inst.reg2 != sp) ? inst.reg2 : inst.reg1];
    } else {
        divisor = registers[inst.reg2];
        dividend = registers[inst.reg1];
    }

    if (divisor == 0)
        crash("Floating point exception");

    registers[inst.reg1] = dividend / divisor;
}
// LDR: loads the memory byte addressed by register inst.reg2 into register
// inst.reg1. A negative address crashes with "Segmentation fault".
static void inst_ldr(Instruction inst)
{
    const int8_t address = registers[inst.reg2];

    if (address < 0)
        crash("Segmentation fault");

    registers[inst.reg1] = memory[address];
}
// STR: stores register inst.reg1 into the memory byte addressed by register
// inst.reg2. A negative address crashes with "Segmentation fault".
static void inst_str(Instruction inst)
{
    const int8_t address = registers[inst.reg2];

    if (address < 0)
        crash("Segmentation fault");

    memory[address] = registers[inst.reg1];
}
// Sets a label. | |
// TODO: Allow setting a label below the code, or with a specific number. | |
// TODO: More than 7 labels. | |
static void inst_lbl(const Instruction *inst) | |
{ | |
if (label_count + 1 < LR) { | |
labels[label_count++] = (Label) { inst, instruction_line }; | |
} else { | |
crash("Out of labels"); | |
} | |
} | |
// Jumps to a label or continues. Returns a pointer to the struct holding the | |
// LBL statement if the condition is true, or the same instruction if false. | |
static const Instruction *inst_jmp(const Instruction *inst) | |
{ | |
Label new_inst = { inst, instruction_line }; | |
if (inst->reg1 == LR && last_label.inst != NULL) { | |
new_inst = last_label; | |
} else if (inst->reg1 < label_count) { | |
if (inst->data) { | |
if (inst->data == EQ && Z) { | |
new_inst = labels[inst->reg1]; | |
} else if (inst->data == NE && !Z) { | |
new_inst = labels[inst->reg1]; | |
} | |
// TODO: finish the other flags | |
} else { | |
new_inst = labels[inst->reg1]; | |
} | |
} | |
if (new_inst.inst != inst) { | |
last_label = new_inst; | |
} | |
instruction_line = new_inst.line; | |
return new_inst.inst; | |
} | |
// CMP: computes (reg1 - operand) as an 8-bit subtraction and sets the flags
// from it; no register is written. The operand is the immediate inst.data
// when inst.reg2 == sp, otherwise register inst.reg2.
//
//   Z - the operands are equal
//   N - bit 7 of the 8-bit difference
//   C - no borrow occurred, i.e. reg1 >= operand when both are read as
//       unsigned bytes
//   V - the signed difference does not fit in 8 bits
//
// TODO: Maybe update flags in all arithmetic instructions
static void inst_cmp(Instruction inst)
{
    const int8_t lhs = registers[inst.reg1];
    const int8_t rhs = (inst.reg2 == sp) ? (int8_t)inst.data
                                         : registers[inst.reg2];
    const uint8_t lhs_bits = (uint8_t)lhs;
    const uint8_t rhs_bits = (uint8_t)rhs;
    const uint8_t diff = (uint8_t)(lhs_bits - rhs_bits);
    // Exact signed difference, computed in int so it cannot wrap.
    const int wide = (int)lhs - (int)rhs;

    N = (diff & 0x80u) != 0;
    Z = diff == 0;
    C = lhs_bits >= rhs_bits;
    V = wide < INT8_MIN || wide > INT8_MAX;
}
// PUSH: stores register inst.reg1 at the top of the stack and advances sp.
// The stack grows upwards from memory[0]; sp holds the number of bytes in use.
//   - a negative sp (e.g. after arithmetic on sp) crashes with
//     "Segmentation fault" rather than writing below memory[0]
//   - a full stack (sp already at INT8_MAX) crashes with "Out of memory"
// sp is advanced before the source register is read, so pushing sp itself
// stores the incremented value.
static void inst_push(Instruction inst)
{
    const int8_t top = registers[sp];

    if (top < 0)
        crash("Segmentation fault");
    if (top == INT8_MAX)
        crash("Out of memory");

    registers[sp] = (int8_t)(top + 1);
    memory[top] = registers[inst.reg1];
}
// POP: loads the byte on top of the stack into register inst.reg1 and then
// moves sp down by one. Popping with sp <= 0 crashes with
// "Segmentation fault".
static void inst_pop(Instruction inst)
{
    const int8_t depth = registers[sp];

    if (depth <= 0)
        crash("Segmentation fault");

    registers[inst.reg1] = memory[depth - 1];
    registers[sp]--;
}
// Read an instruction tape. | |
void DecodeInstructions(const Instruction* inst) | |
{ | |
instruction_line = 1; | |
for (;; ++inst, ++instruction_line) { | |
switch (inst->code) { | |
case MOV: | |
inst_mov(*inst); | |
break; | |
case ADD: | |
inst_add(*inst); | |
break; | |
case SUB: | |
inst_sub(*inst); | |
break; | |
case MUL: | |
inst_mul(*inst); | |
break; | |
case DIV: | |
inst_div(*inst); | |
break; | |
case LSL: | |
inst_lsl(*inst); | |
break; | |
case LSR: | |
inst_lsr(*inst); | |
break; | |
case ASR: | |
inst_asr(*inst); | |
break; | |
case AND: | |
inst_and(*inst); | |
break; | |
case OR: | |
inst_or(*inst); | |
break; | |
case XOR: | |
inst_xor(*inst); | |
break; | |
case LDR: | |
inst_ldr(*inst); | |
break; | |
case STR: | |
inst_str(*inst); | |
break; | |
case SWI: | |
inst_swi(*inst); | |
break; | |
case LBL: | |
inst_lbl(inst); | |
break; | |
case PUSH: | |
inst_push(*inst); | |
break; | |
case POP: | |
inst_pop(*inst); | |
break; | |
case JMP: | |
inst = inst_jmp(inst); | |
break; | |
case CMP: | |
inst_cmp(*inst); | |
break; | |
default: | |
// Zero out the state. | |
label_count = 0; | |
last_label = (Label) { NULL, 0 }; | |
bzero(labels, sizeof(labels)); | |
bzero(memory, sizeof(memory)); | |
bzero(registers, sizeof(registers)); | |
return; | |
} | |
} | |
} | |
int main() | |
{ | |
// Prints "HI" to the console. | |
const Instruction instructions[] = { | |
{ MOV, r0, .data = 8 }, | |
{ MUL, r0, r0 }, // 8 * 8 = 64 = ascii @ | |
{ ADD, r1, r0, .data = 8 }, // 64 + 8 = 72 = ascii H | |
{ PUSH, r1 }, | |
{ ADD, r1, r0, .data = 9 }, // 64 + 9 = 73 = ascii I | |
{ PUSH, r1 }, | |
{ MOV, r0, .data = 0 }, // null terminate | |
{ PUSH, r0 }, | |
{ SUB, sp, .data = 3 }, | |
{ SWI, sp, .data = PUT_STR }, // print | |
{ RET } | |
}; | |
DecodeInstructions(instructions); | |
// Can you figure out how this works? | |
const Instruction instructions2[] = { | |
{ MOV, r0, .data = 8 }, | |
{ MOV, r1, .data = 9 }, | |
{ MOV, r3, .data = 0 }, | |
{ PUSH, r3 }, | |
{ SUB, r5, r3, .data = 7 }, | |
{ SUB, r6, r3, .data = 8 }, | |
{ MUL, r5, r0 }, | |
{ SUB, r5, .data = 2 }, | |
{ PUSH, r5 }, | |
{ SUB, r4, r3, .data = 4 }, | |
{ PUSH, r6 }, | |
{ MOV, r6, .data = 5 }, | |
{ SUB, r4, .data = 2 }, | |
{ PUSH, r4 }, | |
{ MOV, r2, .data = 3 }, | |
{ PUSH, r2 }, | |
{ SUB, r2, r3, .data = 8 }, | |
{ PUSH, r2 }, | |
{ MUL, r6, .data = 11 }, | |
{ PUSH, r6 }, | |
{ SUB, r4, r3, .data = 8 }, | |
{ ADD, r2, r6 }, | |
{ SUB, r3, r2 }, | |
{ PUSH, r3 }, | |
{ MOV, r2, .data = 0 }, | |
{ MOV, r3, .data = 3 }, | |
{ PUSH, r3 }, | |
{ PUSH, r2 }, | |
{ MOV, r4, .data = 7 }, | |
{ PUSH, r4 }, | |
{ MOV, r2, .data = 0 }, | |
{ SUB, r3, r2, .data = 3 }, | |
{ PUSH, r3 }, | |
{ MUL, r0, r1 }, | |
{ MOV, r1, .data = 12 }, | |
// begin loop | |
{ LBL }, | |
{ SWI, r0, .data = PUT_CHAR }, | |
{ POP, r3 }, | |
{ ADD, r0, r3 }, | |
{ SUB, r1, .data = 1 }, | |
{ CMP, r1, .data = 0 }, | |
{ JMP, 0, .data = NE }, | |
// end loop | |
{ RET } | |
}; | |
DecodeInstructions(instructions2); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment