Skip to content

Instantly share code, notes, and snippets.

@easyaspi314
Last active March 8, 2019 21:53
Show Gist options
  • Save easyaspi314/649f45698b0ba1b2c3e4469b94a7a254 to your computer and use it in GitHub Desktop.
Save easyaspi314/649f45698b0ba1b2c3e4469b94a7a254 to your computer and use it in GitHub Desktop.
A WIP interpreter for a Thumb-like 8-bit assembly language whose programs are written as arrays of C structs
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#if __STDC_VERSION__ >= 201112L
# include <stdnoreturn.h>
#elif defined(__GNUC__)
# define noreturn __attribute__((__noreturn__))
#else
# define noreturn
#endif
// Opcodes, stored in the 5-bit `code` field of an instruction.
// The numeric values are part of the encoding, so they are spelled out.
typedef enum {
    RET  = 0,   // stop executing the program
    MOV  = 1,   // copy an immediate or a register into a register
    ADD  = 2,
    SUB  = 3,
    MUL  = 4,
    DIV  = 5,
    LSL  = 6,   // logical shift left
    LSR  = 7,   // logical shift right
    ASR  = 8,   // arithmetic shift right
    AND  = 9,
    OR   = 10,
    XOR  = 11,
    LDR  = 12,  // load a register from memory
    STR  = 13,  // store a register to memory
    SWI  = 14,  // system call, selected by a SysCall value
    LBL  = 15,  // record a jump target
    CMP  = 16,  // compare and set the flags
    PUSH = 17,
    POP  = 18,
    JMP  = 19   // (conditional) jump to a recorded label
} InstructionCode;
// Register numbers; each indexes the `registers` array directly.
// sp is 0, which the instruction handlers also read as "no second
// register" when it appears in the reg2 field.
typedef enum {
    sp = 0,
    r0 = 1,
    r1 = 2,
    r2 = 3,
    r3 = 4,
    r4 = 5,
    r5 = 6,
    r6 = 7
} Register;
// Condition codes carried in the `data` field of a JMP.
// The names look like the ARM condition mnemonics; at the moment inst_jmp
// only acts on AL (0, unconditional), EQ and NE.
typedef enum {
    AL = 0,
    EQ = 1,
    NE = 2,
    CS = 3,
    CC = 4,
    MI = 5,
    PL = 6,
    VS = 7,
    VC = 8,
    HI = 9,
    LS = 10,
    GE = 11,
    LT = 12,
    GT = 13,
    LE = 14
} ConditionCode;
// System call numbers, passed in the `data` field of a SWI instruction.
// The register named by reg1 supplies the argument or receives the result.
typedef enum {
    PUT_INT     = 0,  // print the register as a decimal integer
    PUT_CHAR    = 1,  // print the register as a character
    PUT_HEX     = 2,  // print the register as unsigned hexadecimal
    PUT_STR     = 3,  // print the string at memory[register], then a newline
    PUT_NEWLINE = 4,  // print a newline
    GET_INT     = 5,  // read a decimal integer into the register
    GET_CHAR    = 6,  // read one non-whitespace character into the register
    GET_HEX     = 7,  // read a hexadecimal integer into the register
    DUMP        = 8   // dump registers and memory to stderr
} SysCall;
// Pseudo label number: a JMP whose reg1 field equals LR goes back to the
// most recently used label instead of indexing the label table.
enum { LR = 7 };
// The magic.
// One instruction, packed into four bit-fields (5 + 3 + 3 + 5 = 16 bits).
// Programs are plain arrays of these, usually written with positional
// initializers, so the field order below is part of the "assembly syntax".
typedef struct {
// Opcode (an InstructionCode value).
const unsigned short code : 5;
// First operand: normally the destination register; for JMP it is the
// label number.
unsigned short reg1 : 3;
// Second operand register. 0 (sp) is treated by the handlers as
// "no second register".
unsigned short reg2 : 3;
// Unsigned 5-bit immediate (0..31); also carries the SysCall number for
// SWI and the ConditionCode for JMP.
unsigned short data : 5;
} Instruction;
// Condition flags. inst_cmp writes all four; inst_jmp currently reads Z.
static bool N;
static bool Z;
static bool C;
static bool V;
// Register file, indexed by Register values (slot 0 is sp).
static int8_t registers[8] = { 0 };
// Data memory, shared by the stack (PUSH/POP) and LDR/STR.
// The stack can hold 127 bytes. A 16 or 32-bit core is coming soon.
static int8_t memory[128] = { 0 };
// A recorded jump target.
typedef struct {
// The LBL instruction that created this label; execution resumes with the
// instruction after it.
const Instruction *inst;
// Value of instruction_line when the label was recorded, restored on a
// jump so error messages keep reporting the right line.
int line;
} Label;
// Current line
static int instruction_line = 0;
// The last label we used.
static Label last_label = { NULL, 0 };
// Labels. TODO: Improve this mess.
static Label labels[7];
// How many labels
static uint32_t label_count = 0;
// Writes the machine state to stderr: every register on one line, followed
// by one line for each memory cell that is not zero.
static void dump(void)
{
    const size_t register_count = sizeof(registers) / sizeof(registers[0]);
    const size_t memory_size = sizeof(memory) / sizeof(memory[0]);

    fputs("registers: ", stderr);
    for (size_t reg = 0; reg < register_count; reg++) {
        fprintf(stderr, "%d ", registers[reg]);
    }

    fputs("\nmemory: ", stderr);
    for (size_t addr = 0; addr < memory_size; addr++) {
        if (memory[addr] == 0) {
            continue;
        }
        fprintf(stderr, "memory[%zu] = %d\n", addr, memory[addr]);
    }
}
// Fatal error path: reports `error_msg` together with the current
// instruction line on stderr, dumps the machine state and aborts the
// process. Never returns.
noreturn static void crash(const char *error_msg)
{
    const int failing_line = instruction_line;

    fprintf(stderr, "ERROR: line %d: %s\n", failing_line, error_msg);
    dump();
    abort();
}
// Handles syscalls.
// inst.data selects the SysCall; registers[inst.reg1] is the argument for
// the PUT_* calls and the destination for the GET_* calls.
//
// Failure handling:
//  - PUT_STR crashes with "Segmentation fault" if the register is negative
//    or if no NUL terminator exists between that address and the end of
//    memory (printing would otherwise run off the end of `memory`).
//  - GET_* crash with "Input error" when nothing could be read, instead of
//    storing an indeterminate value.
// Unknown syscall numbers are ignored. stdout is flushed after every call.
static void inst_swi(Instruction inst)
{
    int8_t *const reg = &registers[inst.reg1];

    switch (inst.data) {
    case PUT_CHAR:
        printf("%c", *reg);
        break;
    case PUT_INT:
        printf("%d", *reg);
        break;
    case PUT_HEX:
        printf("%x", (unsigned)(uint8_t)*reg);
        break;
    case PUT_STR: {
        if (*reg < 0) {
            crash("Segmentation fault");
        }
        const char *str = (const char *)&memory[*reg];
        const size_t available = sizeof(memory) - (size_t)*reg;
        // The string must terminate inside memory before we hand it to puts().
        if (memchr(str, '\0', available) == NULL) {
            crash("Segmentation fault");
        }
        puts(str);
        break;
    }
    case PUT_NEWLINE:
        putchar('\n');
        break;
    case GET_CHAR: {
        char ch;
        if (scanf(" %c", &ch) != 1) {
            crash("Input error");
        }
        *reg = (int8_t)ch;
        break;
    }
    case GET_INT: {
        int value;
        if (scanf("%d", &value) != 1) {
            crash("Input error");
        }
        *reg = (int8_t)value;
        break;
    }
    case GET_HEX: {
        unsigned int value; // %x requires an unsigned int destination
        if (scanf("%x", &value) != 1) {
            crash("Input error");
        }
        *reg = (int8_t)value;
        break;
    }
    case DUMP:
        dump();
        break;
    default:
        // Not a known syscall: nothing to do.
        break;
    }
    fflush(stdout);
}
// MOV: writes either the immediate or another register into reg1.
// The immediate is used when it is non-zero or when reg2 is 0, which is why
// register 0 (sp) cannot be used as a source here.
// TODO: negative values, larger immediates, reading sp.
static void inst_mov(Instruction inst)
{
    const bool use_immediate = (inst.data != 0) || (inst.reg2 == 0);

    registers[inst.reg1] = use_immediate ? (int8_t)inst.data
                                         : registers[inst.reg2];
}
// Just basic operators.
// INST_BASIC_OPERATOR(name, op) defines inst_<name>(). Operand selection:
//   data != 0 and reg2 != 0 : reg1 = reg2 op data
//   reg2 == 0               : reg1 = reg1 op data   (data may be 0)
//   data == 0 and reg2 != 0 : reg1 = reg1 op reg2
// The operation is evaluated in int and narrowed to int8_t when stored.
#define INST_BASIC_OPERATOR(name, op) \
static void inst_##name(Instruction inst) \
{ \
    if (inst.data || inst.reg2 == 0) { \
        if (inst.reg2 != 0) \
            registers[inst.reg1] = registers[inst.reg2] op inst.data; \
        else \
            registers[inst.reg1] = registers[inst.reg1] op inst.data; \
    } else { \
        registers[inst.reg1] = registers[inst.reg1] op registers[inst.reg2]; \
    } \
}
INST_BASIC_OPERATOR(add, +)
INST_BASIC_OPERATOR(sub, -)
INST_BASIC_OPERATOR(mul, *)
INST_BASIC_OPERATOR(and, &)
INST_BASIC_OPERATOR(or, |)
INST_BASIC_OPERATOR(xor, ^)

// The shifts are not generated from the raw C operators: `<<` on a negative
// value and any shift by a negative count or by >= the width of int are
// undefined behaviour, and a register can hold all of those. The helpers
// below define every case on the 8-bit value instead.

// 8-bit left shift; counts of 8 or more shift every bit out.
static int8_t shift_left8(int8_t value, unsigned count)
{
    if (count >= 8) {
        return 0;
    }
    return (int8_t)(uint8_t)((unsigned)(uint8_t)value << count);
}

// 8-bit arithmetic right shift; counts of 8 or more leave only sign bits.
static int8_t shift_right_arith8(int8_t value, unsigned count)
{
    if (count >= 8) {
        return (int8_t)(value < 0 ? -1 : 0);
    }
    // Shift the complement for negative inputs so the result does not rely
    // on how the implementation right-shifts negative numbers.
    return (int8_t)(value < 0 ? ~(~value >> count) : value >> count);
}

// INST_SHIFT_OPERATOR(name, shift_fn) defines inst_<name>() with the same
// operand selection as INST_BASIC_OPERATOR. A shift count taken from a
// register is read as an unsigned 8-bit number.
#define INST_SHIFT_OPERATOR(name, shift_fn) \
static void inst_##name(Instruction inst) \
{ \
    if (inst.data || inst.reg2 == 0) { \
        const int8_t operand = \
            registers[inst.reg2 != 0 ? inst.reg2 : inst.reg1]; \
        registers[inst.reg1] = shift_fn(operand, inst.data); \
    } else { \
        registers[inst.reg1] = shift_fn(registers[inst.reg1], \
                                        (uint8_t)registers[inst.reg2]); \
    } \
}
INST_SHIFT_OPERATOR(lsl, shift_left8)
INST_SHIFT_OPERATOR(asr, shift_right_arith8)
// Logical shift right
// Operand selection matches the other arithmetic instructions:
//   data != 0 and reg2 != 0 : reg1 = reg2 >> data
//   reg2 == 0               : reg1 = reg1 >> data
//   data == 0 and reg2 != 0 : reg1 = reg1 >> reg2
// The value is shifted as an unsigned 8-bit quantity so zeros enter from
// bit 7 (widening a negative register to 32 bits first would shift copies
// of the sign bit in). A count taken from a register is read as unsigned
// 8-bit; counts of 8 or more give 0.
static void inst_lsr(Instruction inst)
{
    uint8_t value;
    unsigned count;

    if (inst.data || inst.reg2 == 0) {
        value = (uint8_t)registers[inst.reg2 != 0 ? inst.reg2 : inst.reg1];
        count = inst.data;
    } else {
        value = (uint8_t)registers[inst.reg1];
        count = (uint8_t)registers[inst.reg2];
    }

    registers[inst.reg1] = (int8_t)(count < 8 ? value >> count : 0);
}
// DIV: signed division with the usual operand selection:
//   data != 0 and reg2 != 0 : reg1 = reg2 / data
//   reg2 == 0               : reg1 = reg1 / data
//   data == 0 and reg2 != 0 : reg1 = reg1 / reg2
// A zero divisor (immediate or register) crashes with
// "Floating point exception".
static void inst_div(Instruction inst)
{
    int dividend;
    int divisor;

    if (inst.data != 0 || inst.reg2 == sp) {
        dividend = registers[inst.reg2 == sp ? inst.reg1 : inst.reg2];
        divisor = inst.data;
    } else {
        dividend = registers[inst.reg1];
        divisor = registers[inst.reg2];
    }

    if (divisor == 0) {
        crash("Floating point exception");
    }
    registers[inst.reg1] = (int8_t)(dividend / divisor);
}
// LDR: reg1 = memory[reg2]. A negative address crashes with
// "Segmentation fault"; an int8_t cannot exceed the last valid index (127).
static void inst_ldr(Instruction inst)
{
    const int8_t address = registers[inst.reg2];

    if (address < 0) {
        crash("Segmentation fault");
    }
    registers[inst.reg1] = memory[address];
}
// STR: memory[reg2] = reg1. A negative address crashes with
// "Segmentation fault"; an int8_t cannot exceed the last valid index (127).
static void inst_str(Instruction inst)
{
    const int8_t address = registers[inst.reg2];

    if (address < 0) {
        crash("Segmentation fault");
    }
    memory[address] = registers[inst.reg1];
}
// Sets a label.
// Records `inst` (the LBL instruction itself) and the current line in the
// next free slot of `labels`; labels are numbered in the order they are
// executed. Every slot of the table may be used: the slot numbers run from
// 0 to LR - 1, so they never collide with the LR pseudo label used by JMP.
// Crashes with "Out of labels" once the table is full.
// TODO: Allow setting a label below the code, or with a specific number.
// TODO: More than 7 labels.
static void inst_lbl(const Instruction *inst)
{
    const size_t capacity = sizeof(labels) / sizeof(labels[0]);

    if (label_count >= capacity) {
        crash("Out of labels");
    }
    labels[label_count++] = (Label) { inst, instruction_line };
}
// Jumps to a label or continues. Returns a pointer to the struct holding the
// LBL statement if the condition is true, or the same instruction if false.
static const Instruction *inst_jmp(const Instruction *inst)
{
Label new_inst = { inst, instruction_line };
if (inst->reg1 == LR && last_label.inst != NULL) {
new_inst = last_label;
} else if (inst->reg1 < label_count) {
if (inst->data) {
if (inst->data == EQ && Z) {
new_inst = labels[inst->reg1];
} else if (inst->data == NE && !Z) {
new_inst = labels[inst->reg1];
}
// TODO: finish the other flags
} else {
new_inst = labels[inst->reg1];
}
}
if (new_inst.inst != inst) {
last_label = new_inst;
}
instruction_line = new_inst.line;
return new_inst.inst;
}
// CMP: computes reg1 - operand on 8 bits and sets the flags from it; no
// register is written. The operand is the immediate when reg2 is 0 (sp),
// otherwise register reg2.
//
// Flags follow the ARM convention for a subtraction:
//   N  bit 7 of the 8-bit result
//   Z  result is zero (operands are equal)
//   C  no borrow occurred, i.e. reg1 >= operand as unsigned bytes
//   V  signed overflow: the operands have different signs and the sign of
//      the result differs from the sign of reg1
// TODO: Maybe update flags in all arithmetic instructions
static void inst_cmp(Instruction inst)
{
    const uint8_t lhs = (uint8_t)registers[inst.reg1];
    const uint8_t rhs = (inst.reg2 == sp) ? (uint8_t)inst.data
                                          : (uint8_t)registers[inst.reg2];
    const uint8_t diff = (uint8_t)(lhs - rhs);

    N = (diff & 0x80u) != 0;
    Z = diff == 0;
    C = lhs >= rhs;
    V = (((lhs ^ rhs) & (lhs ^ diff)) & 0x80u) != 0;
}
// Pushes to the stack
// Stores reg1 at memory[sp] and then increments sp (the stack grows
// upwards). If reg1 is sp itself, the already incremented value is stored.
//
// The stack pointer is validated before anything is modified:
//  - a negative sp crashes with "Segmentation fault" (this includes
//    sp == -1, which would otherwise write to memory[-1]);
//  - sp == INT8_MAX crashes with "Out of memory", because the incremented
//    stack pointer would no longer fit in the 8-bit register.
static void inst_push(Instruction inst)
{
    const int8_t top = registers[sp];

    if (top < 0) {
        crash("Segmentation fault");
    }
    if (top == INT8_MAX) {
        crash("Out of memory");
    }
    registers[sp] = (int8_t)(top + 1);
    memory[top] = registers[inst.reg1];
}
// POP: loads the top of the stack (memory[sp - 1]) into reg1 and then
// decrements sp. Popping with sp <= 0 crashes with "Segmentation fault".
static void inst_pop(Instruction inst)
{
    const int8_t top = registers[sp];

    if (top <= 0) {
        crash("Segmentation fault");
    }
    registers[inst.reg1] = memory[top - 1];
    // Decrement the live register, not `top`: reg1 may be sp itself.
    registers[sp]--;
}
// Read an instruction tape.
void DecodeInstructions(const Instruction* inst)
{
instruction_line = 1;
for (;; ++inst, ++instruction_line) {
switch (inst->code) {
case MOV:
inst_mov(*inst);
break;
case ADD:
inst_add(*inst);
break;
case SUB:
inst_sub(*inst);
break;
case MUL:
inst_mul(*inst);
break;
case DIV:
inst_div(*inst);
break;
case LSL:
inst_lsl(*inst);
break;
case LSR:
inst_lsr(*inst);
break;
case ASR:
inst_asr(*inst);
break;
case AND:
inst_and(*inst);
break;
case OR:
inst_or(*inst);
break;
case XOR:
inst_xor(*inst);
break;
case LDR:
inst_ldr(*inst);
break;
case STR:
inst_str(*inst);
break;
case SWI:
inst_swi(*inst);
break;
case LBL:
inst_lbl(inst);
break;
case PUSH:
inst_push(*inst);
break;
case POP:
inst_pop(*inst);
break;
case JMP:
inst = inst_jmp(inst);
break;
case CMP:
inst_cmp(*inst);
break;
default:
// Zero out the state.
label_count = 0;
last_label = (Label) { NULL, 0 };
bzero(labels, sizeof(labels));
bzero(memory, sizeof(memory));
bzero(registers, sizeof(registers));
return;
}
}
}
// Runs two demo programs through the interpreter. Each program is an array
// of Instruction structs terminated by RET; DecodeInstructions() resets the
// machine when it reaches the RET, so the second program starts clean.
int main()
{
// Prints "HI" to the console.
const Instruction instructions[] = {
{ MOV, r0, .data = 8 },
{ MUL, r0, r0 }, // 8 * 8 = 64 = ascii @
{ ADD, r1, r0, .data = 8 }, // 64 + 8 = 72 = ascii H
{ PUSH, r1 },
{ ADD, r1, r0, .data = 9 }, // 64 + 9 = 73 = ascii I
{ PUSH, r1 },
{ MOV, r0, .data = 0 }, // null terminate
{ PUSH, r0 },
// Three bytes were pushed; move sp back so it holds the address of the
// first one.
{ SUB, sp, .data = 3 },
{ SWI, sp, .data = PUT_STR }, // print
{ RET }
};
DecodeInstructions(instructions);
// Can you figure out how this works?
// Structure (without spoiling the output): the setup section pushes twelve
// values onto the stack, leaves a start value in r0 (8 * 9) and a loop
// counter of 12 in r1. The loop prints r0 as a character, pops the next
// stack value into r3 and adds it to r0, then counts r1 down to zero.
const Instruction instructions2[] = {
{ MOV, r0, .data = 8 },
{ MOV, r1, .data = 9 },
{ MOV, r3, .data = 0 },
{ PUSH, r3 },
// r3 is 0 here, so these three-operand SUBs produce negative constants
// (0 - data), which MOV cannot load directly.
{ SUB, r5, r3, .data = 7 },
{ SUB, r6, r3, .data = 8 },
{ MUL, r5, r0 },
{ SUB, r5, .data = 2 },
{ PUSH, r5 },
{ SUB, r4, r3, .data = 4 },
{ PUSH, r6 },
{ MOV, r6, .data = 5 },
{ SUB, r4, .data = 2 },
{ PUSH, r4 },
{ MOV, r2, .data = 3 },
{ PUSH, r2 },
{ SUB, r2, r3, .data = 8 },
{ PUSH, r2 },
{ MUL, r6, .data = 11 },
{ PUSH, r6 },
{ SUB, r4, r3, .data = 8 },
{ ADD, r2, r6 },
{ SUB, r3, r2 },
{ PUSH, r3 },
{ MOV, r2, .data = 0 },
{ MOV, r3, .data = 3 },
{ PUSH, r3 },
{ PUSH, r2 },
{ MOV, r4, .data = 7 },
{ PUSH, r4 },
{ MOV, r2, .data = 0 },
{ SUB, r3, r2, .data = 3 },
{ PUSH, r3 },
{ MUL, r0, r1 },
{ MOV, r1, .data = 12 },
// begin loop
{ LBL }, // first label executed, so it becomes label 0
{ SWI, r0, .data = PUT_CHAR },
{ POP, r3 },
{ ADD, r0, r3 },
{ SUB, r1, .data = 1 },
{ CMP, r1, .data = 0 },
{ JMP, 0, .data = NE }, // back to label 0 while r1 != 0
// end loop
{ RET }
};
DecodeInstructions(instructions2);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment