Last active
August 2, 2025 14:39
-
-
Save zevaverbach/520f9724d62df16d45549c53907edfcf to your computer and use it in GitHub Desktop.
decoding 8086 instructions (Computer Enhance) -- first homework
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Enum, StrEnum, auto | |
from itertools import batched | |
import pathlib as p | |
# Root directory of the listings; the leading "~" is expanded by
# read_binary_file() at read time, not here.
PATH_ASSETS = "~/repos/learn-asm/2025/computer_enhance/repo/perfaware"
PATH_ASSETS_PT1 = PATH_ASSETS + "/part1"

# Each listing is referenced twice: the binary (no extension) and the
# same path with ".asm" appended.
PATH_L37_SINGLE_REGISTER_MOV_BINARY = PATH_ASSETS_PT1 + "/listing_0037_single_register_mov"
PATH_L37_SINGLE_REGISTER_MOV_ASM = f"{PATH_L37_SINGLE_REGISTER_MOV_BINARY}.asm"
PATH_L38_MANY_REGISTER_MOV_BINARY = PATH_ASSETS_PT1 + "/listing_0038_many_register_mov"
PATH_L38_MANY_REGISTER_MOV_ASM = f"{PATH_L38_MANY_REGISTER_MOV_BINARY}.asm"
class NotFound(Exception):
    """Raised by get_op() when a byte matches none of the known opcode patterns."""
def read_binary_file(to_open: str) -> bytes:
    """Return the raw bytes of the file at *to_open*.

    A leading ``~`` in the path is expanded to the user's home directory
    before the file is opened.
    """
    resolved = p.Path(to_open).expanduser()
    with resolved.open("rb") as handle:
        return handle.read()
class NumBits(Enum):
    """Operand width in bits, as selected by bit 0 of the first byte (see get_num_bits)."""

    EIGHT = 8
    SIXTEEN = 16
# Bit masks.
MASK_BIT0 = 0b1  # lowest bit only
MASK_BITS_0_to_2 = 0b0000_0111  # lowest three bits

# Right-shift amounts that bring a field of the second byte
# (0b[MOD:2][REG:3][R/M:3]) down to bit 0. Despite the "BYTE_INDEX"
# names these are bit offsets within the byte.
BYTE_INDEX_RM = 0
BYTE_INDEX_REG = 3
BYTE_INDEX_MODE = 6
# ops | |
class Op(StrEnum): | |
MOV_REG_TO_REG_OR_MEM = auto() | |
MOV_TO_REG_OR_MEM = auto() | |
MOV_TO_REG = auto() | |
MOV_MEM_TO_ACCUM = auto() | |
MOV_REG_OR_MEM_TO_SEG_REG = auto() | |
MOV_SEG_REG_TO_REG_OR_MEM = auto() | |
# Opcode prefixes matched against the high bits of an instruction's first
# byte. get_op() derives each prefix's width from the value's bit length,
# so every pattern must begin with a 1 bit.
MOV_REG_TO_REG_OR_MEM = 0b100010  # 6-bit prefix
MOV_TO_REG_OR_MEM = 0b1100011  # 7-bit prefix
MOV_TO_REG = 0b1011  # 4-bit prefix
MOV_MEM_TO_ACCUM = 0b1010000  # 7-bit prefix
MOV_REG_OR_MEM_TO_SEG_REG = 0b10001110  # 8-bit prefix (whole byte)
MOV_SEG_REG_TO_REG_OR_MEM = 0b10001100  # 8-bit prefix (whole byte)
# (opcode prefix, operation) pairs, scanned in order by get_op(); the first
# prefix that equals the high bits of the byte wins.
OP_BITS_TO_NAMES = (
    (MOV_REG_TO_REG_OR_MEM, Op.MOV_REG_TO_REG_OR_MEM),
    (MOV_TO_REG_OR_MEM, Op.MOV_TO_REG_OR_MEM),
    # Fixed: this prefix was paired with Op.MOV_TO_REG_OR_MEM, which made
    # Op.MOV_TO_REG unreachable.
    (MOV_TO_REG, Op.MOV_TO_REG),
    (MOV_MEM_TO_ACCUM, Op.MOV_MEM_TO_ACCUM),
    (MOV_REG_OR_MEM_TO_SEG_REG, Op.MOV_REG_OR_MEM_TO_SEG_REG),
    (MOV_SEG_REG_TO_REG_OR_MEM, Op.MOV_SEG_REG_TO_REG_OR_MEM),
)
# Mnemonic emitted for each operation; every variant handled so far
# disassembles to the same "mov".
OP_NAME_LOOKUP = dict.fromkeys(
    (
        Op.MOV_REG_TO_REG_OR_MEM,
        Op.MOV_TO_REG_OR_MEM,
        Op.MOV_TO_REG,
        Op.MOV_MEM_TO_ACCUM,
        Op.MOV_REG_OR_MEM_TO_SEG_REG,
        Op.MOV_SEG_REG_TO_REG_OR_MEM,
    ),
    "mov",
)
# registers | |
class Register8(StrEnum): | |
AL = auto() | |
CL = auto() | |
DL = auto() | |
BL = auto() | |
AH = auto() | |
CH = auto() | |
DH = auto() | |
BH = auto() | |
class Register16(StrEnum): | |
AX = auto() | |
CX = auto() | |
DX = auto() | |
BX = auto() | |
SP = auto() | |
BP = auto() | |
SI = auto() | |
DI = auto() | |
# 3-bit register field value -> 8-bit register. The tuple position is the
# field value, so the order below (0b000 through 0b111) is significant.
REG_LOOKUP_8 = dict(
    enumerate(
        (
            Register8.AL,  # 0b000
            Register8.CL,  # 0b001
            Register8.DL,  # 0b010
            Register8.BL,  # 0b011
            Register8.AH,  # 0b100
            Register8.CH,  # 0b101
            Register8.DH,  # 0b110
            Register8.BH,  # 0b111
        )
    )
)
# 3-bit register field value -> 16-bit register. The tuple position is the
# field value, so the order below (0b000 through 0b111) is significant.
REG_LOOKUP_16 = dict(
    enumerate(
        (
            Register16.AX,  # 0b000
            Register16.CX,  # 0b001
            Register16.DX,  # 0b010
            Register16.BX,  # 0b011
            Register16.SP,  # 0b100
            Register16.BP,  # 0b101
            Register16.SI,  # 0b110
            Register16.DI,  # 0b111
        )
    )
)
# Selects the register table that matches the operand width.
NUM_BITS_TO_REG_LOOKUP = dict(
    (
        (NumBits.SIXTEEN, REG_LOOKUP_16),
        (NumBits.EIGHT, REG_LOOKUP_8),
    )
)
# mode | |
class Mode(StrEnum): | |
MEMORY_NO_DISPLACEMENT = auto() | |
MEMORY_8BIT_DISPLACEMENT = auto() | |
MEMORY_16BIT_DISPLACEMENT = auto() | |
REGISTER = auto() | |
# 2-bit MOD field value -> addressing mode. The tuple position is the
# field value, so the order below (0b00 through 0b11) is significant.
MOD_LOOKUP = dict(
    enumerate(
        (
            Mode.MEMORY_NO_DISPLACEMENT,  # 0b00
            Mode.MEMORY_8BIT_DISPLACEMENT,  # 0b01
            Mode.MEMORY_16BIT_DISPLACEMENT,  # 0b10
            Mode.REGISTER,  # 0b11
        )
    )
)
def get_op(first_byte: int) -> Op:
    """Identify the operation encoded by an instruction's first byte.

    Each entry of OP_BITS_TO_NAMES is an opcode prefix. The byte is shifted
    right until only as many high bits remain as the prefix has, and the
    first prefix that equals those bits determines the result.

    Args:
        first_byte: The first byte of the instruction, as an int.

    Returns:
        The operation paired with the first matching prefix.

    Raises:
        NotFound: If no known prefix matches.
    """
    for identifier, op in OP_BITS_TO_NAMES:
        # NOTE: the prefix width is inferred from the value itself, so this
        # only works for patterns whose leading bit is 1.
        prefix_width = identifier.bit_length()
        if first_byte >> (8 - prefix_width) == identifier:
            return op
    raise NotFound(f"no known opcode matches first byte {first_byte:#010b}")
def get_num_bits(first_byte: int) -> NumBits:
    """Return the operand width selected by bit 0 (the W bit) of *first_byte*.

    A set bit means 16-bit operands; a clear bit means 8-bit operands.
    """
    if first_byte & MASK_BIT0:
        return NumBits.SIXTEEN
    return NumBits.EIGHT
def get_register(second_byte: int, num_bits: NumBits, dest: bool = False) -> Register16 | Register8:
    """Decode one of the two 3-bit register fields of *second_byte*.

    Args:
        second_byte: The instruction's second byte (0b[MOD:2][REG:3][R/M:3]).
        num_bits: Operand width; selects the 8-bit or 16-bit register table.
        dest: When true, read the R/M field (bits 0-2); otherwise read the
            REG field (bits 3-5).

    Returns:
        The register named by the selected field.
    """
    table = NUM_BITS_TO_REG_LOOKUP[num_bits]
    shift = BYTE_INDEX_RM if dest else BYTE_INDEX_REG
    field = (second_byte >> shift) & MASK_BITS_0_to_2
    return table[field]
def get_mode(second_byte: int) -> Mode:
    """Return the addressing mode held in the bits above bit 5 of *second_byte*."""
    mod_field = second_byte >> BYTE_INDEX_MODE
    return MOD_LOOKUP[mod_field]
def generate_asm_line(op: Op, reg_dest: Register16 | Register8, reg_src: Register16 | Register8) -> str:
    """Format one newline-terminated line of assembly: ``<mnemonic> <dest>, <src>``."""
    mnemonic = OP_NAME_LOOKUP[op]
    operands = f"{reg_dest}, {reg_src}"
    return f"{mnemonic} {operands}\n"
def main():
    """Disassemble the listing-38 binary and write the result to ``solution.asm``.

    The input is consumed as two-byte instructions laid out as
    0b[OP:6][D:1][W:1], 0b[MOD:2][REG:3][R/M:3]. Only register-to-register
    mov is supported.

    Raises:
        ValueError: If the binary ends with a lone trailing byte.
        NotFound: If a first byte matches no known opcode.
        NotImplementedError: If the operation or addressing mode is one the
            decoder does not handle yet.
    """
    binary = read_binary_file(PATH_L38_MANY_REGISTER_MOV_BINARY)
    # Collect the pieces and join once instead of growing a string with +=.
    lines = ["bits 16\n\n"]
    for pair in batched(binary, 2):
        # batched() yields a short final tuple when the length is odd; name
        # the problem instead of failing on tuple unpacking.
        if len(pair) != 2:
            raise ValueError("truncated instruction: binary ends with a lone trailing byte")
        first, second = pair
        op = get_op(first)  # OP
        if op is not Op.MOV_REG_TO_REG_OR_MEM:
            raise NotImplementedError(f"unsupported operation: {op}")
        num_bits = get_num_bits(first)  # W
        mode = get_mode(second)  # MOD
        if mode is not Mode.REGISTER:
            raise NotImplementedError(f"unsupported addressing mode: {mode}")
        register_src = get_register(second, num_bits)  # REG
        register_dest = get_register(second, num_bits, dest=True)  # R/M
        lines.append(generate_asm_line(op, register_dest, register_src))
    p.Path("solution.asm").write_text("".join(lines))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment