Skip to content

Instantly share code, notes, and snippets.

@zevaverbach
Last active August 2, 2025 14:39
Show Gist options
  • Save zevaverbach/520f9724d62df16d45549c53907edfcf to your computer and use it in GitHub Desktop.
Save zevaverbach/520f9724d62df16d45549c53907edfcf to your computer and use it in GitHub Desktop.
decoding 8086 instructions (Computer Enhance) -- first homework
from enum import Enum, StrEnum, auto
from itertools import batched
import pathlib as p
# Locations of the course listings, kept as plain strings with a leading "~".
PATH_ASSETS = "~/repos/learn-asm/2025/computer_enhance/repo/perfaware"
PATH_ASSETS_PT1 = PATH_ASSETS + "/part1"

# Each listing has a binary path and the matching ".asm" path beside it.
PATH_L37_SINGLE_REGISTER_MOV_BINARY = PATH_ASSETS_PT1 + "/listing_0037_single_register_mov"
PATH_L37_SINGLE_REGISTER_MOV_ASM = f"{PATH_L37_SINGLE_REGISTER_MOV_BINARY}.asm"
PATH_L38_MANY_REGISTER_MOV_BINARY = PATH_ASSETS_PT1 + "/listing_0038_many_register_mov"
PATH_L38_MANY_REGISTER_MOV_ASM = f"{PATH_L38_MANY_REGISTER_MOV_BINARY}.asm"
class NotFound(Exception):
    """Raised when a lookup finds no matching entry (see ``get_op``)."""
def read_binary_file(to_open: str) -> bytes:
    """Return the raw bytes stored in the file at *to_open*.

    A leading ``~`` in the path is expanded to the user's home directory
    before the file is opened.
    """
    resolved = p.Path(to_open).expanduser()
    with resolved.open("rb") as handle:
        return handle.read()
class NumBits(Enum):
EIGHT = 8
SIXTEEN = 16
# Masks that isolate the low bit(s) of a value.
MASK_BIT0 = 0x01          # lowest bit only
MASK_BITS_0_to_2 = 0x07   # lowest three bits

# Despite the "BYTE_INDEX" prefix these are bit offsets: the number of places
# the second instruction byte is shifted right to bring a field down to bit 0.
BYTE_INDEX_RM = 0
BYTE_INDEX_REG = 3
BYTE_INDEX_MODE = 6
# ops
class Op(StrEnum):
MOV_REG_TO_REG_OR_MEM = auto()
MOV_TO_REG_OR_MEM = auto()
MOV_TO_REG = auto()
MOV_MEM_TO_ACCUM = auto()
MOV_REG_OR_MEM_TO_SEG_REG = auto()
MOV_SEG_REG_TO_REG_OR_MEM = auto()
# Opcode bit patterns, compared against the high bits of an instruction's
# first byte.  get_op derives each pattern's width from its binary digits, so
# leading zero bits would not be counted.
MOV_REG_TO_REG_OR_MEM = 0b100010 # 6 bits
MOV_TO_REG_OR_MEM = 0b1100011 # 7 bits
MOV_TO_REG = 0b1011 # 4 bits
MOV_MEM_TO_ACCUM = 0b1010000 # 7 bits
MOV_REG_OR_MEM_TO_SEG_REG = 0b10001110 # 8 bits
MOV_SEG_REG_TO_REG_OR_MEM = 0b10001100 # 8 bits
# (opcode bit pattern, Op) pairs scanned in order by get_op; the first pattern
# that equals the high bits of the byte wins.
OP_BITS_TO_NAMES = (
    (MOV_REG_TO_REG_OR_MEM, Op.MOV_REG_TO_REG_OR_MEM),
    (MOV_TO_REG_OR_MEM, Op.MOV_TO_REG_OR_MEM),
    # The 4-bit MOV_TO_REG pattern is its own operation, distinct from
    # MOV_TO_REG_OR_MEM.
    (MOV_TO_REG, Op.MOV_TO_REG),
    (MOV_MEM_TO_ACCUM, Op.MOV_MEM_TO_ACCUM),
    (MOV_REG_OR_MEM_TO_SEG_REG, Op.MOV_REG_OR_MEM_TO_SEG_REG),
    (MOV_SEG_REG_TO_REG_OR_MEM, Op.MOV_SEG_REG_TO_REG_OR_MEM),
)
# Assembly mnemonic emitted for each Op; every operation listed here is
# printed as "mov".
OP_NAME_LOOKUP = dict.fromkeys(
    (
        Op.MOV_REG_TO_REG_OR_MEM,
        Op.MOV_TO_REG_OR_MEM,
        Op.MOV_TO_REG,
        Op.MOV_MEM_TO_ACCUM,
        Op.MOV_REG_OR_MEM_TO_SEG_REG,
        Op.MOV_SEG_REG_TO_REG_OR_MEM,
    ),
    "mov",
)
# registers
class Register8(StrEnum):
AL = auto()
CL = auto()
DL = auto()
BL = auto()
AH = auto()
CH = auto()
DH = auto()
BH = auto()
class Register16(StrEnum):
AX = auto()
CX = auto()
DX = auto()
BX = auto()
SP = auto()
BP = auto()
SI = auto()
DI = auto()
# 3-bit register field -> 8-bit register.  The tuple is written in encoding
# order, so enumerate() supplies the keys 0b000 through 0b111.
REG_LOOKUP_8 = dict(
    enumerate(
        (
            Register8.AL,  # 0b000
            Register8.CL,  # 0b001
            Register8.DL,  # 0b010
            Register8.BL,  # 0b011
            Register8.AH,  # 0b100
            Register8.CH,  # 0b101
            Register8.DH,  # 0b110
            Register8.BH,  # 0b111
        )
    )
)

# 3-bit register field -> 16-bit register, built the same way.
REG_LOOKUP_16 = dict(
    enumerate(
        (
            Register16.AX,  # 0b000
            Register16.CX,  # 0b001
            Register16.DX,  # 0b010
            Register16.BX,  # 0b011
            Register16.SP,  # 0b100
            Register16.BP,  # 0b101
            Register16.SI,  # 0b110
            Register16.DI,  # 0b111
        )
    )
)

# Operand width -> the register table to use for that width.
NUM_BITS_TO_REG_LOOKUP = {
    NumBits.SIXTEEN: REG_LOOKUP_16,
    NumBits.EIGHT: REG_LOOKUP_8,
}
# mode
class Mode(StrEnum):
MEMORY_NO_DISPLACEMENT = auto()
MEMORY_8BIT_DISPLACEMENT = auto()
MEMORY_16BIT_DISPLACEMENT = auto()
REGISTER = auto()
# Two-bit MOD field -> addressing mode.  The tuple is written in encoding
# order, so enumerate() supplies the keys 0b00 through 0b11.
MOD_LOOKUP = dict(
    enumerate(
        (
            Mode.MEMORY_NO_DISPLACEMENT,     # 0b00
            Mode.MEMORY_8BIT_DISPLACEMENT,   # 0b01
            Mode.MEMORY_16BIT_DISPLACEMENT,  # 0b10
            Mode.REGISTER,                   # 0b11
        )
    )
)
def get_op(first_byte: int) -> Op:
    """Identify the operation encoded in the first byte of an instruction.

    Each pattern in ``OP_BITS_TO_NAMES`` is compared against as many of the
    high bits of *first_byte* as the pattern has significant bits.  Patterns
    are tried in table order and the first match is returned.

    Args:
        first_byte: the instruction's first byte, as an int in 0..255.

    Returns:
        The ``Op`` paired with the first matching pattern.

    Raises:
        NotFound: if no pattern matches; the message shows the byte in binary.
    """
    for identifier, op in OP_BITS_TO_NAMES:
        # bit_length() does not count leading zeros, so every pattern is
        # assumed to start with a 1 bit.
        low_bits_to_drop = 8 - identifier.bit_length()
        if (first_byte >> low_bits_to_drop) == identifier:
            return op
    raise NotFound(f"no known opcode matches first byte {first_byte:#010b}")
def get_num_bits(first_byte: int) -> NumBits:
    """Return the operand width selected by the lowest bit of *first_byte*.

    A set bit selects ``NumBits.SIXTEEN``; a clear bit selects
    ``NumBits.EIGHT``.
    """
    if first_byte & MASK_BIT0:
        return NumBits.SIXTEEN
    return NumBits.EIGHT
def get_register(second_byte: int, num_bits: NumBits, dest: bool = False) -> Register16 | Register8:
    """Decode one of the two 3-bit register fields held in *second_byte*.

    Args:
        second_byte: the byte containing the register fields.
        num_bits: operand width; selects the 8- or 16-bit register table.
        dest: when true, read the field at bit offset ``BYTE_INDEX_RM``;
            otherwise read the field at bit offset ``BYTE_INDEX_REG``.

    Returns:
        The register that the selected field maps to in the chosen table.
    """
    table = NUM_BITS_TO_REG_LOOKUP[num_bits]
    if dest:
        shift = BYTE_INDEX_RM
    else:
        shift = BYTE_INDEX_REG
    field = (second_byte >> shift) & MASK_BITS_0_to_2
    return table[field]
def get_mode(second_byte: int) -> Mode:
    """Return the addressing mode encoded above bit 5 of *second_byte*.

    The byte is shifted right by ``BYTE_INDEX_MODE`` and the result is looked
    up in ``MOD_LOOKUP``; a value with no entry there raises ``KeyError``.
    """
    mod_field = second_byte >> BYTE_INDEX_MODE
    return MOD_LOOKUP[mod_field]
def generate_asm_line(op: Op, reg_dest: Register16 | Register8, reg_src: Register16 | Register8) -> str:
    """Format one two-operand instruction as a newline-terminated asm line.

    The mnemonic comes from ``OP_NAME_LOOKUP``; the destination operand is
    written before the source operand.
    """
    mnemonic = OP_NAME_LOOKUP[op]
    return "{} {}, {}\n".format(mnemonic, reg_dest, reg_src)
def main():
    """Disassemble the listing-38 binary and write ``solution.asm``.

    Every instruction is read as two bytes laid out as
    ``0b[OP:6][D:1][W:1], 0b[MOD:2][REG:3][R/M:3]``.  Only register-to-register
    ``mov`` is decoded.  The D bit decides which field is the destination:
    when it is set the REG field is the destination and R/M the source,
    otherwise R/M is the destination and REG the source.

    Raises:
        ValueError: if the input ends in the middle of a two-byte instruction.
        NotImplementedError: for any operation or addressing mode other than
            register-to-register ``mov``.
        NotFound: propagated from ``get_op`` for an unrecognised first byte.
    """
    binary = read_binary_file(PATH_L38_MANY_REGISTER_MOV_BINARY)
    # Collect pieces and join once at the end instead of growing a string.
    lines = ["bits 16\n\n"]
    for pair in batched(binary, 2):
        if len(pair) != 2:
            # batched() yields a short final tuple for odd-length input.
            raise ValueError("input ends in the middle of a two-byte instruction")
        first, second = pair

        op = get_op(first)  # OP
        if op != Op.MOV_REG_TO_REG_OR_MEM:
            raise NotImplementedError(f"unsupported operation: {op}")

        num_bits = get_num_bits(first)  # W
        mode = get_mode(second)  # MOD
        if mode != Mode.REGISTER:
            raise NotImplementedError(f"unsupported addressing mode: {mode}")

        reg_field = get_register(second, num_bits)  # REG
        rm_field = get_register(second, num_bits, dest=True)  # R/M
        if (first >> 1) & 1:  # D set: REG is the destination
            register_dest, register_src = reg_field, rm_field
        else:  # D clear: REG is the source
            register_dest, register_src = rm_field, reg_field
        lines.append(generate_asm_line(op, register_dest, register_src))

    p.Path("solution.asm").write_text("".join(lines))


# Run the decoder only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment