Skip to content

Instantly share code, notes, and snippets.

@szapp
Created August 7, 2025 21:20
Show Gist options
  • Select an option

  • Save szapp/97c42048f1ebc4550c90798b3c96777c to your computer and use it in GitHub Desktop.

Select an option

Save szapp/97c42048f1ebc4550c90798b3c96777c to your computer and use it in GitHub Desktop.
Format hexadecimal machine code bytes into Daedalus-conformant recipes. Run with `uv run machine-code-to-daedalus.py`.
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "cyclopts",
# ]
# ///
import logging
import re
import sys
from itertools import zip_longest
from pathlib import Path
from typing import Annotated
import cyclopts
# Root CLI application. The default parameter configuration enables
# `consume_multiple`; presumably it applies to every command registered on the
# app -- confirm against the cyclopts documentation.
app = cyclopts.App(default_parameter=cyclopts.Parameter(consume_multiple=True))
# Module logger named after this script's file name without its extension.
logger = logging.getLogger(Path(__file__).stem)
# Matches text that consists solely of uppercase hexadecimal digits and spaces.
hex_pattern = re.compile("^[A-F0-9 ]+$")
class InvalidHexValues(Exception):
    """Signal that the supplied values are not valid hexadecimal."""
def process_hex_chunks(hex_bytes: list[str]) -> list[tuple[str, int]]:
    """Turn a list of hexadecimal bytes into integers while reversing endianness.

    Consecutive bytes are grouped into chunks of at most four bytes. Wildcards
    for addresses or values ("XX") are grouped separately from concrete bytes,
    so a chunk never mixes the two.

    Args:
        hex_bytes: List of strings of two digit hexadecimal bytes or the
            wildcard "XX". Matching is case-insensitive.

    Returns:
        List of tuples containing the chunk text and the number of bytes in the
        chunk. For concrete bytes the text is the decimal value of the bytes
        read in reverse order; for wildcards it is "XX" repeated once per byte.

    Raises:
        InvalidHexValues: If an entry is neither "XX" nor exactly two
            hexadecimal digits.
    """
    max_chunk_size = 4
    hex_digits = frozenset("0123456789ABCDEF")

    results: list[tuple[str, int]] = []
    current_chunk: list[str] = []
    current_is_wildcard = False

    def process_chunk(chunk: list[str], is_wildcard: bool) -> tuple[str, int]:
        if is_wildcard:
            return ("XX" * len(chunk), len(chunk))
        # Reverse the byte order before interpreting the chunk as one number.
        hex_str = "".join(reversed(chunk))
        return (str(int(hex_str, 16)), len(chunk))

    for raw in hex_bytes:
        b = raw.upper()
        is_wildcard = b == "XX"

        # Validate every character strictly. Whitespace inside an entry must
        # not slip through, because int() would either ignore it silently or
        # fail later with a ValueError instead of InvalidHexValues.
        if not is_wildcard and (len(b) != 2 or not hex_digits.issuperset(b)):
            raise InvalidHexValues(
                f"Invalid hexadecimal byte '{b}' in {hex_bytes}."
            )

        # A chunk only ever holds one kind of entry, so it is closed when the
        # kind changes or when it has reached the maximum size.
        if current_chunk and (
            is_wildcard != current_is_wildcard
            or len(current_chunk) >= max_chunk_size
        ):
            results.append(process_chunk(current_chunk, current_is_wildcard))
            current_chunk = []

        current_chunk.append(b)
        current_is_wildcard = is_wildcard

    if current_chunk:
        results.append(process_chunk(current_chunk, current_is_wildcard))
    return results
def format_output(stream: list[tuple[str, int]]) -> list[str]:
    """Render every chunk as a Daedalus conform call.

    Args:
        stream: List of tuples containing the chunk text and its number of bytes.

    Returns:
        List with one formatted call per chunk, in input order.
    """
    template = "ASM_{size}({chunk});"
    return [template.format(size=size, chunk=chunk) for chunk, size in stream]
def core(lines: list[str]) -> tuple[list[str], int]:
    """Process the lines of hex bytes and format the output.

    Lines starting with "//" are passed through as comments. All other lines
    are split on whitespace into bytes, converted into formatted calls and
    padded so that the calls line up in columns across lines.

    Args:
        lines: List of lines containing hexadecimal bytes separated by whitespace.

    Returns:
        Tuple with the output (one entry per input line) and the number of
        total bytes.
    """
    num_bytes = 0
    rows: list[list[str]] = []
    widths: list[list[int]] = []

    for line in lines:
        line = line.strip()
        if line.startswith("//"):
            # Comments count neither towards the bytes nor the column widths.
            widths.append([])
            rows.append([line])
            continue

        # Split on any run of whitespace so that repeated spaces or tabs do not
        # produce empty tokens that would be rejected as invalid bytes.
        stream = process_hex_chunks(line.upper().split())
        num_bytes += sum(size for _, size in stream)
        cells = format_output(stream)
        widths.append([len(cell) for cell in cells])
        rows.append(cells)

    # Find the longest text chunks for alignment
    pad_sizes = [max(column) for column in zip_longest(*widths, fillvalue=0)]

    # Add padding
    result = []
    for row in rows:
        padded = [
            # A row with more cells than measured columns (a comment row) gets
            # "" as its pad, hence the fallback to 0.
            cell.ljust(pad or 0)
            for cell, pad in zip_longest(row, pad_sizes, fillvalue="")
        ]
        result.append(" ".join(padded))
    return result, num_bytes
@app.command
def file(path: cyclopts.types.ExistingTxtPath, /) -> int:
    """Convert machine code from file into parsable Daedalus.

    This allows converting multiple lines with text alignment and comments.
    It's compatible with the output from https://defuse.ca/online-x86-assembler.htm.

    Args:
        path: Path to a file containing lines of hexadecimal bytes delimited by spaces.

    Returns:
        Exit code 0 on success.
    """
    logger.debug("Read file and split content into lines.")
    lines_str = path.read_text().splitlines()

    logger.debug("Strip defuse.ca decorations.")
    lines_clean = []
    comments = []
    # Leading "<address>: " prefix of a line.
    addr_pattern = re.compile(r"^[\d\w]+:\s+")
    # Label line "<address> <name>:", rewritten below into a "// name:" comment.
    label_pattern = re.compile(r"^[\d\w]{8,}\s\<(.*)\>:\s*$")
    # Two-character byte groups, a gap of further whitespace, then the assembly.
    asm_pattern = re.compile(r"^((?:[\d\w]{2}\s)+)\s{2,}(.*)$")
    for line in lines_str:
        # Skip blank lines, including whitespace-only ones, which would
        # otherwise be handed on as an invalid (empty) byte.
        if not line.strip():
            continue
        comment = ""
        line = addr_pattern.sub("", line)
        line = label_pattern.sub(r"// \1:", line)
        # Split machine code from assembly
        if matched := asm_pattern.search(line):
            line = matched.group(1)
            comment = matched.group(2)
        lines_clean.append(line)
        comments.append(comment)

    logger.debug("Convert bytes to Daedalus recipe.")
    result, num_bytes = core(lines_clean)

    logger.debug("Add assembly to the end of each line.")
    result = [a + " // " + b if b else a for a, b in zip(result, comments)]

    logger.debug("Print results to stdout.\n")
    print(f"ASM_Open({num_bytes}+1);")
    print("\n".join(result))
    print("code = ASM_Close();")
    return 0
@app.default
def main(bytes: list[str], /) -> int:
    """Convert a line of machine code into parsable Daedalus.

    Use the command "file" to convert multiple lines with text alignment and comments.
    It's compatible with the output from https://defuse.ca/online-x86-assembler.htm.

    Args:
        bytes: Hexadecimal bytes delimited by spaces.

    Returns:
        Exit code 0 on success.
    """
    # All tokens form one single line of machine code.
    lines_str = [" ".join(bytes)]
    result, _ = core(lines_str)
    # Drop the alignment padding, which is meaningless for a single line.
    print(result.pop().strip())
    return 0
@app.meta.default
def meta_app(
    *tokens: Annotated[str, cyclopts.Parameter(show=False)],  # type: ignore
    verbose: bool = False,
):
    """Wrapper for error handling and logging.

    Configures logging, forwards the remaining command line tokens to the
    application and maps failures to exit codes.

    Args:
        verbose: Show debug output.

    Returns:
        The value returned by the application on success, 2 if interrupted by
        the user, 3 for invalid hexadecimal input and 1 for any other error.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.WARNING,
        format="%(asctime)s | %(name)s | %(levelname)-8s - %(message)s",
    )
    try:
        return app(tokens)
    except KeyboardInterrupt:
        logger.warning("Interrupted by user.")
        return 2
    except InvalidHexValues as exc:
        logger.error(exc)
        return 3
    except Exception as exc:
        logger.error("An unexpected error occurred: %s", exc)
        # Report the failure to the caller; falling through would return None,
        # which sys.exit treats as a successful exit status.
        return 1
if __name__ == "__main__":
    # Run the meta app and hand its result to the interpreter as exit status.
    exit_code = app.meta()
    sys.exit(exit_code)
@szapp
Copy link
Author

szapp commented Aug 7, 2025

Workflow:

  1. Write assembly code.
  2. If necessary, replace any constants or addresses with generic 32bit addresses, e.g. 0x12345678.
  3. Run it through defuse.ca's online assembler.
  4. Save the output as a file.
  5. If necessary, replace the previous placeholder bytes with "XX", e.g. 78 56 34 12 to XX XX XX XX.
  6. Run uv run machine-code-to-daedalus.py file path/to/file.asm.
  7. Copy and paste the stdout into the Daedalus script.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment