Created
August 7, 2025 21:20
-
-
Save szapp/97c42048f1ebc4550c90798b3c96777c to your computer and use it in GitHub Desktop.
Format hexadecimal machine-code bytes into Daedalus-conformant recipes. Run with `uv run machine-code-to-daedalus.py`.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.13" | |
| # dependencies = [ | |
| # "cyclopts", | |
| # ] | |
| # /// | |
| import logging | |
| import re | |
| import sys | |
| from itertools import zip_longest | |
| from pathlib import Path | |
| from typing import Annotated | |
| import cyclopts | |
# Command-line application; by default every parameter may consume several tokens.
app = cyclopts.App(
    default_parameter=cyclopts.Parameter(consume_multiple=True),
)

# Logger named after this script file (its file name without the extension).
logger = logging.getLogger(Path(__file__).stem)

# Matches a non-empty string consisting only of upper-case hex digits and spaces.
hex_pattern = re.compile("^[A-F0-9 ]+$")
class InvalidHexValues(Exception):
    """Signal that the input contains a value that is not a hexadecimal byte."""
def process_hex_chunks(hex_bytes: list[str]) -> list[tuple[str, int]]:
    """Turn a list of hexadecimal bytes into integers while reversing endianness.

    Consecutive bytes are grouped into chunks of at most four. Wildcards for
    addresses or values ("XX") are grouped separately from regular bytes, so a
    chunk never mixes both kinds.

    Args:
        hex_bytes: List of strings, each either a two digit hexadecimal byte or
            the wildcard "XX" (case-insensitive).

    Returns:
        List of tuples containing the chunk text and the number of bytes. The text
        is the decimal value of the bytes joined in reversed order, or "XX"
        repeated once per byte for a wildcard chunk.

    Raises:
        InvalidHexValues: If an entry is neither "XX" nor exactly two hexadecimal
            digits.
    """
    wildcard = "XX"
    max_chunk_size = 4
    hex_digits = frozenset("0123456789ABCDEF")

    def encode(chunk: list[str]) -> tuple[str, int]:
        # Chunks are homogeneous, so the first entry determines the kind.
        if chunk[0] == wildcard:
            return (wildcard * len(chunk), len(chunk))
        # Reversing makes the last byte of the chunk the most significant one.
        return (str(int("".join(reversed(chunk)), 16)), len(chunk))

    results = []
    current_chunk: list[str] = []
    for raw in hex_bytes:
        b = raw.upper()
        is_wildcard = b == wildcard
        # Check every character individually: entries containing spaces or any
        # other non-hex character must not reach int(), which would either
        # mis-parse them or fail with a ValueError.
        if not is_wildcard and (len(b) != 2 or not hex_digits.issuperset(b)):
            raise InvalidHexValues(
                f"Invalid hexadecimal byte '{b}' in {hex_bytes}."
            )
        # Close the running chunk when the kind of byte switches or it is full.
        kind_changed = bool(current_chunk) and (
            (current_chunk[0] == wildcard) != is_wildcard
        )
        if kind_changed or len(current_chunk) >= max_chunk_size:
            results.append(encode(current_chunk))
            current_chunk = []
        current_chunk.append(b)

    if current_chunk:
        results.append(encode(current_chunk))
    return results
def format_output(stream: list[tuple[str, int]]) -> list[str]:
    """Render every chunk as one Daedalus conform statement.

    Args:
        stream: Pairs of chunk text and the number of bytes the chunk spans.

    Returns:
        One formatted statement per pair, in input order.
    """
    statements = []
    for value, byte_count in stream:
        statements.append(f"ASM_{byte_count}({value});")
    return statements
def core(lines: list[str]) -> tuple[list[str], int]:
    """Process the lines of hex bytes and format the output in aligned columns.

    Lines starting with "//" are passed through as comments and contribute no
    bytes. On every other line the bytes may be separated by any run of
    whitespace; a blank line yields an empty row.

    Args:
        lines: List of lines containing hexadecimal bytes separated by whitespace.

    Returns:
        Tuple with the output (one row per input line) and the number of total
        bytes.
    """
    num_bytes = 0
    rows: list[list[str]] = []
    widths: list[list[int]] = []
    for line in lines:
        line = line.strip()
        if line.startswith("//"):
            # Comments take no part in the column width calculation.
            widths.append([])
            rows.append([line])
            continue
        # Split on any whitespace run. Splitting on a single space would turn
        # repeated spaces or tabs into empty "bytes" and reject valid input.
        stream = process_hex_chunks(line.upper().split())
        num_bytes += sum(size for _, size in stream)
        cells = format_output(stream)
        widths.append([len(cell) for cell in cells])
        rows.append(cells)

    # Widest cell of every column; rows with fewer cells count as width zero.
    pad_sizes = [max(column) for column in zip_longest(*widths, fillvalue=0)]

    result = []
    for row in rows:
        # Rows shorter than the column count are filled with empty cells. A row
        # longer than the column count (a comment when there are no byte rows)
        # gets "" as its width, which `pad or 0` turns into zero.
        padded = [
            cell.ljust(pad or 0)
            for cell, pad in zip_longest(row, pad_sizes, fillvalue="")
        ]
        result.append(" ".join(padded))
    return result, num_bytes
@app.command
def file(path: cyclopts.types.ExistingTxtPath, /) -> int:
    """Convert machine code from file into parsable Daedalus.

    This allows converting multiple lines with text alignment and comments.
    It's compatible with the output from https://defuse.ca/online-x86-assembler.htm.

    Args:
        path: Path to a file containing lines of hexadecimal bytes delimited by spaces.

    Returns:
        Exit code 0 on success.
    """
    # NOTE(review): ExistingTxtPath presumably restricts the input to existing
    # ".txt" files - confirm against the cyclopts documentation.
    logger.debug("Read file and split content into lines.")
    lines_str = path.read_text().splitlines()

    logger.debug("Strip defuse.ca decorations.")
    # Leading "<address>:" column.
    addr_pattern = re.compile(r"^[\d\w]+:\s+")
    # "<address> <label>:" lines; they are turned into "// label:" comments.
    label_pattern = re.compile(r"^[\d\w]{8,}\s\<(.*)\>:\s*$")
    # Machine code bytes, a gap of at least two whitespace characters, then the
    # assembly text.
    asm_pattern = re.compile(r"^((?:[\d\w]{2}\s)+)\s{2,}(.*)$")

    lines_clean = []
    comments = []
    for line in lines_str:
        # Skip blank lines, including those made up of whitespace only; they
        # carry no bytes and must not reach the hex conversion.
        if not line.strip():
            continue
        comment = ""
        line = addr_pattern.sub("", line)
        line = label_pattern.sub(r"// \1:", line)
        # Split machine code from assembly
        if matched := asm_pattern.search(line):
            line = matched.group(1)
            comment = matched.group(2)
        lines_clean.append(line)
        comments.append(comment)

    logger.debug("Convert bytes to Daedalus recipe.")
    result, num_bytes = core(lines_clean)

    logger.debug("Add assembly to the end of each line.")
    # The alignment padding is only needed in front of a trailing comment; rows
    # without one are stripped so that no trailing whitespace is printed.
    result = [
        row + " // " + comment if comment else row.rstrip()
        for row, comment in zip(result, comments)
    ]

    logger.debug("Print results to stdout.\n")
    print(f"ASM_Open({num_bytes}+1);")
    print("\n".join(result))
    print("code = ASM_Close();")
    return 0
@app.default
def main(bytes: list[str], /) -> int:
    """Convert a line of machine code into parsable Daedalus.

    Use the command "file" to convert multiple lines with text alignment and comments.
    It's compatible with the output from https://defuse.ca/online-x86-assembler.htm.

    Args:
        bytes: Hexadecimal bytes delimited by spaces.

    Returns:
        Exit code 0 on success.
    """
    # All tokens form a single input line, so core() returns exactly one row.
    rows, _ = core([" ".join(bytes)])
    # Strip the alignment padding; it is meaningless for a single row.
    print(rows[0].strip())
    return 0
@app.meta.default
def meta_app(
    *tokens: Annotated[str, cyclopts.Parameter(show=False)],  # type: ignore
    verbose: bool = False,
):
    """Wrapper for error handling and logging.

    Args:
        verbose: Show debug output.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.WARNING,
        format="%(asctime)s | %(name)s | %(levelname)-8s - %(message)s",
    )
    # The returned value is handed to sys.exit() by the script entry point:
    # the result of the invoked command on success, otherwise 2 for a user
    # interrupt, 3 for invalid hex input and 1 for any other failure.
    try:
        return app(tokens)
    except KeyboardInterrupt:
        logger.warning("Interrupted by user.")
        return 2
    except InvalidHexValues as exc:
        logger.error(exc)
        return 3
    except Exception as exc:
        logger.error("An unexpected error occurred: %s", exc)
        # Without an explicit value the function would return None, which
        # sys.exit() treats as success.
        return 1
if __name__ == "__main__":
    # Run the meta app and use its return value as the process exit status.
    exit_code = app.meta()
    sys.exit(exit_code)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Workflow:
uv run machine-code-to-daedalus.py file path/to/file.asm.