Skip to content

Instantly share code, notes, and snippets.

@RickyCook
Last active August 29, 2018 01:17
Show Gist options
  • Save RickyCook/06cb719870f351c329aac5cc9736539a to your computer and use it in GitHub Desktop.
Save RickyCook/06cb719870f351c329aac5cc9736539a to your computer and use it in GitHub Desktop.
Approximate recreation of the ``hexdump`` CLI utility in Python
#!/usr/bin/env python
""" Approximate recreation of the ``hexdump`` CLI utility.
Several versions of the function are included:
hexdump_lines: Dumps out an iterator of lines. I/O is streamed by generators
hexdump_string: Splits, and dumps out a string
hexdump_stream: Splits, and dumps out a stream. I/O is streamed
Examples:
>>> hexdump_string(b'test')
0000000 74 65 73 74 test
>>> hexdump_string(b'test\\nme\\nhere')
0000000 74 65 73 74 0a 6d 65 0a 68 65 72 65 test.me.here
>>> hexdump_string(b'this string goes over multiple lines')
0000000 74 68 69 73 20 73 74 72 69 6e 67 20 67 6f 65 73 this.string.goes
0000010 20 6f 76 65 72 20 6d 75 6c 74 69 70 6c 65 20 6c .over.multiple.l
0000020 69 6e 65 73 ines
When ``echo test | ./hexdump.py``
>>> hexdump_stream(sys.stdin)
0000000 74 65 73 74 0a test.
"""
import re
import sys
def hexdump_lines(
    lines,
    line_length = 16,
    line_label_format = '{:07x} ',
    byte_format = '{:02x} ',
    line_suffix_format = '{}',
    line_suffix_sub_re = re.compile(r'[^a-zA-Z0-9]'),
    stream = sys.stdout,
):
    """ hexdump an iterator of pre-split lines

    Each element of ``lines`` produces one output row made of up to three
    columns: the offset label, the formatted byte values, and a "printable"
    rendering of the data. Passing ``None`` for a column's format string
    leaves that column out.

    Args:
        lines (iter): Iterator of lines to dump. Each line may be a ``str``
            or a ``bytes``/``bytearray`` chunk
        line_length (int): Number of bytes to print per line. Only used to
            pad the byte column so the suffix column lines up; lines are not
            re-split here
        line_label_format (str): Format string for the line label (start
            index), or ``None`` to omit it
        byte_format (str): Format string for each byte, or ``None`` to omit
            the byte column
        line_suffix_format (str): Format string for the line suffix (data),
            or ``None`` to omit it
        line_suffix_sub_re: Compiled regex; every match in the line is
            replaced with ``.`` in the suffix column
        stream: Where to write the output to (anything with ``write``)
    """
    if byte_format is not None:
        # Width of one formatted byte times the bytes per line gives the
        # width of a full byte column; shorter (final) lines are padded to it.
        byte_length = len(byte_format.format(0))
        line_bytes_format = '{:' + str(line_length * byte_length) + 's}'

    start_idx = 0
    for line in lines:
        is_binary = isinstance(line, (bytes, bytearray))

        if line_label_format is not None:
            stream.write(line_label_format.format(start_idx))

        if byte_format is not None:
            if is_binary:
                # Iterating a bytearray always yields integers.
                byte_values = bytearray(line)
            else:
                byte_values = [ord(char) for char in line]
            stream.write(line_bytes_format.format(''.join(
                byte_format.format(byte_value) for byte_value in byte_values
            )))

        if line_suffix_format is not None:
            # latin-1 maps every byte to exactly one character, so binary
            # input keeps a one-to-one byte/character correspondence.
            text = line.decode('latin-1') if is_binary else line
            # No count argument: replace *every* match. (The third positional
            # parameter of ``Pattern.sub`` is ``count``, not ``flags``.)
            line_repr = line_suffix_sub_re.sub('.', text)
            stream.write(line_suffix_format.format(line_repr))

        start_idx += len(line)
        stream.write('\n')
def hexdump_string(
    value,
    line_length=16,
    **kwargs
):
    """ hexdump an in-memory string

    The value is cut into consecutive slices of at most ``line_length``
    items and handed to ``hexdump_lines``; any extra keyword arguments are
    forwarded to it unchanged.

    Args:
        value (str): String value to dump out
        line_length (int): Number of items per output line
    """
    offsets = range(0, len(value), line_length)
    # Lazy generator: slices are produced one at a time as they are dumped.
    chunks = (value[offset:offset + line_length] for offset in offsets)
    hexdump_lines(chunks, line_length=line_length, **kwargs)
def hexdump_stream(
    value,
    line_length=16,
    **kwargs
):
    """ hexdump a readable stream

    Reads ``line_length`` items at a time via ``value.read`` and passes the
    chunks lazily to ``hexdump_lines``; any extra keyword arguments are
    forwarded to it unchanged.

    Args:
        value: A stream to split into chunks and stream-dump out. Must
            provide ``read(n)``; an empty (falsy) result ends the dump
        line_length (int): Number of items to read per output line
    """
    def read_chunks():
        # Read-ahead loop: stop as soon as read() comes back empty.
        chunk = value.read(line_length)
        while chunk:
            yield chunk
            chunk = value.read(line_length)

    hexdump_lines(read_chunks(), line_length=line_length, **kwargs)
if __name__ == '__main__':
    # Run as a script: dump everything that arrives on standard input.
    hexdump_stream(value=sys.stdin)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment