Last active
August 29, 2018 01:17
-
-
Save RickyCook/06cb719870f351c329aac5cc9736539a to your computer and use it in GitHub Desktop.
Approximate recreation of the ``hexdump`` CLI utility in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
""" Approximate recreation of the ``hexdump`` CLI utility.

Several versions of the function are included:

  hexdump_lines: Dumps out an iterator of lines. I/O is streamed by generators
  hexdump_string: Splits, and dumps out a string
  hexdump_stream: Splits, and dumps out a stream. I/O is streamed

Every character that is not an ASCII letter or digit (spaces included) is
shown as ``.`` in the right-hand column by default.

Examples:

  >>> hexdump_string(b'test')
  0000000 74 65 73 74                                     test

  >>> hexdump_string(b'test\\nme\\nhere')
  0000000 74 65 73 74 0a 6d 65 0a 68 65 72 65             test.me.here

  >>> hexdump_string(b'this string goes over multiple lines')
  0000000 74 68 69 73 20 73 74 72 69 6e 67 20 67 6f 65 73 this.string.goes
  0000010 20 6f 76 65 72 20 6d 75 6c 74 69 70 6c 65 20 6c .over.multiple.l
  0000020 69 6e 65 73                                     ines

  When run as ``echo test | ./hexdump.py``:

  >>> hexdump_stream(sys.stdin)
  0000000 74 65 73 74 0a                                  test.
"""
import re | |
import sys | |
def hexdump_lines(
    lines,
    line_length = 16,
    line_label_format = '{:07x} ',
    byte_format = '{:02x} ',
    line_suffix_format = '{}',
    line_suffix_sub_re = re.compile(r'[^a-zA-Z0-9]'),
    stream = sys.stdout,
):
    """ hexdump an iterator of pre-split lines

    Each item of ``lines`` becomes one output row made of up to three
    parts: the label (running offset of the row's first byte), the
    formatted byte values padded to a fixed width, and the data itself
    with every ``line_suffix_sub_re`` match replaced by ``.``. Passing
    ``None`` for any of the three format strings omits that part.

    Args:
      lines (iter): Iterator of lines to dump. Items may be ``str`` (each
        character is dumped by its code point) or ``bytes``/``bytearray``
        (each byte is dumped by its value)
      line_length (int): Number of bytes to print per line. Only used to
        size the padding of the byte column; items are not re-split
      line_label_format (str): Format string for the line label (start index)
      byte_format (str): Format string for each byte
      line_suffix_format (str): Format string for the line suffix (data)
      line_suffix_sub_re: Compiled ``str`` regex; every match in the data
        is replaced with ``.`` in the suffix
      stream: Where to write the output to (needs a ``write`` method that
        accepts ``str``)
    """
    line_bytes_format = None
    if byte_format is not None:
        # Width of one rendered byte, so that short (typically final) rows
        # are padded and the suffix column stays aligned. Computed only
        # when a byte column is requested: ``byte_format=None`` must not
        # blow up on ``None.format``.
        byte_length = len(byte_format.format(0))
        line_bytes_format = '{:' + str(line_length * byte_length) + 's}'

    start_idx = 0
    for line in lines:
        if isinstance(line, (bytes, bytearray)):
            # Iterating bytes yields ints on Python 3, so ``ord`` is not
            # applicable. latin-1 maps every byte to exactly one character,
            # which keeps the suffix the same length as the data.
            codes = list(bytearray(line))
            text = bytes(line).decode('latin-1')
        else:
            codes = [ord(char) for char in line]
            text = line

        if line_label_format is not None:
            stream.write(line_label_format.format(start_idx))
        if byte_format is not None:
            stream.write(line_bytes_format.format(
                ''.join(byte_format.format(code) for code in codes)
            ))
        if line_suffix_format is not None:
            # No third positional argument here: for ``Pattern.sub`` it is
            # ``count`` (not flags) and would cap the number of
            # replacements per line.
            line_repr = line_suffix_sub_re.sub('.', text)
            stream.write(line_suffix_format.format(line_repr))
        start_idx += len(line)
        stream.write('\n')
def hexdump_string(
    value,
    line_length = 16,
    **kwargs
):
    """ hexdump an in-memory string, ``line_length`` items per row

    Args:
      value (str): String value to dump out
      line_length (int): Size of each slice handed to ``hexdump_lines``
      **kwargs: Passed through unchanged to ``hexdump_lines``
    """
    # Offsets of the first item of every row; built up front so an invalid
    # ``line_length`` or an unsized ``value`` fails before any dumping starts.
    starts = range(0, len(value), line_length)

    def slices():
        for begin in starts:
            yield value[begin : begin + line_length]

    hexdump_lines(slices(), line_length = line_length, **kwargs)
def hexdump_stream(
    value,
    line_length = 16,
    **kwargs
):
    """ hexdump everything that can be read from a stream

    The stream is consumed lazily, one ``read(line_length)`` call per
    output row, until a read returns an empty result.

    Args:
      value: A stream (object with ``read(size)``) to dump out
      line_length (int): Size passed to each ``read`` call
      **kwargs: Passed through unchanged to ``hexdump_lines``
    """
    def chunks():
        chunk = value.read(line_length)
        while chunk:
            yield chunk
            chunk = value.read(line_length)

    hexdump_lines(chunks(), line_length = line_length, **kwargs)
if __name__ == '__main__':
    # Script entry point: dump whatever arrives on standard input.
    hexdump_stream(value = sys.stdin)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment