Created
November 13, 2013 22:07
-
-
Save dwf/7457323 to your computer and use it in GitHub Desktop.
Undo a textual hex dump (from the Firefox cache).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Reverses a hex dump (of the format shown in the Firefox cache). | |
| The format is as follows: | |
| <OFFSET> <BYTE> [<BYTE> [<BYTE> [...]]] [render] | |
| Where | |
| - <OFFSET> is 8 hex digits, possibly followed by a colon | |
| - each <BYTE> is 2 lowercase hex digits (maximum 16 per line) | |
| - [render] is ASCII rendering of the bytes, ignored. | |
| - <OFFSET> and the first <BYTE> are separated by two spaces, | |
| as are each pair of consecutive <BYTE>s | |
| """ | |
| __author__ = "David Warde-Farley" | |
| __license__ = "3-clause BSD" | |
| __email__ = "d.warde.farley" + chr(64) + "gmail" + chr(46) + "com" | |
| import argparse | |
| import sys | |
| import re | |
# One captured byte: exactly two lowercase hex digits.
HEX_BYTE = '([0-9a-f]{2})'
# The offset column: exactly eight lowercase hex digits.
OFFSET_REGEX = "[0-9a-f]{8}"
# Maximum number of byte columns on a single dump line.
MAX_BYTES = 16
# One mandatory byte followed by up to MAX_BYTES - 1 optional ones.  The
# separator after every column is mandatory even when the byte itself is
# absent, so short lines only match when they are space-padded.
BYTES_REGEX = HEX_BYTE + " " + (HEX_BYTE + "?" + " ") * (MAX_BYTES - 1)
# A full dump line: offset, optional colon, separator, then the byte columns.
LINE_REGEX = OFFSET_REGEX + ":? " + BYTES_REGEX


def unhexdump(infile, outfile):
    """
    Reads lines from infile, decodes them according to the dump format,
    writes bytes to outfile.

    Parameters
    ----------
    infile : iterable of str or bytes
        Source of dump lines (typically an open file).  Lines that are
        ``bytes`` (e.g. from a file opened with ``'rb'``) are decoded as
        latin-1 before matching, so both text and binary inputs work.
        Lines that do not match ``LINE_REGEX`` at their start are skipped.
    outfile : file-like object
        Destination.  Binary sinks receive the raw bytes.  Text sinks that
        expose an underlying ``buffer`` (such as ``sys.stdout``) have the
        raw bytes written to that buffer, so values >= 0x80 are not
        re-encoded.  Text sinks without a buffer (such as ``io.StringIO``)
        receive one character per byte (latin-1 mapping).

    Notes
    -----
    The whole output is accumulated in memory and emitted with a single
    ``write`` call after all input has been read.
    """
    import io

    # Compile once instead of going through re.match's cache on every line.
    line_pattern = re.compile(LINE_REGEX)

    decoded = bytearray()
    for line in infile:
        if isinstance(line, bytes):
            # latin-1 maps every byte to one code point, so decoding can
            # never fail and the hex digits stay intact for matching.
            line = line.decode("latin-1")
        match = line_pattern.match(line)
        if match:
            # Optional byte columns that were absent come back as None.
            decoded.extend(int(digits, 16) for digits in match.groups()
                           if digits is not None)

    payload = bytes(decoded)
    if isinstance(outfile, io.TextIOBase):
        binary_sink = getattr(outfile, "buffer", None)
        if binary_sink is None:
            # Pure text sink: keep the one-character-per-byte output.
            outfile.write(payload.decode("latin-1"))
        else:
            # Flush pending text first so output ordering is preserved,
            # then bypass the text layer to avoid re-encoding the bytes.
            outfile.flush()
            binary_sink.write(payload)
    else:
        outfile.write(payload)
if __name__ == "__main__":
    # Only the first line of the module docstring is used as the summary
    # shown by --help.
    summary = __doc__.strip().partition('\n')[0]
    cli = argparse.ArgumentParser(description=summary)

    # Optional positional input; sys.stdin is used when it is omitted.
    cli.add_argument(
        'input',
        nargs='?',
        type=argparse.FileType('rb'),
        default=sys.stdin,
        help="An optional filename to read from (default=stdin)",
    )
    # Optional output file; sys.stdout is used when it is omitted.
    cli.add_argument(
        '-O', '--output',
        required=False,
        type=argparse.FileType('wb'),
        default=sys.stdout,
        help="An optional filename to which output will be written "
             "(default=stdout)",
    )

    options = cli.parse_args()
    unhexdump(options.input, options.output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When files have repeated byte patterns, hexdump prints a `*` alone on a line to indicate that the omitted lines are identical to the line above, e.g.:
Python isn't my first language but I coded up the following hack to handle this situation and fill in the missing repeated byte patterns.