Last active
August 29, 2015 14:15
-
-
Save gammy/7286a223b3aaad3dc881 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Letter-frequency analysis of two text files.

Counts how often each letter a-z occurs in two files, prints both frequency
rankings side by side, and then prints the first file with its n-th most
common letter replaced by the n-th most common letter of the second file
(ie, perform frequency analysis and resolution).
"""
from sys import argv, exit
from os.path import basename
import string

# Minimum number of spaces between the first file's name and the second's
# in the table header.
COLUMN_GAP = 10


def read_lowercase(filename):
    """Return the UTF-8 decoded contents of *filename*, lower-cased."""
    with open(filename, 'r', encoding='utf-8') as fo:
        return fo.read().lower()


def count_letters(text):
    """Count the occurrences of each letter a-z in *text*.

    The text is lower-cased first, so the count is case-insensitive.  Every
    letter a-z is present in the result, with 0 for letters that never
    occur; all other characters are ignored.
    """
    counts = dict.fromkeys(string.ascii_lowercase, 0)
    for char in text.lower():
        if char in counts:
            counts[char] += 1
    return counts


def rank_letters(counts):
    """Return ``(letter, count)`` tuples ordered from most to least common.

    The sort is stable, so letters with equal counts keep the order they
    have in *counts* (alphabetical for the output of ``count_letters``).
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def percentage(count, total):
    """Return *count* as a percentage of *total*, or 0.0 if *total* is 0.

    The zero guard keeps a file without any a-z letters from raising
    ZeroDivisionError.
    """
    if total == 0:
        return 0.0
    return (count / total) * 100


def format_entry(letter, count, total):
    """Format one table cell, e.g. ``'e:   42 (12.50%)'``."""
    return '{}: {:4d} ({:2.2f}%)'.format(
        letter, count, percentage(count, total))


def format_table(name_a, ranked_a, name_b, ranked_b):
    """Return the lines of the side-by-side frequency table.

    The first line is the header with both names, the second is empty, and
    each following line pairs the entries of *ranked_a* and *ranked_b* that
    share the same rank.
    """
    total_a = sum(count for _, count in ranked_a)
    total_b = sum(count for _, count in ranked_b)
    cells_a = [format_entry(k, v, total_a) for k, v in ranked_a]
    cells_b = [format_entry(k, v, total_b) for k, v in ranked_b]

    # The left column is normally as wide as the first name plus the gap,
    # but never narrower than its widest cell plus two spaces; otherwise a
    # short file name would make the two columns run into each other.
    width = max([len(name_a) + COLUMN_GAP] +
                [len(cell) + 2 for cell in cells_a])

    lines = [name_a.ljust(width) + name_b, '']
    lines.extend(left.ljust(width) + right
                 for left, right in zip(cells_a, cells_b))
    return lines


def build_letter_map(ranked_a, ranked_b):
    """Map each letter of *ranked_a* to the equally ranked one of *ranked_b*."""
    return {a[0]: b[0] for a, b in zip(ranked_a, ranked_b)}


def translate(text, letter_map):
    """Return *text* with every character found in *letter_map* replaced.

    Characters without an entry in *letter_map* are left untouched.
    """
    return text.translate(str.maketrans(letter_map))


def main(args):
    """Run the analysis for the two files named in ``args[1]`` and ``args[2]``.

    *args* has the layout of ``sys.argv``.  Returns the process exit status.
    """
    if len(args) <= 2:
        print("Usage: {} <file 1> <file b>".format(basename(args[0])))
        return 0

    fa, fb = args[1:3]

    # Each file is read once; the text of the first file is reused for the
    # translation below.
    text_a = read_lowercase(fa)
    text_b = read_lowercase(fb)
    ranked_a = rank_letters(count_letters(text_a))
    ranked_b = rank_letters(count_letters(text_b))

    print("Analysis:\n")
    print('\n'.join(format_table(fa, ranked_a, fb, ranked_b)))
    print()

    print(text_a)
    print("translated using the above table:\n")
    # No trailing newline is added: the output mirrors the input text.
    print(translate(text_a, build_letter_map(ranked_a, ranked_b)), end='')
    return 0


if __name__ == '__main__':
    exit(main(argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment