Skip to content

Instantly share code, notes, and snippets.

@gammy
Created October 15, 2017 12:56
Show Gist options
  • Save gammy/56c023d9195f933d14c70f6ceb36ba25 to your computer and use it in GitHub Desktop.
Save gammy/56c023d9195f933d14c70f6ceb36ba25 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Count how often each letter (a-z) occurs in two text files, then pair
# the letters of the first file with those of the second by frequency rank
# (i.e., perform frequency analysis and resolution)
from sys import argv, exit
from os.path import basename
import string
def read_lowercase(filename):
    """Return the whole content of *filename* (UTF-8), converted to lowercase."""
    with open(filename, 'r', encoding='utf-8') as fo:
        return fo.read().lower()


def count_letters(text):
    """Count the occurrences of the letters a-z in *text*.

    Only the 26 lowercase ASCII letters are counted; every other character
    (including uppercase letters) is ignored, so callers should lowercase
    the text first.  The returned dict always has all 26 letters as keys.
    """
    counts = dict.fromkeys(string.ascii_lowercase, 0)
    for char in text:
        if char in counts:
            counts[char] += 1
    return counts


def rank_letters(counts):
    """Return ``(letter, count)`` tuples sorted by descending count.

    The sort is stable, so letters with equal counts keep the iteration
    order of *counts*.
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def build_letter_map(ranked_a, ranked_b):
    """Map the n-th ranked letter of *ranked_a* to the n-th of *ranked_b*."""
    return {a[0]: b[0] for a, b in zip(ranked_a, ranked_b)}


def format_cell(letter, count, total):
    """Format one table cell: the letter, its count and its share of *total*.

    A *total* of zero (no letters counted at all) yields 0.00% instead of
    raising ZeroDivisionError.
    """
    percent = (count / total) * 100 if total else 0.0
    return '{}: {:4d} ({:2.2f}%)'.format(letter, count, percent)


def translate(text, letter_map):
    """Return *text* with every character found in *letter_map* substituted.

    Characters without an entry in *letter_map* are passed through unchanged.
    """
    return ''.join(letter_map.get(char, char) for char in text)


def main(args):
    """Run the frequency analysis for the two files named in *args*.

    *args* has the layout of ``sys.argv``: program name first, then the two
    file names.  Prints the frequency table of both files, the lowercased
    content of the first file, and that content translated letter-by-letter
    using the rank-based mapping.  Returns the process exit status.
    """
    if len(args) <= 2:
        print("Usage: {} <file 1> <file b>".format(basename(args[0])))
        return 0

    fa, fb = args[1:3]

    # Read each file exactly once; text_a is reused for the translation.
    text_a = read_lowercase(fa)
    text_b = read_lowercase(fb)

    ranked_a = rank_letters(count_letters(text_a))
    ranked_b = rank_letters(count_letters(text_b))
    total_a = sum(count for _, count in ranked_a)
    total_b = sum(count for _, count in ranked_b)

    cells_a = [format_cell(letter, count, total_a)
               for letter, count in ranked_a]
    cells_b = [format_cell(letter, count, total_b)
               for letter, count in ranked_b]

    # The right column starts 10 characters after the first file name, but
    # never so early that it would touch the left column's cells (which
    # happens with short file names).
    left_offset = max(len(fa) + 10, max(len(cell) for cell in cells_a) + 2)

    print("Analysis:\n")
    print('{}{}'.format(fa.ljust(left_offset), fb))
    print()
    for cell_a, cell_b in zip(cells_a, cells_b):
        print('{}{}'.format(cell_a.ljust(left_offset), cell_b))
    print()

    print(text_a)
    print("translated using the above table:\n")
    print(translate(text_a, build_letter_map(ranked_a, ranked_b)), end='')
    return 0


if __name__ == '__main__':
    exit(main(argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment