yakovenkodenis · February 25, 2016 21:58
diff --git a/ceasar_cipher_and_cracker.py b/ceasar_cipher_and_cracker.py
 import numpy as np


 def cipher(text, alphabet='abcdefghijklmnopqrstuvwxyz', key=0):
    """Encrypt ``text`` with a Caesar shift of ``key`` positions.

    Args:
        text: The string to encrypt.
        alphabet: Ordered letters to rotate through; it is lower-cased
            before use.
        key: Number of positions each letter is moved forward.  Any
            integer works because the shift wraps around the alphabet.

    Returns:
        The encrypted string.  Characters that are not letters of
        ``alphabet`` (digits, punctuation, letters of other scripts) are
        copied unchanged.  A shifted letter stays lower-case when the
        input letter was lower-case and is upper-cased otherwise.
    """
    alphabet = alphabet.lower()
    n = len(alphabet)
    pieces = []
    for char in text:
        index = alphabet.find(char.lower()) if char.isalpha() else -1
        if index < 0:
            # find() signals "not in the alphabet" with -1; shifting from
            # -1 would silently substitute an unrelated letter (and an
            # empty alphabet would divide by zero), so pass it through.
            pieces.append(char)
            continue
        new_char = alphabet[(index + key) % n]
        pieces.append(new_char if char.islower() else new_char.upper())

    return "".join(pieces)


 def decipher(text, alphabet='abcdefghijklmnopqrstuvwxyz', key=0):
    """Decrypt ``text`` that was Caesar-shifted by ``key`` positions.

    Args:
        text: The string to decrypt.
        alphabet: Ordered letters to rotate through; it is lower-cased
            before use.
        key: The shift that was used to encrypt; each letter is moved
            back by this many positions, wrapping around the alphabet.

    Returns:
        The decrypted string.  Characters that are not letters of
        ``alphabet`` (digits, punctuation, letters of other scripts) are
        copied unchanged.  A shifted letter stays lower-case when the
        input letter was lower-case and is upper-cased otherwise.
    """
    alphabet = alphabet.lower()
    n = len(alphabet)
    pieces = []
    for char in text:
        index = alphabet.find(char.lower()) if char.isalpha() else -1
        if index < 0:
            # find() signals "not in the alphabet" with -1; shifting from
            # -1 would silently substitute an unrelated letter (and an
            # empty alphabet would divide by zero), so pass it through.
            pieces.append(char)
            continue
        # Python's % always yields a non-negative result for positive n.
        new_char = alphabet[(index - key) % n]
        pieces.append(new_char if char.islower() else new_char.upper())

    return "".join(pieces)


 def test_ceasar_cipher():
    """Print a Russian sample sentence encrypted with shift 3, then the
    result of decrypting that ciphertext with the same shift."""
    shift = 3
    russian_letters = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
    sample = 'Съешь же ещё этих мягких французских булок, да выпей чаю.'

    encrypted = cipher(sample, russian_letters, shift)
    print(encrypted)
    print(decipher(encrypted, russian_letters, shift))


 # ========= Caesar cipher cracker ===========

 # English letter frequencies, one entry per letter: index 0 is 'a' and
 # index 25 is 'z' (get_entropy looks them up with ord(letter) - 97).
 ENGLISH_FREQS = [
    0.08167,  # a
    0.01492,  # b
    0.02782,  # c
    0.04253,  # d
    0.12702,  # e
    0.02228,  # f
    0.02015,  # g
    0.06094,  # h
    0.06966,  # i
    0.00153,  # j
    0.00772,  # k
    0.04025,  # l
    0.02406,  # m
    0.06749,  # n
    0.07507,  # o
    0.01929,  # p
    0.00095,  # q
    0.05987,  # r
    0.06327,  # s
    0.09056,  # t
    0.02758,  # u
    0.00978,  # v
    0.02360,  # w
    0.00150,  # x
    0.01974,  # y
    0.00074,  # z
 ]


 def get_entropy(str, freqs=None):
    """Return the cross-entropy of ``str`` against unigram letter frequencies.

    Args:
        str: Text to score.  Only the letters a-z (in either case) are
            scored; every other character is ignored.
        freqs: Optional sequence of 26 probabilities for 'a'..'z'.
            Defaults to ``ENGLISH_FREQS``.

    Returns:
        The average of ``-log2(freqs[letter])`` over the scored letters
        (bits per letter); lower means the text matches ``freqs`` better.
        Returns ``float('inf')`` when ``str`` contains no scorable letter,
        so that such text ranks last instead of dividing zero by zero.
    """
    if freqs is None:
        freqs = ENGLISH_FREQS
    log_total, letters = 0.0, 0
    for c in str:
        lowered = c.lower()
        # Letters outside a-z have no entry in the table (indexing with
        # ord() - 97 would raise IndexError), and a few characters
        # lower-case to two code points, which ord() rejects.
        if len(lowered) == 1 and 'a' <= lowered <= 'z':
            log_total += np.log2(freqs[ord(lowered) - 97])
            letters += 1
    if not letters:
        return float('inf')
    return -log_total / letters


 def get_all_entropies(str):
    """Score ``str`` under each of the 26 possible Caesar shifts.

    Returns a list of ``(shift, entropy)`` tuples in ascending shift
    order, where ``entropy`` is ``get_entropy`` of ``str`` deciphered with
    that shift, e.g. ``[(0, 2.01), (1, 4.95), ..., (25, 3.73)]``.
    """
    return [(shift, get_entropy(decipher(str, key=shift)))
            for shift in range(26)]


 def cmp_to_key(mycmp):
    """Convert an old-style ``cmp`` function into a ``key=`` function.

    Args:
        mycmp: Callable taking two values and returning a negative number,
            zero, or a positive number when the first is respectively
            smaller than, equal to, or greater than the second.

    Returns:
        A callable usable as the ``key=`` argument of ``sorted()``,
        ``list.sort()``, ``min()`` and friends.  The wrapper objects it
        creates order themselves through ``mycmp`` and expose the wrapped
        value as ``.obj``.
    """
    # The standard library ships exactly this adapter; delegate to it
    # instead of maintaining a hand-written copy of the six comparisons.
    from functools import cmp_to_key as _cmp_to_key

    return _cmp_to_key(mycmp)


 def comparator(x, y):
    """Three-way comparison of two ``(shift, entropy)`` tuples.

    Orders by the second element first and falls back to the first
    element on a tie.  Returns -1 when ``x`` sorts before ``y``, 1 when it
    sorts after, and 0 when neither field differs.
    """
    # Field 1 decides; field 0 is only consulted when field 1 ties.
    for position in (1, 0):
        left, right = x[position], y[position]
        if left < right:
            return -1
        if left > right:
            return 1
    return 0


 def crack_ceasar(text):
    """Guess the Caesar shift of ``text`` and print the outcome.

    Ranks the candidates from ``get_all_entropies`` with ``comparator``
    (lowest entropy first, smaller shift on ties), prints the top-ranked
    shift with its decryption, then prints the decryption for every shift
    from 0 to 25.  Nothing is returned.
    """
    ranked = sorted(get_all_entropies(text), key=cmp_to_key(comparator))
    best_shift, _ = ranked[0]
    best_plaintext = decipher(text, key=best_shift)

    print("Best guess:")
    print("%d rotations\nDecrypted text: %s" % (best_shift, best_plaintext))

    print("=========\nFull circle:")
    for shift in range(26):
        candidate = decipher(text, key=shift)
        print("%d -\t%s" % (shift, candidate))
diff --git a/letters_frequencies_statistics.py b/letters_frequencies_statistics.py
 from collections import defaultdict


 def get_letters_statistics(text):
    """Return how often each letter occurs in ``text``, as percentages.

    Letters are the characters for which ``str.isalpha()`` is true; they
    are counted case-insensitively under their lower-cased form.  The
    result maps each such letter to ``count * 100 / total_letters`` and is
    empty when ``text`` contains no letters.
    """
    letters = [symbol.lower() for symbol in text if symbol.isalpha()]
    total = len(letters)

    counts = defaultdict(int)
    for letter in letters:
        counts[letter] += 1

    return {letter: count * 100 / total for letter, count in counts.items()}


 def test_letters_statistics():
    """Print the letter frequencies of ``war_and_peace.txt``.

    Reads the file from the current working directory, removes newlines,
    and prints one ``Letter 'x' - NN.NN%`` line per letter, most frequent
    first.  Any error raised by ``open()`` (for example when the file is
    missing) propagates to the caller.
    """
    # No placeholder value for ``s`` is needed: the file contents are the
    # only input that is ever used.
    with open('war_and_peace.txt', 'r') as war_and_peace:
        s = war_and_peace.read().replace('\n', '')

    stat_dict = get_letters_statistics(s)
    for key, value in sorted(stat_dict.items(),
                             key=lambda k_v: k_v[1], reverse=True):
        print("Letter '%s' - %.2f%%" % (str(key), value))
	import numpy as np


	def cipher(text, alphabet='abcdefghijklmnopqrstuvwxyz', key=0):
	    """Encrypt ``text`` with a Caesar shift of ``key`` positions.

	    Args:
	        text: The string to encrypt.
	        alphabet: Ordered letters to rotate through; it is lower-cased
	            before use.
	        key: Number of positions each letter is moved forward.  Any
	            integer works because the shift wraps around the alphabet.

	    Returns:
	        The encrypted string.  Characters that are not letters of
	        ``alphabet`` (digits, punctuation, letters of other scripts) are
	        copied unchanged.  A shifted letter stays lower-case when the
	        input letter was lower-case and is upper-cased otherwise.
	    """
	    alphabet = alphabet.lower()
	    n = len(alphabet)
	    pieces = []
	    for char in text:
	        index = alphabet.find(char.lower()) if char.isalpha() else -1
	        if index < 0:
	            # find() signals "not in the alphabet" with -1; shifting from
	            # -1 would silently substitute an unrelated letter (and an
	            # empty alphabet would divide by zero), so pass it through.
	            pieces.append(char)
	            continue
	        new_char = alphabet[(index + key) % n]
	        pieces.append(new_char if char.islower() else new_char.upper())

	    return "".join(pieces)


	def decipher(text, alphabet='abcdefghijklmnopqrstuvwxyz', key=0):
	    """Decrypt ``text`` that was Caesar-shifted by ``key`` positions.

	    Args:
	        text: The string to decrypt.
	        alphabet: Ordered letters to rotate through; it is lower-cased
	            before use.
	        key: The shift that was used to encrypt; each letter is moved
	            back by this many positions, wrapping around the alphabet.

	    Returns:
	        The decrypted string.  Characters that are not letters of
	        ``alphabet`` (digits, punctuation, letters of other scripts) are
	        copied unchanged.  A shifted letter stays lower-case when the
	        input letter was lower-case and is upper-cased otherwise.
	    """
	    alphabet = alphabet.lower()
	    n = len(alphabet)
	    pieces = []
	    for char in text:
	        index = alphabet.find(char.lower()) if char.isalpha() else -1
	        if index < 0:
	            # find() signals "not in the alphabet" with -1; shifting from
	            # -1 would silently substitute an unrelated letter (and an
	            # empty alphabet would divide by zero), so pass it through.
	            pieces.append(char)
	            continue
	        # Python's % always yields a non-negative result for positive n.
	        new_char = alphabet[(index - key) % n]
	        pieces.append(new_char if char.islower() else new_char.upper())

	    return "".join(pieces)


	def test_ceasar_cipher():
	    """Print a Russian sample sentence encrypted with shift 3, then the
	    result of decrypting that ciphertext with the same shift."""
	    alphabet = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
	    text = 'Съешь же ещё этих мягких французских булок, да выпей чаю.'
	    key = 3
	    ciphered_phrase = cipher(text, alphabet, key)
	    print(ciphered_phrase)
	    deciphered_phrase = decipher(ciphered_phrase, alphabet, key)
	    print(deciphered_phrase)


	# ========= Caesar cipher cracker ===========

	# English letter frequencies, one entry per letter: index 0 is 'a' and
	# index 25 is 'z' (get_entropy looks them up with ord(letter) - 97).
	ENGLISH_FREQS = [
	    0.08167,  # a
	    0.01492,  # b
	    0.02782,  # c
	    0.04253,  # d
	    0.12702,  # e
	    0.02228,  # f
	    0.02015,  # g
	    0.06094,  # h
	    0.06966,  # i
	    0.00153,  # j
	    0.00772,  # k
	    0.04025,  # l
	    0.02406,  # m
	    0.06749,  # n
	    0.07507,  # o
	    0.01929,  # p
	    0.00095,  # q
	    0.05987,  # r
	    0.06327,  # s
	    0.09056,  # t
	    0.02758,  # u
	    0.00978,  # v
	    0.02360,  # w
	    0.00150,  # x
	    0.01974,  # y
	    0.00074,  # z
	]


	def get_entropy(str, freqs=None):
	    """Return the cross-entropy of ``str`` against unigram letter frequencies.

	    Args:
	        str: Text to score.  Only the letters a-z (in either case) are
	            scored; every other character is ignored.
	        freqs: Optional sequence of 26 probabilities for 'a'..'z'.
	            Defaults to ``ENGLISH_FREQS``.

	    Returns:
	        The average of ``-log2(freqs[letter])`` over the scored letters
	        (bits per letter); lower means the text matches ``freqs`` better.
	        Returns ``float('inf')`` when ``str`` contains no scorable letter,
	        so that such text ranks last instead of dividing zero by zero.
	    """
	    if freqs is None:
	        freqs = ENGLISH_FREQS
	    log_total, letters = 0.0, 0
	    for c in str:
	        lowered = c.lower()
	        # Letters outside a-z have no entry in the table (indexing with
	        # ord() - 97 would raise IndexError), and a few characters
	        # lower-case to two code points, which ord() rejects.
	        if len(lowered) == 1 and 'a' <= lowered <= 'z':
	            log_total += np.log2(freqs[ord(lowered) - 97])
	            letters += 1
	    if not letters:
	        return float('inf')
	    return -log_total / letters


	def get_all_entropies(str):
	    """Score ``str`` under each of the 26 possible Caesar shifts.

	    Returns a list of ``(shift, entropy)`` tuples in ascending shift
	    order, where ``entropy`` is ``get_entropy`` of ``str`` deciphered with
	    that shift, e.g. ``[(0, 2.01), (1, 4.95), ..., (25, 3.73)]``.
	    """
	    return [(shift, get_entropy(decipher(str, key=shift)))
	            for shift in range(26)]


	def cmp_to_key(mycmp):
	    """Convert an old-style ``cmp`` function into a ``key=`` function.

	    Args:
	        mycmp: Callable taking two values and returning a negative number,
	            zero, or a positive number when the first is respectively
	            smaller than, equal to, or greater than the second.

	    Returns:
	        A callable usable as the ``key=`` argument of ``sorted()``,
	        ``list.sort()``, ``min()`` and friends.  The wrapper objects it
	        creates order themselves through ``mycmp`` and expose the wrapped
	        value as ``.obj``.
	    """
	    # The standard library ships exactly this adapter; delegate to it
	    # instead of maintaining a hand-written copy of the six comparisons.
	    from functools import cmp_to_key as _cmp_to_key

	    return _cmp_to_key(mycmp)


	def comparator(x, y):
	    """Three-way comparison of two ``(shift, entropy)`` tuples.

	    Orders by the second element first and falls back to the first
	    element on a tie.  Returns -1 when ``x`` sorts before ``y``, 1 when it
	    sorts after, and 0 when neither field differs.
	    """
	    # Field 1 decides; field 0 is only consulted when field 1 ties.
	    for position in (1, 0):
	        left, right = x[position], y[position]
	        if left < right:
	            return -1
	        if left > right:
	            return 1
	    return 0


	def crack_ceasar(text):
	    """Guess the Caesar shift of ``text`` and print the outcome.

	    Ranks the candidates from ``get_all_entropies`` with ``comparator``
	    (lowest entropy first, smaller shift on ties), prints the top-ranked
	    shift with its decryption, then prints the decryption for every shift
	    from 0 to 25.  Nothing is returned.
	    """
	    entropies = get_all_entropies(text)
	    entropies.sort(key=cmp_to_key(comparator))
	    best_shift = entropies[0][0]
	    cracked_val = decipher(text, key=best_shift)
	    print("Best guess:")
	    print("%d rotations\nDecrypted text: %s" % (best_shift, cracked_val))

	    print("=========\nFull circle:")
	    for i in range(0, 26):
	        print("%d -\t%s" % (i, decipher(text, key=i)))
	from collections import defaultdict


	def get_letters_statistics(text):
	    """Return how often each letter occurs in ``text``, as percentages.

	    Letters are the characters for which ``str.isalpha()`` is true; they
	    are counted case-insensitively under their lower-cased form.  The
	    result maps each such letter to ``count * 100 / total_letters`` and is
	    empty when ``text`` contains no letters.
	    """
	    freq_dict = defaultdict(int)
	    all_letters_count = 0
	    for char in text:
	        if char.isalpha():
	            all_letters_count += 1
	            freq_dict[char.lower()] += 1

	    return {letter: count * 100 / all_letters_count
	            for letter, count in freq_dict.items()}


	def test_letters_statistics():
	    """Print the letter frequencies of ``war_and_peace.txt``.

	    Reads the file from the current working directory, removes newlines,
	    and prints one ``Letter 'x' - NN.NN%`` line per letter, most frequent
	    first.  Any error raised by ``open()`` (for example when the file is
	    missing) propagates to the caller.
	    """
	    # No placeholder value for ``s`` is needed: the file contents are the
	    # only input that is ever used.
	    with open('war_and_peace.txt', 'r') as war_and_peace:
	        s = war_and_peace.read().replace('\n', '')

	    stat_dict = get_letters_statistics(s)
	    for key, value in sorted(stat_dict.items(),
	                             key=lambda k_v: k_v[1], reverse=True):
	        print("Letter '%s' - %.2f%%" % (str(key), value))