Created
November 25, 2012 23:24
-
-
Save enigmaticape/4145857 to your computer and use it in GitHub Desktop.
Some scripts used in exploring the "Pigeon Code"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some Python scripts used in exploration of the Pigeon Code, a WW2 era | |
UK code found attached to the leg of a dead pigeon in December 2012. | |
There is a lot of shared code between these scripts, and they are of, er, | |
varying quality as I knocked them up on a Saturday afternoon for the sole | |
purpose of hacking away on the Pigeon Code. | |
Shared in the spirit of making my rather limited investigation repeatable. | |
See the associated blog post at http://www.enigmaticape.com/blog/pigeon-code-some-idle-speculation-with-graphs/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import codecs | |
import argparse | |
def count_letters(text, extra_symbols=""):
    """Count how often each letter occurs in *text*.

    The text is upper-cased before counting, so the result is
    case-insensitive.

    text          -- the ciphertext to analyse
    extra_symbols -- characters to count in addition to letters; they are
                     matched against the upper-cased text

    Returns a dict mapping each counted character to its number of
    occurrences.
    """
    counts = {}
    for char in text.upper():
        if char.isalpha() or char in extra_symbols:
            counts[char] = counts.get(char, 0) + 1
    return counts


def format_counts(counts):
    """Render *counts* as two comma separated lines: labels, then counts.

    Entries are sorted by label. Both lines end with a newline, so an
    empty dict yields two empty lines.
    """
    items = sorted(counts.items())
    label_line = ",".join(label for label, _ in items)
    count_line = ",".join(str(number) for _, number in items)
    return label_line + "\n" + count_line + "\n"


def open_input(path, encoding):
    """Return a decoding reader for *path*, or for STDIN when path is empty.

    Exits with a non-zero status when the named file does not exist.
    """
    if path:
        if not os.path.exists(path):
            sys.stderr.write(path + "\nFile not found :-( \n")
            sys.exit(1)
        return codecs.open(path, 'rb', encoding=encoding)
    # Python 3 exposes the byte stream as .buffer; Python 2 streams
    # already yield bytes.
    raw = getattr(sys.stdin, "buffer", sys.stdin)
    if sys.stdin.isatty():
        return codecs.getreader(sys.stdin.encoding)(raw)
    return codecs.getreader(encoding)(raw)


def open_output(path, encoding):
    """Return an encoding writer for *path*, or for STDOUT when path is empty."""
    if path:
        return codecs.open(path, "w", encoding)
    # if we're using stdout, there are some pesky encoding issues
    # to deal with. We default to UTF 8. Your TTY should be UTF8.
    raw = getattr(sys.stdout, "buffer", sys.stdout)
    if sys.stdout.isatty():
        return codecs.getwriter(sys.stdout.encoding)(raw)
    return codecs.getwriter(encoding)(raw)


def main():
    """Parse the command line, count the letters and write the result."""
    parser = argparse.ArgumentParser("Count frequency of letters in ciphertext")
    parser.add_argument("-i", "--input", help="input file",
                        action="store")
    parser.add_argument("-o", "--output", help="output file",
                        action="store")
    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                        action="store",
                        default="utf-8")
    # Accepted on the command line, but not used by this script.
    parser.add_argument("-l", "--locale", help="locale name",
                        action="store")
    # NB that if you do use additional symbols, you are going
    # to have no fun at all if one of them is a comma, because the
    # output is comma separated.
    parser.add_argument("-a", "--addsyms", help="additional symbols",
                        action="store")
    args = parser.parse_args()

    extra_symbols = args.addsyms if args.addsyms is not None else ""

    # read the whole input, from a file or STDIN
    cipher = open_input(args.input, args.encoding)
    try:
        cipher_text = cipher.read()
    finally:
        if args.input:
            cipher.close()

    counts = count_letters(cipher_text, extra_symbols)

    # write the data out on two lines, basically as a CSV
    output = open_output(args.output, args.encoding)
    try:
        output.write(format_counts(counts))
    finally:
        # Close files we opened; only flush the shared stdout stream.
        if args.output:
            output.close()
        else:
            output.flush()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import codecs | |
import argparse | |
def grid_text(letters, group_size=5, groups_per_row=5):
    """Lay the letters of *letters* out as rows of fixed-size groups.

    Non-alphabetic characters are dropped and the rest is upper-cased.
    Every group is followed by a single space and every row ends with a
    newline; the final group and row may be shorter than the others.

    letters        -- the text to grid
    group_size     -- number of letters per group
    groups_per_row -- number of groups per output line

    Returns the gridded text, or an empty string when there are no letters.
    """
    alpha = [c for c in letters.upper() if c.isalpha()]
    groups = [
        "".join(alpha[pos:pos + group_size])
        for pos in range(0, len(alpha), group_size)
    ]
    rows = []
    for pos in range(0, len(groups), groups_per_row):
        row = groups[pos:pos + groups_per_row]
        rows.append("".join(group + " " for group in row) + "\n")
    return "".join(rows)


def open_input(path, encoding):
    """Return a decoding reader for *path*, or for STDIN when path is empty.

    Exits with a non-zero status when the named file does not exist.
    """
    if path:
        if not os.path.exists(path):
            sys.stderr.write(path + "\nFile not found :-( \n")
            sys.exit(1)
        return codecs.open(path, 'rb', encoding=encoding)
    # Python 3 exposes the byte stream as .buffer; Python 2 streams
    # already yield bytes.
    raw = getattr(sys.stdin, "buffer", sys.stdin)
    if sys.stdin.isatty():
        return codecs.getreader(sys.stdin.encoding)(raw)
    return codecs.getreader(encoding)(raw)


def open_output(path, encoding):
    """Return an encoding writer for *path*, or for STDOUT when path is empty."""
    if path:
        return codecs.open(path, "w", encoding)
    # if we're using stdout, there are some pesky encoding issues
    # to deal with. We default to UTF 8. Your TTY should be UTF8.
    raw = getattr(sys.stdout, "buffer", sys.stdout)
    if sys.stdout.isatty():
        return codecs.getwriter(sys.stdout.encoding)(raw)
    return codecs.getwriter(encoding)(raw)


def main():
    """Parse the command line and write the gridded input text."""
    parser = argparse.ArgumentParser("Grid some text into 5x5 blocks")
    parser.add_argument("-i", "--input", help="input file",
                        action="store")
    parser.add_argument("-o", "--output", help="Output file",
                        action="store")
    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                        action="store",
                        default="utf-8")
    args = parser.parse_args()

    # read the whole input, from a file or STDIN
    source = open_input(args.input, args.encoding)
    try:
        letters = source.read()
    finally:
        if args.input:
            source.close()

    # Only the grid itself is written; nothing else goes to stdout, so the
    # output stays clean when it is piped into another tool.
    output = open_output(args.output, args.encoding)
    try:
        output.write(grid_text(letters))
    finally:
        # Close files we opened; only flush the shared stdout stream.
        if args.output:
            output.close()
        else:
            output.flush()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given a two-line input of comma-separated values,
# draws a bar chart assuming the first line is labels
# and the second line is counts.
# Requires reportlab and pip
import sys | |
import os | |
# easy_install PIP | |
# easy_install reportlab | |
from reportlab.graphics.shapes import Drawing | |
from reportlab.graphics.charts.barcharts import VerticalBarChart | |
import codecs | |
import argparse | |
def parse_histogram(label_line, data_line):
    """Split the two input lines into bar labels and integer counts.

    label_line -- comma separated labels
    data_line  -- comma separated integer counts

    Returns a (labels, data) tuple of lists.
    Raises ValueError when the counts line is empty or holds a value
    that is not an integer.
    """
    stripped_data = data_line.strip()
    if not stripped_data:
        raise ValueError("expected a second input line of comma separated counts")
    labels = label_line.strip().split(",")
    data = [int(value) for value in stripped_data.split(",")]
    return labels, data


def open_input(path, encoding):
    """Return a decoding reader for *path*, or for STDIN when path is empty.

    Exits with a non-zero status when the named file does not exist.
    """
    if path:
        if not os.path.exists(path):
            sys.stderr.write(path + "\nFile not found :-( \n")
            sys.exit(1)
        return codecs.open(path, 'rb', encoding=encoding)
    # Python 3 exposes the byte stream as .buffer; Python 2 streams
    # already yield bytes.
    raw = getattr(sys.stdin, "buffer", sys.stdin)
    if sys.stdin.isatty():
        return codecs.getreader(sys.stdin.encoding)(raw)
    return codecs.getreader(encoding)(raw)


def main():
    """Parse the command line, read the two-line input and save a PNG chart."""
    parser = argparse.ArgumentParser("Draw a histogram")
    parser.add_argument("-i", "--input", help="input file",
                        action="store")
    parser.add_argument("-o", "--output", help="output file base name",
                        action="store",
                        default="histo")
    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                        action="store",
                        default="utf-8")
    # type=int makes argparse reject a non-numeric step with a usage error.
    parser.add_argument("-s", "--step", help="value axis step",
                        action="store",
                        type=int,
                        default=1)
    args = parser.parse_args()

    # first line: labels, second line: counts
    histo_file = open_input(args.input, args.encoding)
    try:
        label_data = histo_file.readline()
        data_data = histo_file.readline()
    finally:
        if args.input:
            histo_file.close()
    labels, data = parse_histogram(label_data, data_data)

    drawing = Drawing(600, 500)
    chart = VerticalBarChart()
    chart.width = 560
    chart.height = 460
    chart.x = 20
    chart.y = 20
    chart.data = [data]
    chart.categoryAxis.categoryNames = labels
    chart.valueAxis.valueMin = 0
    chart.valueAxis.valueStep = args.step
    drawing.add(chart)
    drawing.save(fnRoot=args.output, formats=['png'])


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import argparse | |
import collections | |
# About the simplest polyalphabetic cipher you can get.

# The full upper-case alphabet, "A" through "Z" inclusive.
ALPHABET = [chr(c) for c in range(ord("A"), ord("Z") + 1)]


def polyalphabetic(text, key, decipher=False):
    """Encipher (or decipher) *text* with a simple polyalphabetic cipher.

    Text and key are upper-cased. Each letter A-Z is shifted through the
    alphabet by the character code of the current key character (modulo
    the alphabet size); any other character is copied through unchanged
    and does not consume a key character.

    text     -- the message to transform
    key      -- the key; must not be empty
    decipher -- reverse the shift instead of applying it

    Returns the transformed, upper-cased text.
    Raises ValueError when the key is empty.
    """
    if not key:
        raise ValueError("key must not be empty")

    size = len(ALPHABET)
    keyshift = collections.deque(key.upper())
    pieces = []
    for start in text.upper():
        if start not in ALPHABET:
            pieces.append(start)
            continue
        shift = ord(keyshift[0]) % size
        # Rotating right by one means a key "KEY" is consumed in the
        # order K, Y, E, K, Y, E, ... for both directions.
        keyshift.rotate(1)
        index = ALPHABET.index(start)
        if decipher:
            index += shift
        else:
            index -= shift
        pieces.append(ALPHABET[index % size])
    return "".join(pieces)


def main():
    """Parse the command line and print the transformed text."""
    parser = argparse.ArgumentParser("Encipher some text with a simple polyalpbetic cipher")
    parser.add_argument("text", action="store")
    parser.add_argument("key", action="store")
    parser.add_argument("-e", "--encipher", action="store_true")
    parser.add_argument("-d", "--decipher", action="store_true")
    args = parser.parse_args()

    if not args.key:
        parser.error("key must not be empty")

    # Deciphering wins when both flags are given; enciphering is the
    # default when neither is.
    print(polyalphabetic(args.text, args.key, decipher=args.decipher))


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# scrape textual data from a webpage and write it | |
# to a file or stdout | |
import os | |
import sys | |
import codecs | |
import argparse | |
# easy_install lxml | |
from lxml import html | |
from lxml.html.clean import clean_html | |
def open_output(path, encoding):
    """Return an encoding writer for *path*, or for STDOUT when path is empty."""
    if path:
        return codecs.open(path, "w", encoding)
    # if we're using stdout, there are some pesky encoding issues
    # to deal with. We default to UTF 8. Your TTY should be UTF8.
    # Python 3 exposes the byte stream as .buffer; Python 2 streams
    # already accept bytes.
    raw = getattr(sys.stdout, "buffer", sys.stdout)
    if sys.stdout.isatty():
        return codecs.getwriter(sys.stdout.encoding)(raw)
    return codecs.getwriter(encoding)(raw)


def main():
    """Fetch a URL, strip it down to its text and write that text out."""
    # process command line args
    parser = argparse.ArgumentParser("Scrape text from a url")
    parser.add_argument("url", help="http://some.website.com",
                        action="store")
    parser.add_argument("-o", "--output", help="output file",
                        action="store")
    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                        default="utf-8",
                        action="store")
    parser.add_argument("-l", "--locale", help="locale name",
                        action="store")
    args = parser.parse_args()

    # set the locale if one was provided
    if args.locale:
        # Imported here because the file's import block does not provide it.
        import locale
        locale.setlocale(locale.LC_ALL, args.locale)

    # get and clean the HTML
    tree = html.parse(args.url)
    tree = clean_html(tree)
    text = tree.getroot().text_content()

    output = open_output(args.output, args.encoding)
    try:
        output.write(text)
    finally:
        # Close files we opened; only flush the shared stdout stream.
        if args.output:
            output.close()
        else:
            output.flush()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
# Scramble the letters of a sample sentence: the order is lost, but the
# letter frequencies are not.
sentence = "Even a short sentence is given away by frequency counting, this is how cryptogrpahers break messages"
letters = list(sentence)
random.shuffle(letters)
scrambled = "".join(letters)
print(scrambled)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The UK is all aquiver about a WW2 era ciphered message found attached to a dead pigeon in a chimney. Oh yes. Find out more about what the cipher might be over at the Enigmatic Ape blog : http://www.enigmaticape.com/blog/pigeon-code-some-idle-speculation-with-graphs/