enigmaticape · November 25, 2012 23:24 · enigmaticape · Nov 27, 2012
diff --git a/readme.txt b/readme.txt
 Some Python scripts used in exploration of the Pigeon Code, a WW2 era 
 UK code found attached to the leg of a dead pigeon in December 2012.

 There is a lot of shared code between these scripts, and they are of, er, 
 varying quality as I knocked them up on a Saturday afternoon for the sole
 purpose of hacking away on the Pigeon Code.

 Shared in the spirit of making my rather limited investigation repeatable.

 See the associated blog post at http://www.enigmaticape.com/blog/pigeon-code-some-idle-speculation-with-graphs/
diff --git a/freq.py b/freq.py
 import os
 import sys
 import codecs
 import argparse

 parser = argparse.ArgumentParser("Count frequency of letters in ciphertext")

 parser.add_argument("-i", "--input" ,   help="input file",    
                                        action="store")
                                        
 parser.add_argument("-o", "--output",   help="output file",   
                                        action="store")
                                        
 parser.add_argument("-e", "--encoding", help="utf-8 | ascii", 
                                        action="store",
                                        default="utf-8")
                                        
 parser.add_argument("-l", "--locale",   help="locale name",   
                                        action="store")

 # NB that if you do use additional symbols, you are going 
 # to have no fun at all if one of them is a comma.
 # this issue will be addressed in a future update
 parser.add_argument("-a", "--addsyms",  help="additional symbols", 
                                        action="store")

 args = parser.parse_args()

 if args.addsyms == None:
    args.addsyms = []

 # determine whether to read input from a file or STDIN
 if args.input :
    # file
    if not os.path.exists( args.input ):
        sys.stderr.write( args.input + "\nFile not found :-( \n" )
        sys.exit()
        
    cipher = codecs.open( args.input, 'rb', encoding=args.encoding )
 else:
    if sys.stdin.isatty():
        cipher = codecs.getreader( sys.stdin.encoding )(sys.stdin)
    else:
        cipher = codecs.getreader(       args.encoding )(sys.stdin)


 # read the file
 cipher_text = cipher.read()

 # count letters
 counts = {}
 for char in cipher_text.upper():
    if char.isalpha() or char in args.addsyms:
        if char in counts:
            counts[ char ] += 1
        else:
            counts[ char ] =  1
            

 labels, data = [
                 [ tuple[ i ] for tuple in sorted(counts.items()) ] 
                 for i in (0,1)
               ]

 # determine if we're using stdout or an output file

 if args.output :
    output = codecs.open( args.output, "w", args.encoding )
 else:
    # if we're using stdout, there are some pesky encoding issues
    # to deal with. We default to UTF 8. Your TTY should be UTF8.
    if sys.stdout.isatty():
        output = codecs.getwriter( sys.stdout.encoding )(sys.stdout)
    else:
        output = codecs.getwriter(       args.encoding )(sys.stdout)
    

 # write the data out on two lines, basically as a CSV
 output.write( ",".join(labels) +"\n" )
 output.write( ",".join(str(d) for d in data) +"\n" )
 output.close
diff --git a/grid.py b/grid.py
 import os
 import sys
 import codecs
 import argparse

 def build_parser():
     """Return the command line parser for the 5x5 gridding script."""
     # ``description`` rather than the first positional parameter, which
     # is ``prog`` and would replace the program name in the usage line.
     parser = argparse.ArgumentParser(
         description="Grid some text into 5x5 blocks")

     parser.add_argument("-i", "--input", help="input file", action="store")
     parser.add_argument("-o", "--output", help="Output file", action="store")
     parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                         action="store", default="utf-8")
     return parser


 def open_input(path, encoding):
     """Return a decoding reader for *path*, or for STDIN when *path* is empty.

     Exits with status 1 (after a message on stderr) when *path* is given
     but does not exist.
     """
     if path:
         if not os.path.exists(path):
             sys.stderr.write(path + "\nFile not found :-( \n")
             sys.exit(1)
         return codecs.open(path, "rb", encoding=encoding)

     # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
     # are already byte streams.
     raw = getattr(sys.stdin, "buffer", sys.stdin)
     if sys.stdin.isatty():
         encoding = sys.stdin.encoding or encoding
     return codecs.getreader(encoding)(raw)


 def open_output(path, encoding):
     """Return an encoding writer for *path*, or for STDOUT when *path* is empty."""
     if path:
         return codecs.open(path, "w", encoding)

     # if we're using stdout, there are some pesky encoding issues
     # to deal with. We default to UTF 8. Your TTY should be UTF8.
     raw = getattr(sys.stdout, "buffer", sys.stdout)
     if sys.stdout.isatty():
         encoding = sys.stdout.encoding or encoding
     return codecs.getwriter(encoding)(raw)


 def group_letters(text, size=5):
     """Return the letters of ``text.upper()`` cut into strings of *size* letters.

     Non-letters are dropped. The last group is shorter when the number
     of letters is not a multiple of *size*.
     """
     letters = [c for c in text.upper() if c.isalpha()]
     return ["".join(letters[pos:pos + size])
             for pos in range(0, len(letters), size)]


 def format_grid(text, group_size=5, groups_per_row=5):
     """Return *text* laid out as rows of letter groups.

     Every group is followed by one space and every row by a newline,
     which is exactly the layout the script has always written.
     """
     groups = group_letters(text, group_size)
     rows = []
     for pos in range(0, len(groups), groups_per_row):
         row = groups[pos:pos + groups_per_row]
         rows.append("".join(group + " " for group in row) + "\n")
     return "".join(rows)


 def main(argv=None):
     """Read the input text and write it out as a grid of five-letter groups."""
     args = build_parser().parse_args(argv)

     # determine whether to read input from a file or STDIN, then read it
     source = open_input(args.input, args.encoding)
     try:
         letters = source.read()
     finally:
         if args.input:
             source.close()

     # Only the grid is written. The per-group debug print that used to
     # go to STDOUT is gone, because it got mixed into the grid whenever
     # no output file was given.
     output = open_output(args.output, args.encoding)
     try:
         output.write(format_grid(letters))
     finally:
         if args.output:
             output.close()
         else:
             output.flush()


 if __name__ == "__main__":
     main()
diff --git a/histo.py b/histo.py
 # Given a two line input of comma separated values,
 # draws a bar chart assuming the first line is labels
 # and the second line is counts.

 # Requires reportlab and PIP

 import sys
 import os


 # easy_install PIP
 # easy_install reportlab
 from   reportlab.graphics.shapes import Drawing
 from   reportlab.graphics.charts.barcharts import VerticalBarChart



 import codecs
 import argparse

 def build_parser():
     """Return the command line parser for the histogram drawing script."""
     # ``description`` rather than the first positional parameter, which
     # is ``prog`` and would replace the program name in the usage line.
     parser = argparse.ArgumentParser(description="Draw a histogram")

     parser.add_argument("-i", "--input", help="input file", action="store")
     parser.add_argument("-o", "--output", help="output file base name",
                         action="store", default="histo")
     parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                         action="store", default="utf-8")
     # type=int lets argparse reject a non-numeric step with a usage
     # message instead of a traceback later on.
     parser.add_argument("-s", "--step", help="value axis step",
                         action="store", type=int, default=1)
     return parser


 def open_input(path, encoding):
     """Return a decoding reader for *path*, or for STDIN when *path* is empty.

     Exits with status 1 (after a message on stderr) when *path* is given
     but does not exist.
     """
     if path:
         if not os.path.exists(path):
             sys.stderr.write(path + "\nFile not found :-( \n")
             sys.exit(1)
         return codecs.open(path, "rb", encoding=encoding)

     # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
     # are already byte streams.
     raw = getattr(sys.stdin, "buffer", sys.stdin)
     if sys.stdin.isatty():
         encoding = sys.stdin.encoding or encoding
     return codecs.getreader(encoding)(raw)


 def parse_histogram(label_line, data_line):
     """Turn the two comma separated input lines into ``(labels, data)``.

     *label_line* holds the bar labels and *data_line* the counts; the
     counts are converted with ``int``, so a non-integer field raises
     ValueError.
     """
     labels = label_line.strip().split(",")
     data = [int(field) for field in data_line.strip().split(",")]
     return labels, data


 def draw_histogram(labels, data, step, output):
     """Draw *data* as a vertical bar chart and save it as a PNG named *output*."""
     drawing = Drawing(600, 500)
     chart = VerticalBarChart()
     chart.width = 560
     chart.height = 460
     chart.x = 20
     chart.y = 20

     chart.data = [data]
     chart.categoryAxis.categoryNames = labels
     chart.valueAxis.valueMin = 0
     chart.valueAxis.valueStep = step
     drawing.add(chart)
     drawing.save(fnRoot=output, formats=['png'])


 def main(argv=None):
     """Read the label line and the count line, then draw the chart."""
     args = build_parser().parse_args(argv)

     # determine whether to read input from a file or STDIN
     histo_file = open_input(args.input, args.encoding)
     try:
         label_data = histo_file.readline()
         data_data = histo_file.readline()
     finally:
         if args.input:
             histo_file.close()

     labels, data = parse_histogram(label_data, data_data)
     draw_histogram(labels, data, args.step, args.output)


 if __name__ == "__main__":
     main()
diff --git a/polycipher.py b/polycipher.py
 import os
 import sys
 import argparse
 import collections

 # About the simplest polyalphabetic cipher you can get.

 # The full upper case alphabet, "A" through "Z" inclusive. The range used
 # to stop at ord("Z"), which left "Z" out and made any "Z" in the text
 # raise ValueError.
 ALPHABET = "".join(chr(c) for c in range(ord("A"), ord("Z") + 1))


 def build_parser():
     """Return the command line parser for the cipher script."""
     # ``description`` rather than the first positional parameter, which
     # is ``prog`` and would replace the program name in the usage line.
     parser = argparse.ArgumentParser(
         description="Encipher some text with a simple polyalpbetic cipher")
     parser.add_argument("text", action="store")
     parser.add_argument("key", action="store")
     parser.add_argument("-e", "--encipher", action="store_true")
     parser.add_argument("-d", "--decipher", action="store_true")
     return parser


 def polycipher(text, key, decipher=False):
     """Encipher (or, with ``decipher=True``, decipher) *text* using *key*.

     Text and key are upper-cased first. Each letter A-Z is shifted by
     the character code of the current key character, modulo the
     alphabet size; every other character is copied through unchanged
     and does not consume a key character. The key is stepped with
     ``deque.rotate(1)``, so after its first character it is consumed
     from the end backwards. That order is kept, and it is the same in
     both directions, so deciphering with the same key restores the
     upper-cased text.

     Raises ValueError when *key* is empty.
     """
     if not key:
         raise ValueError("key must not be empty")

     size = len(ALPHABET)
     keyshift = collections.deque(key.upper())
     pieces = []

     for char in text.upper():
         if char not in ALPHABET:
             pieces.append(char)
             continue

         shift = ord(keyshift[0])
         keyshift.rotate(1)

         index = ALPHABET.index(char)
         # Equivalent to rotating the alphabet right by ``shift`` and
         # reading the letter at ``index`` (encipher), or looking the
         # letter up in the rotated alphabet (decipher).
         if decipher:
             index += shift
         else:
             index -= shift
         pieces.append(ALPHABET[index % size])

     return "".join(pieces)


 def main(argv=None):
     """Parse the command line and print the converted text."""
     parser = build_parser()
     args = parser.parse_args(argv)

     # -d wins when both flags are given, as before. With neither flag the
     # text is enciphered; that case used to die with a NameError.
     try:
         outtext = polycipher(args.text, args.key, decipher=args.decipher)
     except ValueError as err:
         parser.error(str(err))

     sys.stdout.write(outtext + "\n")


 if __name__ == "__main__":
     main()
diff --git a/scrape.py b/scrape.py
 # scrape textual data from a webpage and write it
 # to a file or stdout

 import os
 import sys
 import codecs
 import argparse

 # easy_install lxml
 from   lxml import html
 from   lxml.html.clean import clean_html


 def build_parser():
     """Return the command line parser for the page scraper."""
     # ``description`` rather than the first positional parameter, which
     # is ``prog`` and would replace the program name in the usage line.
     parser = argparse.ArgumentParser(description="Scrape text from a url")

     parser.add_argument("url", help="http://some.website.com", action="store")
     parser.add_argument("-o", "--output", help="output file", action="store")
     parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
                         default="utf-8", action="store")
     parser.add_argument("-l", "--locale", help="locale name", action="store")
     return parser


 def open_output(path, encoding):
     """Return an encoding writer for *path*, or for STDOUT when *path* is empty."""
     if path:
         return codecs.open(path, "w", encoding)

     # if we're using stdout, there are some pesky encoding issues
     # to deal with. We default to UTF 8. Your TTY should be UTF8.
     # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
     # are already byte streams.
     raw = getattr(sys.stdout, "buffer", sys.stdout)
     if sys.stdout.isatty():
         encoding = sys.stdout.encoding or encoding
     return codecs.getwriter(encoding)(raw)


 def scrape_text(url):
     """Parse the page at *url*, clean its HTML and return its text content."""
     tree = html.parse(url)
     tree = clean_html(tree)
     return tree.getroot().text_content()


 def main(argv=None):
     """Fetch the page named on the command line and write out its text."""
     args = build_parser().parse_args(argv)

     # set the locale if one was provided
     if args.locale:
         # Imported here because the module was never imported at file
         # level, so using -l raised NameError.
         import locale
         locale.setlocale(locale.LC_ALL, args.locale)

     # get and clean the HTML
     text = scrape_text(args.url)

     # determine if we're using stdout or an output file
     output = open_output(args.output, args.encoding)
     try:
         output.write(text)
     finally:
         # Files are really closed now (the call parentheses used to be
         # missing); STDOUT is only flushed.
         if args.output:
             output.close()
         else:
             output.flush()


 if __name__ == "__main__":
     main()
diff --git a/short_shuffle.py b/short_shuffle.py
 import random

 # Sample sentence whose letters get shuffled (text kept exactly as written).
 SENTENCE = "Even a short sentence is given away by frequency counting, this is how cryptogrpahers break messages"


 def shuffle_text(text):
     """Return the characters of *text* in a random order.

     The result always contains exactly the same characters as *text*,
     so its letter frequencies are unchanged.
     """
     letters = list(text)
     random.shuffle(letters)
     return "".join(letters)


 if __name__ == "__main__":
     # A single parenthesised argument prints the same on Python 2 and 3.
     print(shuffle_text(SENTENCE))
	Some Python scripts used in exploration of the Pigeon Code, a WW2 era
	UK code found attached to the leg of a dead pigeon in December 2012.

	There is a lot of shared code between these scripts, and they are of, er,
	varying quality as I knocked them up on a Saturday afternoon for the sole
	purpose of hacking away on the Pigeon Code.

	Shared in the spirit of making my rather limited investigation repeatable.

	See the associated blog post at http://www.enigmaticape.com/blog/pigeon-code-some-idle-speculation-with-graphs/
	import os
	import sys
	import codecs
	import argparse

	def build_parser():
	    """Return the command line parser for the letter-frequency counter."""
	    # The sentence is passed as ``description``: the first positional
	    # parameter of ArgumentParser is ``prog``, which would otherwise put
	    # the whole sentence into the usage line as the program name.
	    parser = argparse.ArgumentParser(
	        description="Count frequency of letters in ciphertext")

	    parser.add_argument("-i", "--input", help="input file", action="store")
	    parser.add_argument("-o", "--output", help="output file", action="store")
	    # The help text had a stray backslash before the pipe.
	    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
	                        action="store", default="utf-8")
	    # Accepted for command line compatibility; this script never reads it.
	    parser.add_argument("-l", "--locale", help="locale name", action="store")

	    # NB that if you do use additional symbols, you are going
	    # to have no fun at all if one of them is a comma: the output
	    # is comma separated and nothing is quoted.
	    parser.add_argument("-a", "--addsyms", help="additional symbols",
	                        action="store")
	    return parser


	def open_input(path, encoding):
	    """Return a decoding reader for *path*, or for STDIN when *path* is empty.

	    Exits with status 1 (after a message on stderr) when *path* is given
	    but does not exist.
	    """
	    if path:
	        if not os.path.exists(path):
	            sys.stderr.write(path + "\nFile not found :-( \n")
	            sys.exit(1)
	        return codecs.open(path, "rb", encoding=encoding)

	    # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
	    # are already byte streams.
	    raw = getattr(sys.stdin, "buffer", sys.stdin)
	    if sys.stdin.isatty():
	        encoding = sys.stdin.encoding or encoding
	    return codecs.getreader(encoding)(raw)


	def open_output(path, encoding):
	    """Return an encoding writer for *path*, or for STDOUT when *path* is empty."""
	    if path:
	        return codecs.open(path, "w", encoding)

	    # if we're using stdout, there are some pesky encoding issues
	    # to deal with. We default to UTF 8. Your TTY should be UTF8.
	    raw = getattr(sys.stdout, "buffer", sys.stdout)
	    if sys.stdout.isatty():
	        encoding = sys.stdout.encoding or encoding
	    return codecs.getwriter(encoding)(raw)


	def count_symbols(text, addsyms=""):
	    """Count the characters of ``text.upper()`` that are letters or in *addsyms*.

	    Returns a dict mapping each counted character to its number of
	    occurrences. The text is upper-cased before counting, so lower and
	    upper case letters share one entry.
	    """
	    counts = {}
	    for char in text.upper():
	        if char.isalpha() or char in addsyms:
	            counts[char] = counts.get(char, 0) + 1
	    return counts


	def histogram_rows(counts):
	    """Split *counts* into parallel ``(labels, data)`` lists sorted by label."""
	    pairs = sorted(counts.items())
	    labels = [label for label, _ in pairs]
	    data = [count for _, count in pairs]
	    return labels, data


	def main(argv=None):
	    """Read the cipher text, count its symbols and write two CSV-like lines."""
	    args = build_parser().parse_args(argv)

	    # determine whether to read input from a file or STDIN, then read it
	    cipher = open_input(args.input, args.encoding)
	    try:
	        cipher_text = cipher.read()
	    finally:
	        if args.input:
	            cipher.close()

	    labels, data = histogram_rows(
	        count_symbols(cipher_text, args.addsyms or ""))

	    output = open_output(args.output, args.encoding)
	    try:
	        # write the data out on two lines, basically as a CSV
	        output.write(",".join(labels) + "\n")
	        output.write(",".join(str(d) for d in data) + "\n")
	    finally:
	        # Files are really closed now (the call parentheses used to be
	        # missing); STDOUT is only flushed so the interpreter can still
	        # use it afterwards.
	        if args.output:
	            output.close()
	        else:
	            output.flush()


	if __name__ == "__main__":
	    main()
	# Given a two line input of comma separated values,
	# draws a bar chart assuming the first line is labels
	# and the second line is counts.

	# Requires reportlab and PIP

	import sys
	import os


	# easy_install PIP
	# easy_install reportlab
	from reportlab.graphics.shapes import Drawing
	from reportlab.graphics.charts.barcharts import VerticalBarChart



	import codecs
	import argparse

	def build_parser():
	    """Return the command line parser for the histogram drawing script."""
	    # ``description`` rather than the first positional parameter, which
	    # is ``prog`` and would replace the program name in the usage line.
	    parser = argparse.ArgumentParser(description="Draw a histogram")

	    parser.add_argument("-i", "--input", help="input file", action="store")
	    parser.add_argument("-o", "--output", help="output file base name",
	                        action="store", default="histo")
	    # The help text had a stray backslash before the pipe.
	    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
	                        action="store", default="utf-8")
	    # type=int lets argparse reject a non-numeric step with a usage
	    # message instead of a traceback later on.
	    parser.add_argument("-s", "--step", help="value axis step",
	                        action="store", type=int, default=1)
	    return parser


	def open_input(path, encoding):
	    """Return a decoding reader for *path*, or for STDIN when *path* is empty.

	    Exits with status 1 (after a message on stderr) when *path* is given
	    but does not exist.
	    """
	    if path:
	        if not os.path.exists(path):
	            sys.stderr.write(path + "\nFile not found :-( \n")
	            sys.exit(1)
	        return codecs.open(path, "rb", encoding=encoding)

	    # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
	    # are already byte streams.
	    raw = getattr(sys.stdin, "buffer", sys.stdin)
	    if sys.stdin.isatty():
	        encoding = sys.stdin.encoding or encoding
	    return codecs.getreader(encoding)(raw)


	def parse_histogram(label_line, data_line):
	    """Turn the two comma separated input lines into ``(labels, data)``.

	    *label_line* holds the bar labels and *data_line* the counts; the
	    counts are converted with ``int``, so a non-integer field raises
	    ValueError.
	    """
	    labels = label_line.strip().split(",")
	    data = [int(field) for field in data_line.strip().split(",")]
	    return labels, data


	def draw_histogram(labels, data, step, output):
	    """Draw *data* as a vertical bar chart and save it as a PNG named *output*."""
	    drawing = Drawing(600, 500)
	    chart = VerticalBarChart()
	    chart.width = 560
	    chart.height = 460
	    chart.x = 20
	    chart.y = 20

	    chart.data = [data]
	    chart.categoryAxis.categoryNames = labels
	    chart.valueAxis.valueMin = 0
	    chart.valueAxis.valueStep = step
	    drawing.add(chart)
	    drawing.save(fnRoot=output, formats=['png'])


	def main(argv=None):
	    """Read the label line and the count line, then draw the chart."""
	    args = build_parser().parse_args(argv)

	    # determine whether to read input from a file or STDIN
	    histo_file = open_input(args.input, args.encoding)
	    try:
	        label_data = histo_file.readline()
	        data_data = histo_file.readline()
	    finally:
	        if args.input:
	            histo_file.close()

	    labels, data = parse_histogram(label_data, data_data)
	    draw_histogram(labels, data, args.step, args.output)


	if __name__ == "__main__":
	    main()
	# scrape textual data from a webpage and write it
	# to a file or stdout

	import os
	import sys
	import codecs
	import argparse

	# easy_install lxml
	from lxml import html
	from lxml.html.clean import clean_html


	def build_parser():
	    """Return the command line parser for the page scraper."""
	    # ``description`` rather than the first positional parameter, which
	    # is ``prog`` and would replace the program name in the usage line.
	    parser = argparse.ArgumentParser(description="Scrape text from a url")

	    parser.add_argument("url", help="http://some.website.com", action="store")
	    parser.add_argument("-o", "--output", help="output file", action="store")
	    # The help text had a stray backslash before the pipe.
	    parser.add_argument("-e", "--encoding", help="utf-8 | ascii",
	                        default="utf-8", action="store")
	    parser.add_argument("-l", "--locale", help="locale name", action="store")
	    return parser


	def open_output(path, encoding):
	    """Return an encoding writer for *path*, or for STDOUT when *path* is empty."""
	    if path:
	        return codecs.open(path, "w", encoding)

	    # if we're using stdout, there are some pesky encoding issues
	    # to deal with. We default to UTF 8. Your TTY should be UTF8.
	    # Python 3 exposes the byte stream as ``.buffer``; Python 2 streams
	    # are already byte streams.
	    raw = getattr(sys.stdout, "buffer", sys.stdout)
	    if sys.stdout.isatty():
	        encoding = sys.stdout.encoding or encoding
	    return codecs.getwriter(encoding)(raw)


	def scrape_text(url):
	    """Parse the page at *url*, clean its HTML and return its text content."""
	    tree = html.parse(url)
	    tree = clean_html(tree)
	    return tree.getroot().text_content()


	def main(argv=None):
	    """Fetch the page named on the command line and write out its text."""
	    args = build_parser().parse_args(argv)

	    # set the locale if one was provided
	    if args.locale:
	        # Imported here because the module was never imported at file
	        # level, so using -l raised NameError.
	        import locale
	        locale.setlocale(locale.LC_ALL, args.locale)

	    # get and clean the HTML
	    text = scrape_text(args.url)

	    # determine if we're using stdout or an output file
	    output = open_output(args.output, args.encoding)
	    try:
	        output.write(text)
	    finally:
	        # Files are really closed now (the call parentheses used to be
	        # missing); STDOUT is only flushed.
	        if args.output:
	            output.close()
	        else:
	            output.flush()


	if __name__ == "__main__":
	    main()
	import random

	# Sample sentence whose letters get shuffled (text kept exactly as written).
	SENTENCE = "Even a short sentence is given away by frequency counting, this is how cryptogrpahers break messages"


	def shuffle_text(text):
	    """Return the characters of *text* in a random order.

	    The result always contains exactly the same characters as *text*,
	    so its letter frequencies are unchanged.
	    """
	    letters = list(text)
	    random.shuffle(letters)
	    return "".join(letters)


	if __name__ == "__main__":
	    # A single parenthesised argument prints the same on Python 2 and 3.
	    print(shuffle_text(SENTENCE))