Stephen Merity Smerity

I hereby claim:

I am smerity on github.
I am smerity (https://keybase.io/smerity) on keybase.
I have a public key whose fingerprint is 56A2 5996 3078 B205 1053 883A 6615 0186 B74F 858B

To claim this, I am signing this object:

	smerity@pegasus:~/Coding/montelight/python$ time ~/Coding/Reference/pypy-2.2.1-linux64/bin/pypy -m cProfile minilight.py roomfront-n-1000.ml.txt

	MiniLight 1.6 Python - http://www.hxa.name/minilight

	iteration: 3^C
	interrupted
	1155613811 function calls (1062023566 primitive calls) in 89.591 seconds

	Ordered by: standard name

	# To run: python just_text.py > text
	###
	from glob import glob
	#
	import warc

	# List any of the WET files (WARC-format plain-text extracts, *.wet.gz) found in the data folder
	warc_files = glob('data/*.wet.gz')

	# Process each of the WARC files we found

	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00000-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00001-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00002-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00003-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00004-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00005-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00006-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/C

	import re
	#
	from collections import Counter
	from glob import glob
	from urlparse import urlparse
	#
	import warc


	# Extract the names and total usage count of all the opening HTML tags in the document

	import boto
	from boto.s3.key import Key
	import zlib


	def stream_decompress_multi(stream):
	dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
	while True:
	chunk = stream.read(1024 * 8)
	if not chunk: