cageyjames · December 12, 2012 17:18
diff --git a/compare_urls.py b/compare_urls.py
 """ This is the code I used to find the URLs that were 404 in Disqus """

 import os,sys
 #import feedparser
 import urllib
 import Levenshtein


 def read_atom():
     """Dump every entry link from the blog's Atom feed to ``./cageyjames_out.csv``.

     The feed is fetched and parsed with ``feedparser``; the output file is
     overwritten and receives one link per line.
     """
     # Imported locally: the module-level ``import feedparser`` is commented
     # out, so without this the call below raises NameError.  Keeping the
     # import inside the function means the rest of the script does not need
     # feedparser unless read_atom() is actually called.
     import feedparser

     # the url of the Atom feed can also be used here
     d = feedparser.parse("http://cageyjames.webfactional.com/atom.xml")

     # ``with`` closes the file even if an entry turns out to have no ``link``.
     with open("./cageyjames_out.csv", "w") as of:
         for entry in d.entries:
             of.write("%s\n" % entry.link)

 if __name__ == "__main__":
     # Pair every URL in ./cageyjames_out.csv with the first URL in
     # spatiallyadjusted_mapping.csv whose case-insensitive Levenshtein ratio
     # exceeds 0.90.  Matches go to ./match.csv as
     #     url,status,matched_url,matched_status,ratio
     # and URLs without any match go to ./no_match.csv, one per line.
     #
     # NOTE: ``urllib.urlopen`` is the Python 2 API; it is kept so that the
     # status codes are obtained exactly as before.

     # Read both inputs before touching the outputs, so a missing input file
     # does not leave freshly truncated result files behind.  Lines are
     # stripped so that line terminators (CRLF vs LF, or a missing final
     # newline) cannot skew the similarity score, and blank lines are ignored.
     with open("./cageyjames_out.csv") as src:
         cj = [line.strip() for line in src if line.strip()]
     with open("spatiallyadjusted_mapping.csv") as src:
         # Lower-case each candidate once instead of once per comparison.
         at = [(line.strip(), line.strip().lower()) for line in src if line.strip()]

     # ``with`` guarantees both result files are flushed and closed even when
     # one of the HTTP requests below raises.
     with open("./match.csv", "w") as of, open("./no_match.csv", "w") as nm:
         for c in cj:
             c_lower = c.lower()
             for a, a_lower in at:
                 ratio = Levenshtein.ratio(c_lower, a_lower)
                 if ratio > .90:
                     httpcodea = urllib.urlopen(a).getcode()
                     httpcodec = urllib.urlopen(c).getcode()
                     of.write("%s,%s,%s,%s,%s\n" % (c, httpcodec, a, httpcodea, ratio))
                     break  # first sufficiently similar URL wins; could be far more efficient
             else:
                 # The inner loop ended without a break: nothing was similar enough.
                 nm.write("%s\n" % c)
	""" This is the code I used to find the URLs that were 404 in Disqus """

	import os,sys
	#import feedparser
	import urllib
	import Levenshtein


	def read_atom():
	    """Dump every entry link from the blog's Atom feed to ``./cageyjames_out.csv``.

	    The feed is fetched and parsed with ``feedparser``; the output file is
	    overwritten and receives one link per line.
	    """
	    # Imported locally: the module-level ``import feedparser`` is commented
	    # out, so without this the call below raises NameError.  Keeping the
	    # import inside the function means the rest of the script does not need
	    # feedparser unless read_atom() is actually called.
	    import feedparser

	    # the url of the Atom feed can also be used here
	    d = feedparser.parse("http://cageyjames.webfactional.com/atom.xml")

	    # ``with`` closes the file even if an entry turns out to have no ``link``.
	    with open("./cageyjames_out.csv", "w") as of:
	        for entry in d.entries:
	            of.write("%s\n" % entry.link)

	if __name__ == "__main__":
	    # Pair every URL in ./cageyjames_out.csv with the first URL in
	    # spatiallyadjusted_mapping.csv whose case-insensitive Levenshtein ratio
	    # exceeds 0.90.  Matches go to ./match.csv as
	    #     url,status,matched_url,matched_status,ratio
	    # and URLs without any match go to ./no_match.csv, one per line.
	    #
	    # NOTE: ``urllib.urlopen`` is the Python 2 API; it is kept so that the
	    # status codes are obtained exactly as before.

	    # Read both inputs before touching the outputs, so a missing input file
	    # does not leave freshly truncated result files behind.  Lines are
	    # stripped so that line terminators (CRLF vs LF, or a missing final
	    # newline) cannot skew the similarity score, and blank lines are ignored.
	    with open("./cageyjames_out.csv") as src:
	        cj = [line.strip() for line in src if line.strip()]
	    with open("spatiallyadjusted_mapping.csv") as src:
	        # Lower-case each candidate once instead of once per comparison.
	        at = [(line.strip(), line.strip().lower()) for line in src if line.strip()]

	    # ``with`` guarantees both result files are flushed and closed even when
	    # one of the HTTP requests below raises.
	    with open("./match.csv", "w") as of, open("./no_match.csv", "w") as nm:
	        for c in cj:
	            c_lower = c.lower()
	            for a, a_lower in at:
	                ratio = Levenshtein.ratio(c_lower, a_lower)
	                if ratio > .90:
	                    httpcodea = urllib.urlopen(a).getcode()
	                    httpcodec = urllib.urlopen(c).getcode()
	                    of.write("%s,%s,%s,%s,%s\n" % (c, httpcodec, a, httpcodea, ratio))
	                    break  # first sufficiently similar URL wins; could be far more efficient
	            else:
	                # The inner loop ended without a break: nothing was similar enough.
	                nm.write("%s\n" % c)
No results found