randyzwitch · July 31, 2013 14:38
diff --git a/python-word-search.py b/python-word-search.py
 import collections 
 import nltk

 #Stopwords corpus from NLTK
 stopwords = nltk.corpus.stopwords.words('english')
 #Set copy of the stopwords so every membership test below is a hash lookup
 stopword_set = set(stopwords)

 #Build english_dictionary of prospect words
 #The trailing newline is stripped BEFORE filtering: an unstripped line such as
 #"about\n" never equals the stopword "about", and len() would count the
 #newline, letting 4-letter words slip past the "more than 4" threshold.
 english_dictionary = []
 #Dictionary from Unix; each line is treated as one word
 with open("/usr/share/dict/words") as internal_dict:  #handle is closed when the block ends
     for line in internal_dict:
         word = line.rstrip('\n')
         if word not in stopword_set and len(word) > 4:  #make sure only "big", useful words included
             english_dictionary.append(word)

 #How many words are in the complete dictionary?
 #(bare expression: the value is only echoed in an interactive session)
 len(english_dictionary)

 #Import urls: one list entry per line of the file (trailing newlines are kept)
 with open("/path/to/urls/file.csv") as url_file:  #handle is closed once the lines are read
     urls = list(url_file)

 #Build counter dictionary: word -> number of urls that contain it
 #Matching is a plain, case-sensitive substring test.
 wordcount = collections.Counter()
 for word in english_dictionary:    #Loop over all possible English words
     hits = sum(1 for url in urls if word in url)    #how many urls contain this word
     if hits:    #only record words that were actually found
         wordcount[word] += hits
	import collections
	import nltk

	#Stopwords corpus from NLTK
	stopwords = nltk.corpus.stopwords.words('english')
	#Set copy of the stopwords so every membership test below is a hash lookup
	stopword_set = set(stopwords)

	#Build english_dictionary of prospect words
	#The trailing newline is stripped BEFORE filtering: an unstripped line such as
	#"about\n" never equals the stopword "about", and len() would count the
	#newline, letting 4-letter words slip past the "more than 4" threshold.
	english_dictionary = []
	#Dictionary from Unix; each line is treated as one word
	with open("/usr/share/dict/words") as internal_dict:  #handle is closed when the block ends
		for line in internal_dict:
			word = line.rstrip('\n')
			if word not in stopword_set and len(word) > 4:  #make sure only "big", useful words included
				english_dictionary.append(word)

	#How many words are in the complete dictionary?
	#(bare expression: the value is only echoed in an interactive session)
	len(english_dictionary)

	#Import urls: one list entry per line of the file (trailing newlines are kept)
	with open("/path/to/urls/file.csv") as url_file:  #handle is closed once the lines are read
		urls = list(url_file)

	#Build counter dictionary: word -> number of urls that contain it
	#Matching is a plain, case-sensitive substring test.
	wordcount = collections.Counter()
	for word in english_dictionary:  #Loop over all possible English words
		hits = sum(1 for url in urls if word in url)  #how many urls contain this word
		if hits:  #only record words that were actually found
			wordcount[word] += hits