ktibb · March 7, 2012 23:35
diff --git a/Prophetic Quatrain b/Prophetic Quatrain
 import urllib
 import BeautifulSoup
 import re
 import nltk
 from random import choice
 #import context_free

 # Shared accumulators used by the rest of the script:
 #   part  -- POS tag -> delimiter-joined string of the words seen with that tag
 #   words -- scratch list, reassigned while `part` is summarised
 #   final -- "TAG->words" strings, one per POS tag
 part = dict()
 words = list()
 final = list()

 # Download the page. The handle is closed explicitly instead of being left
 # to the garbage collector, so it is released even if read() fails.
 response = urllib.urlopen('http://www.oprah.com/relationships/What-Kind-of-Woman-Watches-Porn-Researchers-Find-Answers')
 try:
     html = response.read()
 finally:
     response.close()
 soup = BeautifulSoup.BeautifulSoup(html)

 # Restrict processing to the first <div class="arial14">
 # (presumably the article body -- confirm against the page markup).
 texts = soup.find("div", {"class": "arial14"})
 if texts is None:
     # find() returns None when nothing matches; fail here with a clear
     # message rather than with a TypeError when the result is iterated.
     raise RuntimeError('no <div class="arial14"> found in the downloaded page')

 def visible(element):
     """Return True if *element* should be treated as visible page text.

     An element is rejected when its parent's tag name is one of style,
     script, [document], head or title, or when its string form starts
     with an HTML comment.

     element: object exposing ``parent.name`` and a ``str()`` form
         (presumably a BeautifulSoup node -- confirm against callers).
     """
     if element.parent.name in ('style', 'script', '[document]', 'head', 'title'):
         return False
     # DOTALL lets '.' cross newlines, so comments spanning several lines are
     # recognised too; without it they were reported as visible.
     if re.match(r'<!--.*-->', str(element), re.DOTALL):
         return False
     return True

 # Keep only the items of `texts` that pass visible().
 visible_texts = filter(visible, texts)

 # Group every plain word by its part-of-speech tag:
 # part[TAG] is a "|"-joined string of the distinct words tagged TAG.
 for tag in visible_texts:
     line = tag.string
     # Skip items whose .string is None and bare markup/whitespace fragments.
     if line is None or line in ['\n', ' <br /> ', '\bu', '<i>', '</i>', '']:
         continue

     # str methods return a new string, so the result has to be rebound; the
     # original call discarded it. The former `line.replace("u'", " ")` is
     # dropped rather than enabled: its result was discarded as well, and
     # applying it would break up real words that contain "u'".
     line = line.strip(";:#-?.,")
     if not line:
         continue

     # POS-tag the tokens of this fragment.
     text = nltk.word_tokenize(line)
     tagged = nltk.pos_tag(text)

     for word, pos in tagged:
         # Ignore tokens containing non-word characters and URL fragments.
         if re.search(r'\W', word) or re.search(r"http", word):
             continue
         # Duplicates are detected against the individual words stored for
         # this tag. The original tested `word in part.values()`, which
         # compares with the whole joined strings and so missed duplicates.
         if pos not in part:
             part[pos] = word
         elif word not in part[pos].split("|"):
             part[pos] += "|" + word
                      
 # Record one "TAG->words" entry per part of speech in `final` and draw a
 # random sample word from that tag's word list.
 for pos in part:
     joined = part[pos]
     thisone = "->".join((pos, joined))
     final.append(thisone)
     words = joined.split("|")
     random_word = choice(words)
 #print words

 #Combine the words together in a Nostradamus Poem
 #[('IN'+'CD'+'NNS'+'CC'+'CD'+'NNS'+','+'NNP'+'MD'+'VB'+'DT'+'NN'+'NNP'+','+'NNP'+'VBZ'+'MD'+'VB'+'IN'+'PRP$'+'NNP'+','+'NN'+'RB'+'DT'+'NNS'+'CC'+'RB')]

 #nostradamus = ['IN', 'CD', 'NNS' , 'CC', 'CD' , 'NNS' , 'NNP', 'MD' , 'VB' , 'DT' , 'NN' , 'NNP' , 'NNP','MD' , 'VB' , 'IN' , 'PRP$' , 'NNP' ,'NN' , 'RB' , 'DT' , 'NNS', 'CC', 'RB']
 # Sequence of POS tags that defines the word order of the poem; one random
 # word is drawn from `part` for each entry.
 nostradamus = ['IN', 'CD','NNS','CC','CD','NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NNP', 'NNP', 'VBZ', 'MD', 'VB','IN','PRP$','NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB', 'DT', 'NN', 'NN', 'MD', 'VB', 'DT', 'NN', 'CD', 'NNS', 'IN', 'DT', 'NN', 'MD', 'VB', 'NNP', 'MD', 'VB', 'CC', 'VBN', 'VB', 'IN', 'DT', 'JJ', 'NNS', 'IN', 'DT', 'NN', 'DT', 'JJ', 'NN', 'MD', 'VB', 'IN', 'CD', 'NNS', 'DT', 'NN', 'MD', 'VB', 'CD', 'NNS', 'TO', 'VB', 'DT', 'JJ', 'NN', 'MD', 'VB', 'DT', 'NNP', 'DT', 'JJ', 'NN', 'MD', 'VB', 'NN', 'IN', 'CD', 'NNS', 'CC', 'CD', 'NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NN', 'DT', 'NNS', 'MD', 'VB', 'IN', 'PRP$', 'NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB']

 poem_words = []
 for part_of_speech in nostradamus:
     # A tag that never occurred in the source text has no entry in `part`;
     # skip it instead of aborting the whole poem with a KeyError.
     if part_of_speech not in part:
         continue
     # All words collected for the current part of speech.
     array_of_words = part[part_of_speech].split("|")
     # Pick one of these words at random.
     random_poem_word = choice(array_of_words)
     poem_words.append(random_poem_word)

 # Every word is preceded by a single space, exactly as the original
 # `poem += " " + word` loop produced, but joined in one pass.
 poem = "".join(" " + word for word in poem_words)

 # Single-argument print(...) behaves the same on Python 2 and Python 3.
 print(poem)
	import urllib
	import BeautifulSoup
	import re
	import nltk
	from random import choice
	#import context_free

	# Shared accumulators used by the rest of the script:
	#   part  -- POS tag -> delimiter-joined string of the words seen with that tag
	#   words -- scratch list, reassigned while `part` is summarised
	#   final -- "TAG->words" strings, one per POS tag
	part = dict()
	words = list()
	final = list()

	# Download the page. The handle is closed explicitly instead of being left
	# to the garbage collector, so it is released even if read() fails.
	response = urllib.urlopen('http://www.oprah.com/relationships/What-Kind-of-Woman-Watches-Porn-Researchers-Find-Answers')
	try:
		html = response.read()
	finally:
		response.close()
	soup = BeautifulSoup.BeautifulSoup(html)

	# Restrict processing to the first <div class="arial14">
	# (presumably the article body -- confirm against the page markup).
	texts = soup.find("div", {"class": "arial14"})
	if texts is None:
		# find() returns None when nothing matches; fail here with a clear
		# message rather than with a TypeError when the result is iterated.
		raise RuntimeError('no <div class="arial14"> found in the downloaded page')

	def visible(element):
		"""Return True if *element* should be treated as visible page text.

		An element is rejected when its parent's tag name is one of style,
		script, [document], head or title, or when its string form starts
		with an HTML comment.

		element: object exposing ``parent.name`` and a ``str()`` form
			(presumably a BeautifulSoup node -- confirm against callers).
		"""
		if element.parent.name in ('style', 'script', '[document]', 'head', 'title'):
			return False
		# DOTALL lets '.' cross newlines, so comments spanning several lines
		# are recognised too; without it they were reported as visible.
		if re.match(r'<!--.*-->', str(element), re.DOTALL):
			return False
		return True

	# Keep only the items of `texts` that pass visible().
	visible_texts = filter(visible, texts)

	# Group every plain word by its part-of-speech tag:
	# part[TAG] is a "\|"-joined string of the distinct words tagged TAG.
	for tag in visible_texts:
		line = tag.string
		# Skip items whose .string is None and bare markup/whitespace fragments.
		if line is None or line in ['\n',' <br /> ', '\bu','<i>', '</i>', '']:
			continue

		# str methods return a new string, so the result has to be rebound;
		# the original call discarded it. The former `line.replace("u'", " ")`
		# is dropped rather than enabled: its result was discarded as well,
		# and applying it would break up real words that contain "u'".
		line = line.strip(";:#-?.,")
		if not line:
			continue

		# POS-tag the tokens of this fragment.
		text = nltk.word_tokenize(line)
		tagged = nltk.pos_tag(text)

		for word, pos in tagged:
			# Ignore tokens containing non-word characters and URL fragments.
			if re.search(r'\W', word) or re.search(r"http", word):
				continue
			# Duplicates are detected against the individual words stored for
			# this tag. The original tested `word in part.values()`, which
			# compares with the whole joined strings and so missed duplicates.
			if pos not in part:
				part[pos] = word
			elif word not in part[pos].split("\|"):
				part[pos] += "\|" + word

	# Record one "TAG->words" entry per part of speech in `final` and draw a
	# random sample word from that tag's word list. The loop body is
	# re-indented so the block parses; the statements themselves are unchanged.
	for u in part.keys():
		thisone = u + "->" + part[u]
		final.append(thisone)
		words = part[u].split("\|")
		random_word = choice(words)
	#print words

	#Combine the words together in a Nostradamus Poem
	#[('IN'+'CD'+'NNS'+'CC'+'CD'+'NNS'+','+'NNP'+'MD'+'VB'+'DT'+'NN'+'NNP'+','+'NNP'+'VBZ'+'MD'+'VB'+'IN'+'PRP$'+'NNP'+','+'NN'+'RB'+'DT'+'NNS'+'CC'+'RB')]

	#nostradamus = ['IN', 'CD', 'NNS' , 'CC', 'CD' , 'NNS' , 'NNP', 'MD' , 'VB' , 'DT' , 'NN' , 'NNP' , 'NNP','MD' , 'VB' , 'IN' , 'PRP$' , 'NNP' ,'NN' , 'RB' , 'DT' , 'NNS', 'CC', 'RB']
	# Sequence of POS tags that defines the word order of the poem; one random
	# word is drawn from `part` for each entry.
	nostradamus = ['IN', 'CD','NNS','CC','CD','NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NNP', 'NNP', 'VBZ', 'MD', 'VB','IN','PRP$','NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB', 'DT', 'NN', 'NN', 'MD', 'VB', 'DT', 'NN', 'CD', 'NNS', 'IN', 'DT', 'NN', 'MD', 'VB', 'NNP', 'MD', 'VB', 'CC', 'VBN', 'VB', 'IN', 'DT', 'JJ', 'NNS', 'IN', 'DT', 'NN', 'DT', 'JJ', 'NN', 'MD', 'VB', 'IN', 'CD', 'NNS', 'DT', 'NN', 'MD', 'VB', 'CD', 'NNS', 'TO', 'VB', 'DT', 'JJ', 'NN', 'MD', 'VB', 'DT', 'NNP', 'DT', 'JJ', 'NN', 'MD', 'VB', 'NN', 'IN', 'CD', 'NNS', 'CC', 'CD', 'NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NN', 'DT', 'NNS', 'MD', 'VB', 'IN', 'PRP$', 'NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB']

	poem_words = []
	for part_of_speech in nostradamus:
		# A tag that never occurred in the source text has no entry in `part`;
		# skip it instead of aborting the whole poem with a KeyError.
		if part_of_speech not in part:
			continue
		# All words collected for the current part of speech.
		array_of_words = part[part_of_speech].split("\|")
		# Pick one of these words at random.
		random_poem_word = choice(array_of_words)
		poem_words.append(random_poem_word)

	# Every word is preceded by a single space, exactly as the original
	# `poem += " " + word` loop produced, but joined in one pass.
	poem = "".join(" " + word for word in poem_words)

	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(poem)