glickmac’s gists

glickmac / R_tut_2.r

Created July 9, 2014 19:14 — forked from bobthecat/R_tut_2.r

	### R code from vignette source 'Presentation_2.Rnw'
	### Encoding: UTF-8

	###################################################
	### code chunk number 1: init
	###################################################
	### Set the `width` option to 60: the maximum number of columns R uses
	### per line when printing vectors, matrices and arrays.
	options(width=60)


	###################################################

glickmac / dd_webscrapping.py

Last active December 17, 2019 19:44

	import requests
	from bs4 import BeautifulSoup
	import nltk
	## Fetch the NLTK resources this script names: the stopword lists and the
	## VADER sentiment lexicon.
	nltk.download("stopwords")
	nltk.download('vader_lexicon')

	### Pull
	url = 'http://www.gutenberg.org/files/501/501-0.txt'
	## requests has no default timeout, so an unresponsive server would hang the
	## script forever; fail after 30 seconds instead.
	res = requests.get(url, timeout=30)
	## Stop on an HTTP error (404, 503, ...) rather than treating the error
	## page as the book text.
	res.raise_for_status()
	html_page = res.content

glickmac / longest_word.py

Last active December 17, 2019 20:06

	from nltk.tokenize import RegexpTokenizer
	## Break the text into lowercase tokens, each a run of word characters (\w+).
	tokenizer = RegexpTokenizer(r'\w+')
	tokens = [word.lower() for word in tokenizer.tokenize(text)]

	## Report the longest token. To report its length as well, extend the
	## message with f" is {len(max(tokens, key=len))} characters long".
	print(f"Longest word in text: {max(tokens, key=len)}")

	## Longest real word: drop "cutterigsloop", which the author does not
	## count as a real word.
	tokens = [word for word in tokens if word != "cutterigsloop"]

glickmac / Flesch_Kincaid.py

Last active December 17, 2019 20:10

	def Flesch_Kincaid(text):
	    """Estimate the Flesch-Kincaid grade level of *text*.

	    Sentences are delimited by '.', words by whitespace, and syllables are
	    approximated by counting vowel letters (a, e, i, o, u, y) regardless
	    of case.

	    Args:
	        text: The passage to score.

	    Returns:
	        The grade level as a float, computed as
	        0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59.
	        An average that cannot be computed (no sentences or no words) is
	        taken as 0, so empty text yields -15.59.
	    """
	    # split('.') leaves an empty fragment after a trailing period (and
	    # between consecutive periods); counting those as sentences would drag
	    # the average sentence length down.
	    sentences = [s for s in text.split('.') if s.strip()]
	    if sentences:
	        avg_sentence_len = sum(len(s.split()) for s in sentences) / float(len(sentences))
	    else:
	        avg_sentence_len = 0.0

	    # Lowercase first so capital vowels are counted too.
	    syllables = sum(1 for ch in text.lower() if ch in "aeiouy")
	    # split() with no argument collapses runs of whitespace, so double
	    # spaces and newlines do not inflate or deflate the word count.
	    word_count = len(text.split())
	    mean_syllables_per_word = syllables / float(word_count) if word_count else 0.0

	    return (0.39 * avg_sentence_len) + (11.8 * mean_syllables_per_word) - 15.59

glickmac / Text_Processing.py

Last active December 20, 2019 16:20

	def text_processing(input_text):
	    """Tokenize, lemmatize and filter *input_text* into a list of words.

	    Args:
	        input_text: Raw text to process.

	    Returns:
	        The lemmatized tokens, in order, excluding English stopwords and
	        the tokens "wa" and "u".
	    """
	    raw_tokens = tokenizer.tokenize(input_text)
	    lemmatize = WordNetLemmatizer().lemmatize
	    lemmas = [lemmatize(token) for token in raw_tokens]
	    excluded = set(stopwords.words('english'))
	    # "wa" and "u" are presumably lemmatizer leftovers (e.g. from "was" and
	    # "us") -- confirm against the corpus.
	    excluded.update(("wa", "u"))
	    return [lemma for lemma in lemmas if lemma not in excluded]
	values = text_processing(text)

glickmac / Animals.py

Last active December 17, 2019 20:14

	## Strip every character listed in string.punctuation from the text.
	text = text.translate(str.maketrans('', '', string.punctuation))
	## Replace each listed name with the paired animal word so that it is
	## counted under that animal.
	animals = {
	    "polynesia": "parrot",
	    "gubgub": "pig",
	    "cheechee": "monkey",
	    "tootoo": "owl",
	    "pushmipullyu": "two-headed unicorn",
	    "whitey": "mouse",
	    "jip": "dog",
	    "dabdab": "duck",
	    "toggle": "horse",
	    "cheapside": "sparrow",
	}
	text = ' '.join(animals.get(word, word) for word in text.split())
	## Load the list of animal names, one per line. Note that this rebinds
	## `animals` from the mapping above to a list of names.
	with open("../data/Animal_Names.txt", "r") as f:
	    animals = [line.rstrip("\n") for line in f]
	## Names from the list that also occur among the tokens.
	Animals_in_Text = set(animals).intersection(tokens)
	print(f"Doctor Dolittle interacts with {len(Animals_in_Text)} different kinds of animals")
	## Keep only the animal words and count how often each occurs.
	values = [word for word in values if word in Animals_in_Text]
	freq = nltk.FreqDist(values)

glickmac / Wordcloud_Animals.py

Last active December 17, 2019 20:18

	from collections import Counter
	import numpy as np
	from wordcloud import WordCloud, ImageColorGenerator
	from PIL import Image
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_pdf import PdfPages
	## Split into chapters
	## The split is on the literal substring "chapter", so element 0 holds
	## whatever text precedes its first occurrence.
	Chapters = text.split("chapter")

	## Counter starting at 1; its use is not visible in this excerpt
	## (presumably a running chapter number -- confirm against the full script).
	count = 1

glickmac / vader_sentiment.py

Last active December 17, 2019 20:20

	def Chapter_Sentiment(Chapter_Text):
	def Average(lst):
	return float(sum(lst)) / len(lst)
	sid = SentimentIntensityAnalyzer()
	sentences = Chapter_Text.split('.')
	sentiment_scores = []
	for item in sentences:
	sentiment = sid.polarity_scores(item)
	sentiment_value = sentiment['compound']
	sentiment_scores.append(sentiment_value)

glickmac / war_words.py

Last active December 17, 2019 20:22

	## Load the war vocabulary, one term per line.
	with open("../data/War_Terms.txt", "r") as f:
	    war_terms = [line.rstrip("\n") for line in f]
	## Terms from the list that also occur among the tokens.
	War_Terms_in_Text = set(war_terms).intersection(tokens)
	print(f"Number of war terms used in the story: {len(War_Terms_in_Text)}")
	values = [word for word in tokens if word in War_Terms_in_Text]
	## Print every '.'-delimited sentence that contains "death".
	sentences = text.split('.')
	for item in sentences:
	    if "death" not in item:
	        continue
	    print(item)

glickmac / webscraping.py

Created December 17, 2019 20:05

	## Download the text, normalise it, and save the part between the two
	## marker phrases.
	url = 'http://www.gutenberg.org/files/501/501-0.txt'
	## requests has no default timeout; without one a stalled connection would
	## hang the script indefinitely.
	res = requests.get(url, timeout=30)
	## Stop on an HTTP error rather than saving an error page as the book.
	res.raise_for_status()
	html_page = res.content
	soup = BeautifulSoup(html_page, 'html.parser')
	## get_text() returns the document text itself. The earlier
	## str(soup.find_all(text=True)) stringified a *list*, which leaked the
	## list repr (brackets, quotes, backslash escapes) into the saved text.
	text = soup.get_text()
	## Flatten line breaks to spaces, drop underscores, and lowercase
	## everything so the markers below match.
	text = text.replace("\n", " ").replace("\r", " ").replace("_", "").lower()
	## Keep only the text between the two marker phrases.
	text = text.split("the first chapter")[1].split("illustration: the end")[0]
	with open("../data/Doctor_Dolittle.txt", "w") as f:
	    f.write(text)

Cody Glickman glickmac