Last active
December 16, 2015 11:09
-
-
Save libswan/5425441 to your computer and use it in GitHub Desktop.
L15 Problem 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pylab | |
"""Rows 4 to 13 copied from l15-3.py - available in the lecture.""" | |
#set line width | |
pylab.rcParams['lines.linewidth'] = 6 | |
#set font size for titles | |
pylab.rcParams['axes.titlesize'] = 20 | |
#set font size for labels on axes | |
pylab.rcParams['axes.labelsize'] = 20 | |
#set size of numbers on x-axis | |
pylab.rcParams['xtick.major.size'] = 5 | |
#set size of numbers on y-axis | |
pylab.rcParams['ytick.major.size'] = 5 | |
WORDLIST_FILENAME = "words.txt" | |
def loadWords(filename=None):
    """
    Load a word list from a text file, one word per line.

    Every line is stripped of surrounding whitespace and lower-cased. One
    list entry is produced per line, so a blank line yields an empty string.
    Depending on the size of the word list, this function may take a while
    to finish.

    filename: path of the file to read; defaults to WORDLIST_FILENAME when
              omitted, which matches the original zero-argument call.

    Returns a list of strings.
    Raises IOError/OSError if the file cannot be opened.
    """
    if filename is None:
        filename = WORDLIST_FILENAME
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Loading word list from file...")
    # The context manager closes the file even if reading fails. The old
    # buffering=0 argument is dropped: it is rejected for text mode on
    # Python 3 and has no effect on the words returned.
    with open(filename, 'r') as inFile:
        wordList = [line.strip().lower() for line in inFile]
    print("  %d words loaded." % len(wordList))
    return wordList
def propL(wordList):
    """
    Compute, for each word, the fraction of its letters that are vowels.

    The proportion is (number of vowels) / (length of the word), so it lies
    between 0.0 and 1.0. Only the lowercase letters a, e, i, o, u count as
    vowels.

    wordList: iterable of strings.

    Returns a list of floats, one per word and in the same order. An empty
    word contributes 0.0 instead of raising ZeroDivisionError, so the result
    always has one entry per input word.
    """
    vowels = frozenset('aeiou')
    propList = []
    for word in wordList:
        if not word:
            # Blank lines in the word file become empty strings; a word with
            # no letters has no vowels.
            propList.append(0.0)
            continue
        vowelCount = sum(1 for letter in word if letter in vowels)
        propList.append(vowelCount / float(len(word)))
    return propList
def stdDev(propList):
    """
    Calculate the mean and population standard deviation of propList.

    propList: non-empty list of numbers (the proportions calculated).

    Returns a tuple (mean, sd), each rounded to 3 decimal places. The
    deviation is computed from the unrounded mean and divides by the number
    of values (population form, not the n-1 sample form).

    Raises ZeroDivisionError if propList is empty.
    """
    # Use the length of the data actually passed in, not a module-level
    # global word list, so the function also works when imported.
    count = len(propList)
    mean = sum(propList) / float(count)
    tot = 0.0
    for x in propList:
        tot += (x - mean) ** 2
    sd = (tot / count) ** 0.5
    return (round(mean, 3), round(sd, 3))
def labelPlot(numOfWords, mean, sd):
    """
    Add the title, axis labels and a Mean/SD text box to the current plot.
    Copied and edited from l15-3.py - available in the lecture.

    numOfWords: number of words plotted; shown in the title.
    mean:       mean of the plotted proportions; shown in the text box.
    sd:         standard deviation of the proportions; shown in the text box.

    Call this after the data has been drawn: the text position is derived
    from the axis limits in effect at the time of the call.
    """
    pylab.title('Proportion of ' + str(numOfWords) + ' varying length words with vowels')
    pylab.xlabel('Proportion of Vowels')
    pylab.ylabel('Number of Words')
    xmin, xmax = pylab.xlim()
    ymin, ymax = pylab.ylim()
    # Place the text 2% in from the left edge and halfway up the y-axis.
    # Both coordinates are offset from the axis minimum so the placement
    # holds even when an axis does not start at 0.
    pylab.text(xmin + (xmax - xmin) * 0.02, ymin + (ymax - ymin) / 2.0,
               'Mean = ' + str(mean)
               + '\nSD = ' + str(sd))
def plotVowelProportionHistogram(wordList, numBins=15):
    """
    Plot a histogram of the proportion of vowels in each word in wordList,
    using the number of bins given by numBins.

    The proportion of a word is its vowel count divided by its length, as
    computed by propL. The plot is titled and annotated with the mean and
    standard deviation of the proportions.

    wordList: list of strings.
    numBins:  number of histogram bins (default 15).

    The histogram is drawn on a new figure; call pylab.show() afterwards to
    display it.
    """
    proportions = propL(wordList)
    mean, sd = stdDev(proportions)
    # Open the new figure before drawing. Creating it after the histogram
    # leaves an extra, empty figure window behind.
    pylab.figure()
    pylab.hist(proportions, bins=numBins)
    labelPlot(len(wordList), mean, sd)
# Script entry point: load the word list and show the vowel-proportion histogram.
if __name__ == '__main__':
    # NOTE(review): `wordList` becomes a module-level global here; keep the
    # name unchanged, since other code in this file may read it as a global.
    wordList = loadWords()
    plotVowelProportionHistogram(wordList)
    pylab.show()  # Needed to actually display the figure when run as a script.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment