Skip to content

Instantly share code, notes, and snippets.

@libswan
Last active December 16, 2015 11:09
Show Gist options
  • Save libswan/5425441 to your computer and use it in GitHub Desktop.
Save libswan/5425441 to your computer and use it in GitHub Desktop.
L15 Problem 5
import pylab
"""Rows 4 to 13 copied from l15-3.py - available in the lecture."""
# Plot-wide matplotlib defaults, applied once at import time.
# Font size used for axes titles and for the x/y axis labels.
pylab.rcParams["axes.titlesize"] = 20
pylab.rcParams["axes.labelsize"] = 20
# Thickness of plotted lines.
pylab.rcParams["lines.linewidth"] = 6
# Size of the major tick marks on each axis (the marks, not the numbers).
pylab.rcParams["xtick.major.size"] = 5
pylab.rcParams["ytick.major.size"] = 5
# File the word list is read from, one word per line.
WORDLIST_FILENAME = "words.txt"
def loadWords(filename=None):
    """
    Load the word list used by Problem 5.

    filename: optional path to a text file holding one word per line.
        When omitted (the original call style), WORDLIST_FILENAME is used.

    Returns a list with one entry per line of the file, in file order,
    each stripped of surrounding whitespace and lowercased.
    Depending on the size of the word list, this function may
    take a while to finish.
    """
    if filename is None:
        filename = WORDLIST_FILENAME
    print("Loading word list from file...")
    # The with-block guarantees the handle is closed once the words are read.
    # Default buffering is used: unbuffered text reads (buffering=0) are slow
    # and are rejected with ValueError on Python 3.
    with open(filename, 'r') as inFile:
        wordList = [line.strip().lower() for line in inFile]
    # Single formatted string so the output is identical on Python 2 and 3.
    print("  %d words loaded." % len(wordList))
    return wordList
def propL(wordList):
    """
    Compute, for each word, the fraction of its characters that are vowels.

    wordList: iterable of strings.

    Returns a list of floats, one per word and in the same order:
    (number of a/e/i/o/u characters) / (length of the word).
    Vowel matching is case-insensitive. An empty string contributes 0.0
    instead of raising ZeroDivisionError, so the result always has exactly
    one entry per input word.
    """
    vowels = frozenset('aeiou')
    propList = []
    for word in wordList:
        if not word:
            # A blank line in the word file produces an empty word; it has
            # no vowels, and dividing by its length would fail.
            propList.append(0.0)
            continue
        vowelCount = sum(1 for ch in word.lower() if ch in vowels)
        # float() keeps this a true division on Python 2 as well.
        propList.append(vowelCount / float(len(word)))
    return propList
def stdDev(propList):
    """
    Calculate the mean and population standard deviation of propList.

    propList: sequence of numbers (the per-word vowel proportions).

    Returns a tuple (mean, sd), both rounded to 3 decimal places.
    The standard deviation divides by the number of values (population
    form, not the n-1 sample form). An empty propList returns (0.0, 0.0).
    """
    # Use the length of the argument itself; the function must not depend
    # on a module-level word list being defined by the caller's script.
    count = len(propList)
    if count == 0:
        return (0.0, 0.0)
    mean = sum(propList) / float(count)
    tot = sum((x - mean) ** 2 for x in propList)
    sd = (tot / count) ** 0.5
    # Round only at the end so the deviation uses the unrounded mean.
    return (round(mean, 3), round(sd, 3))
def labelPlot(numOfWords, mean, sd):
    """
    Add the title, axis labels and a Mean/SD annotation to the current plot.
    Copied and edited from l15-3.py - available in the lecture.

    numOfWords: number of words plotted; shown in the title.
    mean, sd: values shown in the text annotation.

    The annotation is positioned from the current axis limits, so call
    this after the data has been drawn.
    """
    pylab.title('Proportion of ' + str(numOfWords) + ' varying length words with vowels')
    pylab.xlabel('Proportion of Vowels')
    pylab.ylabel('Number of Words')
    xmin, xmax = pylab.xlim()
    ymin, ymax = pylab.ylim()
    # Place the text 2% in from the left edge and halfway up the y-range.
    # Both coordinates are offset from the axis minimum so the text stays
    # inside the plot even when an axis does not start at 0.
    textX = xmin + (xmax - xmin) * 0.02
    textY = ymin + (ymax - ymin) / 2.0
    pylab.text(textX, textY,
               'Mean = ' + str(mean)
               + '\nSD = ' + str(sd))
def plotVowelProportionHistogram(wordList, numBins=15):
    """
    Plots a histogram of the proportion of vowels in each word in wordList
    using the specified number of bins in numBins.

    wordList: list of words (strings).
    numBins: number of histogram bins (default 15).

    The x-axis is the per-word vowel proportion computed by propL; the
    plot is annotated by labelPlot with the word count and the mean and
    standard deviation returned by stdDev. Nothing is returned; the caller
    is responsible for calling pylab.show().
    """
    proportions = propL(wordList)
    mean, sd = stdDev(proportions)
    # Open the new figure BEFORE drawing so each call gets its own figure
    # and no blank extra figure is left behind after the histogram.
    pylab.figure()
    pylab.hist(proportions, bins=numBins)
    # Labels are added after hist() because labelPlot reads the axis limits.
    labelPlot(len(wordList), mean, sd)
if __name__ == "__main__":
    # Script entry point: read the words, draw the histogram, display it.
    # The list is deliberately bound to the module-level name wordList.
    wordList = loadWords()
    plotVowelProportionHistogram(wordList)
    # Without an explicit show() the plot window never appears.
    pylab.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment