#get the speech which we will try to identify
secret_speech = open('obama_speech_testing.txt', 'r').read()
secret_speech = secret_speech.replace('\n', ' ').replace('  ', ' ').lower()
list_of_phrases_secret = secret_speech.split(' ')
#get zipped group of every 3 consecutive words
secret_tups = [str(item) for item in zip(list_of_phrases_secret[:-2], list_of_phrases_secret[1:-1], list_of_phrases_secret[2:])]
#get TF dictionary for the secret speech
d_secret = get_phrase_freq_dict(secret_tups)
"""
get inverse document frequency (IDF) for a given phrase
"""
def get_idf(phrase, d1, d2):
tot_docs_containing = 0
#for each TF dictionary...
for d in [d1,d2]:
#if the given phrase is in the dictionary, increment the number of docs containing this phrase
if phrase in d.keys():
tot_docs_containing += 1
#for bush and obama, zip up every consecutive series of 3 words
bush_tups = [str(item) for item in zip(bush_speech_list[:-2], bush_speech_list[1:-1], bush_speech_list[2:])]
obama_tups = [str(item) for item in zip(obama_speech_list[:-2], obama_speech_list[1:-1], obama_speech_list[2:])]
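
#a minimal sketch (not in the original snippets) of combining the TF dictionaries
#with get_idf to score which speaker the secret speech most resembles; the scoring
#formula, summing TF * IDF over shared phrases, is an assumption
d_bush = get_phrase_freq_dict(bush_tups)
d_obama = get_phrase_freq_dict(obama_tups)
def tfidf_score(d_candidate):
    return sum(tf * get_idf(phrase, d_bush, d_obama)
               for phrase, tf in d_secret.items() if phrase in d_candidate)
print('bush score: ', tfidf_score(d_bush))
print('obama score:', tfidf_score(d_obama))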
"""
get term frequency (TF) dictionary for a given speech
"""
def get_phrase_freq_dict(list_of_phrases):
d = {}
tot_phrases = float(len(list_of_phrases))
#needed for mathematical operations (e.g. the log in get_idf)
import numpy as np
#read Bush and Obama speeches in
bush_speech = open('bush_speech.txt', 'r').read()
obama_speech = open('obama_speech.txt', 'r').read()
#clean up speeches: remove newlines, collapse extra spaces, and cast all to lowercase
bush_speech = bush_speech.replace('\n', ' ').replace('  ', ' ').lower()
obama_speech = obama_speech.replace('\n', ' ').replace('  ', ' ').lower()
#split each speech into its list of words (these lists feed the word-triple zips above)
bush_speech_list = bush_speech.split(' ')
obama_speech_list = obama_speech.split(' ')
#initialize dataframe
df_temp = pd.DataFrame()
#populate date and average temperature fields (cast string date to datetime and convert temperature from tenths of Celsius to Fahrenheit)
df_temp['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_temp]
df_temp['avgTemp'] = [float(v)/10.0*1.8 + 32 for v in temps]
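
#the dates_prcp/prcp lists are presumably assembled into a parallel dataframe the
#same way; a hedged sketch (column name assumed, any unit conversion omitted):
df_prcp = pd.DataFrame()
df_prcp['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_prcp]
df_prcp['prcp'] = [float(v) for v in prcp]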
#initialize lists to store data
dates_temp = []
dates_prcp = []
temps = []
prcp = []
#for each year from 2015-2019 ...
for year in range(2015, 2020):
    year = str(year)
    print('working on year ' + year)
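    #a hedged sketch (not in the original snippet) of the request that fills
    #dates_temp and temps inside this loop; the NOAA CDO token and the LAX
    #station id are placeholders/assumptions
    r = requests.get('https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND'
                     '&datatypeid=TAVG&limit=1000&stationid=GHCND:USW00023174'
                     '&startdate=' + year + '-01-01&enddate=' + year + '-12-31',
                     headers={'token': 'YOUR_NOAA_TOKEN'})
    d = json.loads(r.text)
    #keep the average temperature readings, remembering their dates and values
    avg_temps = [item for item in d['results'] if item['datatype'] == 'TAVG']
    dates_temp += [item['date'] for item in avg_temps]
    temps += [item['value'] for item in avg_temps]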
#needed to make web requests
import requests
#store the data we get as a dataframe
import pandas as pd
#convert the response to structured json
import json
#parse the date strings in the response
from datetime import datetime
#mathematical operations on lists
import numpy as np
#initialize the state neighbors dictionary
neighbors = {}
#for each state ...
for k1, v1 in statePolygons.items():
    neighbors[k1] = []
    #iterate over each other state
    for k2, v2 in statePolygons.items():
        #if the states touch, then add this state to the list of neighboring states
        if v1.touches(v2):
            neighbors[k1].append(k2)
#manipulate complex shapes
from shapely.geometry import Polygon, MultiPolygon
#manipulate json objects
import json
#open up the US States Geojson
with open('us_states.geojson') as f:
    states = json.load(f)
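
#a minimal sketch (assumed, not from the original) of building the statePolygons
#dictionary the neighbors loop relies on: map each state's name to its shapely shape;
#the 'NAME' property key is an assumption about this particular geojson
from shapely.geometry import shape
statePolygons = {}
for feature in states['features']:
    statePolygons[feature['properties']['NAME']] = shape(feature['geometry'])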
#make maps in python
import folium
from folium import plugins
#we need an explicit string zipcode column in our input data
joinedDf['zipcode'] = [str(z) for z in joinedDf.index]
#initialize map of LA County
laMap = folium.Map(location=[34.0522,-118.2437], tiles='Stamen Toner', zoom_start=9)
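
#a hedged sketch of one possible next step (not from the original snippet): shade
#each zip code on the map by a value from joinedDf and save to html; the geojson
#path, value column name, and key_on path are all assumptions
folium.Choropleth(geo_data='laZipcodes.geojson', data=joinedDf,
                  columns=['zipcode', 'value'], key_on='feature.properties.zipcode',
                  fill_color='YlOrRd', fill_opacity=0.7).add_to(laMap)
laMap.save('laMap.html')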