#get the speech which we will try to identify
secret_speech = open('obama_speech_testing.txt', 'r').read()
secret_speech = secret_speech.replace('\n', ' ').replace('  ', ' ').lower()
list_of_phrases_secret = secret_speech.split(' ')
#get zipped group of every 3 consecutive words
secret_tups = [str(item) for item in zip(list_of_phrases_secret[:-2], list_of_phrases_secret[1:-1], list_of_phrases_secret[2:])]
#get TF dictionary for the secret speech
d_secret = get_phrase_freq_dict(secret_tups)
"""
get inverse document frequency (IDF) for a given phrase
"""
def get_idf(phrase, d1, d2):
tot_docs_containing = 0
#for each TF dictionary...
for d in [d1,d2]:
#if the given phrase is in the dictionary, increment the number of docs containing this phrase
if phrase in d.keys():
tot_docs_containing += 1
#for bush and obama, zip up every consecutive series of 3 words
bush_tups = [str(item) for item in zip(bush_speech_list[:-2], bush_speech_list[1:-1], bush_speech_list[2:])]
obama_tups = [str(item) for item in zip(obama_speech_list[:-2], obama_speech_list[1:-1], obama_speech_list[2:])]
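
#a minimal sketch (not in the original snippets) of combining the TF dictionaries
#with get_idf to score which speaker the secret speech most resembles; the scoring
#formula, summing TF * IDF over shared phrases, is an assumption
d_bush = get_phrase_freq_dict(bush_tups)
d_obama = get_phrase_freq_dict(obama_tups)
def tfidf_score(d_candidate):
    return sum(tf * get_idf(phrase, d_bush, d_obama)
               for phrase, tf in d_secret.items() if phrase in d_candidate)
print('bush score: ', tfidf_score(d_bush))
print('obama score:', tfidf_score(d_obama))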
"""
get term frequency (TF) dictionary for a given speech
"""
def get_phrase_freq_dict(list_of_phrases):
d = {}
tot_phrases = float(len(list_of_phrases))
#needed for mathematical operations (e.g. the log in get_idf)
import numpy as np
#read Bush and Obama speeches in
bush_speech = open('bush_speech.txt', 'r').read()
obama_speech = open('obama_speech.txt', 'r').read()
#clean up speeches: remove newlines, collapse extra spaces, and cast all to lowercase
bush_speech = bush_speech.replace('\n', ' ').replace('  ', ' ').lower()
obama_speech = obama_speech.replace('\n', ' ').replace('  ', ' ').lower()
#split each speech into its list of words (these lists feed the word-triple zips above)
bush_speech_list = bush_speech.split(' ')
obama_speech_list = obama_speech.split(' ')
#initialize dataframe
df_temp = pd.DataFrame()
#populate date and average temperature fields (cast string date to datetime and convert temperature from tenths of Celsius to Fahrenheit)
df_temp['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_temp]
df_temp['avgTemp'] = [float(v)/10.0*1.8 + 32 for v in temps]
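
#the dates_prcp/prcp lists are presumably assembled into a parallel dataframe the
#same way; a hedged sketch (column name assumed, any unit conversion omitted):
df_prcp = pd.DataFrame()
df_prcp['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_prcp]
df_prcp['prcp'] = [float(v) for v in prcp]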
#initialize lists to store data
dates_temp = []
dates_prcp = []
temps = []
prcp = []
#for each year from 2015-2019 ...
for year in range(2015, 2020):
    year = str(year)
    print('working on year ' + year)
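    #a hedged sketch (not in the original snippet) of the request that fills
    #dates_temp and temps inside this loop; the NOAA CDO token and the LAX
    #station id are placeholders/assumptions
    r = requests.get('https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND'
                     '&datatypeid=TAVG&limit=1000&stationid=GHCND:USW00023174'
                     '&startdate=' + year + '-01-01&enddate=' + year + '-12-31',
                     headers={'token': 'YOUR_NOAA_TOKEN'})
    d = json.loads(r.text)
    #keep the average temperature readings, remembering their dates and values
    avg_temps = [item for item in d['results'] if item['datatype'] == 'TAVG']
    dates_temp += [item['date'] for item in avg_temps]
    temps += [item['value'] for item in avg_temps]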
#needed to make web requests
import requests
#store the data we get as a dataframe
import pandas as pd
#convert the response to structured json
import json
#parse the date strings in the response
from datetime import datetime
#mathematical operations on lists
import numpy as np
#initialize the state neighbors dictionary
neighbors = {}
#for each state ...
for k1, v1 in statePolygons.items():
    neighbors[k1] = []
    #iterate over each other state
    for k2, v2 in statePolygons.items():
        #if the states touch, then add this state to the list of neighboring states
        if v1.touches(v2):
            neighbors[k1].append(k2)
#manipulate complex shapes
from shapely.geometry import Polygon, MultiPolygon
#manipulate json objects
import json
#open up the US States Geojson
with open('us_states.geojson') as f:
    states = json.load(f)
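
#a minimal sketch (assumed, not from the original) of building the statePolygons
#dictionary the neighbors loop relies on: map each state's name to its shapely shape;
#the 'NAME' property key is an assumption about this particular geojson
from shapely.geometry import shape
statePolygons = {}
for feature in states['features']:
    statePolygons[feature['properties']['NAME']] = shape(feature['geometry'])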
#make maps in python
import folium
from folium import plugins
#we need an explicit string zipcode column in our input data
joinedDf['zipcode'] = [str(z) for z in joinedDf.index]
#initialize map of LA County
laMap = folium.Map(location=[34.0522,-118.2437], tiles='Stamen Toner', zoom_start=9)
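
#a hedged sketch of one possible next step (not from the original snippet): shade
#each zip code on the map by a value from joinedDf and save to html; the geojson
#path, value column name, and key_on path are all assumptions
folium.Choropleth(geo_data='laZipcodes.geojson', data=joinedDf,
                  columns=['zipcode', 'value'], key_on='feature.properties.zipcode',
                  fill_color='YlOrRd', fill_opacity=0.7).add_to(laMap)
laMap.save('laMap.html')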