Skip to content

Instantly share code, notes, and snippets.

View WalkerHarrison's full-sized avatar

Walker Harrison WalkerHarrison

View GitHub Profile
# Per-zipcode tree summary: mean trunk diameter (mn) and tree count.
# do.call(data.frame, ...) flattens the matrix column that aggregate()
# produces when FUN returns more than one value.
extras <- do.call(data.frame, aggregate(tree_dbh ~ zipcode, trees,
  FUN = function(x) c(mn = mean(x), count = length(x))))
neighborhoods <- merge(neighborhoods, extras, by = "zipcode")
neighborhoods$trees_per_capita <- neighborhoods$tree_dbh.count / neighborhoods$population

# Income vs. mean tree diameter per zipcode; point size = trees per capita.
# Fixes vs. original: `label = zipcode` (aes() evaluates inside the data, so
# `neighborhoods$zipcode` is redundant and breaks under subsetting/faceting),
# `FALSE` instead of `F`, and the dangling trailing `+` removed so the plot
# expression is complete.
ggplot(neighborhoods,
       aes(x = income, y = tree_dbh.mn, size = trees_per_capita, label = zipcode)) +
  geom_point(color = "green") +
  geom_smooth(method = "lm", formula = y ~ x, show.legend = FALSE) +
  # NOTE(review): nudge_y recycles c(0.1, -0.1, -0.1) over the rows — confirm
  # the number of zipcodes is a multiple of 3, or labels nudge inconsistently.
  geom_text(size = 4, nudge_x = 1300, nudge_y = c(0.1, -0.1, -0.1)) +
  scale_size_continuous(range = c(0, 10))
library(XML)
library(ggplot2)
library(ggmap)
library(RSocrata)
# Data sources: NYC street-tree census via the Socrata API, plus a
# zipatlas page with Brooklyn median household income by zip code
# (url_zips is scraped later in the script).
url_trees <- "https://data.cityofnewyork.us/resource/nwxe-4ae8.csv"
url_zips <- "http://zipatlas.com/us/ny/brooklyn/zip-code-comparison/median-household-income.htm"

# Download the full citywide census, then keep only Brooklyn rows.
trees <- read.socrata(url_trees)
trees <- subset(trees, boroname == 'Brooklyn')
# Assemble the raw search log into a DataFrame, one row per query.
# NOTE(review): assumes `searches` and `dates` are parallel lists built
# earlier in the script — confirm they are the same length.
d = {
    "search": searches,
    "time": dates,
}
googled = pd.DataFrame(d)

# Window bounds for the weekly walk over the log: early Oct 2014
# through early Mar 2017, stepping one week at a time.
dt = datetime.datetime(2014, 10, 1)
end = datetime.datetime(2017, 3, 5)
step = datetime.timedelta(days=7)
weekly = []  # per-week results are appended by the loop that follows
# Tokenize every search into whitespace-separated words and rank the
# 40 most frequent ones.
combo = ' '.join(searches)
freqs = Counter(combo.split())
top = freqs.most_common(40)  # list of (word, count) pairs, most common first

# Unzip into parallel lists. Iterating over `top` itself — rather than the
# original `for i in range(40)` — avoids an IndexError when the log contains
# fewer than 40 distinct words (most_common(40) returns at most 40 pairs).
words = [word for word, _ in top]
counts = [count for _, count in top]
# Histogram of search activity by hour of day, in 24 one-hour bins.
# NOTE(review): assumes every entry in `dates` is a '%Y-%m-%d %H:%M:%S'
# string — confirm against the code that builds `dates`.
parsed = (datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in dates)
hours = [ts.hour for ts in parsed]

n, bins, patches = plt.hist(hours, 24, facecolor='blue', alpha=0.75)

# One labeled tick every six hours; enlarge all axis text for readability.
plt.xticks([0,6,12,18], ['12 AM','6 AM', '12 PM', '6 PM'], fontsize=18)
plt.xlabel('Hour', fontsize=24)
plt.ylabel('Frequency', fontsize=24)

# Enlarge the figure before rendering it.
plt.gcf().set_size_inches(18.5, 10.5, forward=True)
plt.show()
import json
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
# Collect the search-history files to process. os.listdir() returns entries
# in arbitrary order, so the original `del files[0]` — presumably meant to
# drop a hidden system entry such as '.DS_Store' — could silently discard a
# real data file. Skip hidden entries explicitly and sort for a stable order.
files = sorted(f for f in os.listdir('Searches') if not f.startswith('.'))