Walker Harrison WalkerHarrison

Grad student in Duke Statistical Science department. Data scientist (in training).

WalkerHarrison / google_searches_1.py

Last active April 7, 2017 20:18

	import json
	import os
	import datetime
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from collections import Counter

	# Collect the entry names found in the 'Searches' directory. os.listdir()
	# returns them in arbitrary order, so sort first: the positional delete below
	# then always removes the same (lexicographically first) entry.
	files = sorted(os.listdir('Searches'))
	# Drop the first entry. NOTE(review): presumably this targets a hidden file
	# such as '.DS_Store', which sorts ahead of names starting with a digit or
	# letter -- confirm against the actual directory contents.
	del files[0]

WalkerHarrison / google_searches_2.py

Created March 9, 2017 15:40

	# Parse every timestamp string in `dates` and keep only its hour of day.
	fmt = '%Y-%m-%d %H:%M:%S'
	hours = [datetime.datetime.strptime(stamp, fmt).hour for stamp in dates]

	# 24-bin histogram of those hours, labelled every six hours.
	n, bins, patches = plt.hist(hours, bins=24, facecolor='blue', alpha=0.75)
	plt.xticks([0, 6, 12, 18], ['12 AM', '6 AM', '12 PM', '6 PM'], fontsize=18)
	plt.xlabel('Hour', fontsize=24)
	plt.ylabel('Frequency', fontsize=24)

	# Enlarge the current figure before displaying it.
	fig = plt.gcf()
	fig.set_size_inches(18.5, 10.5, forward=True)
	plt.show()

WalkerHarrison / google_searches_3.py

Last active April 7, 2017 20:19

WalkerHarrison / google_searches_4.py

Last active April 7, 2017 20:20

	# Two-column frame: one row per search, with its text and its timestamp.
	d = dict(search=searches, time=dates)
	googled = pd.DataFrame(d)

	# Start, end and 7-day step values -- presumably for walking the date range
	# week by week; the loop itself is not part of this snippet.
	dt = datetime.datetime(year=2014, month=10, day=1)
	end = datetime.datetime(year=2017, month=3, day=5)
	step = datetime.timedelta(weeks=1)

	# Starts out empty.
	weekly = []

WalkerHarrison / trees_1.R

Last active April 24, 2017 06:55

	library(XML)
	library(ggplot2)
	library(ggmap)
	library(RSocrata)

	# Source URLs: a Socrata CSV resource for the tree data and a zipatlas page
	# of median household income by Brooklyn zip code.
	url_trees <- 'https://data.cityofnewyork.us/resource/nwxe-4ae8.csv'
	url_zips <- 'http://zipatlas.com/us/ny/brooklyn/zip-code-comparison/median-household-income.htm'

	# Download the tree data, then keep only the rows whose boroname is
	# 'Brooklyn'. which() drops rows where the comparison is NA, and
	# drop = FALSE keeps the result a data frame.
	trees <- read.socrata(url = url_trees)
	trees <- trees[which(trees$boroname == 'Brooklyn'), , drop = FALSE]

WalkerHarrison / trees_2.R

Last active April 24, 2017 07:07

	# Per-zipcode summary of tree_dbh: its mean (mn) and its number of rows (count).
	dbh_summary <- function(v) c(mn = mean(v), count = length(v))
	by_zip <- aggregate(tree_dbh ~ zipcode, data = trees, FUN = dbh_summary)
	# aggregate() keeps both statistics in a single matrix column;
	# do.call(data.frame, ...) spreads them into tree_dbh.mn and tree_dbh.count.
	extras <- do.call(data.frame, by_zip)

	# Attach the summaries to the neighborhood table and derive trees per resident.
	neighborhoods <- merge(neighborhoods, extras, by = "zipcode")
	neighborhoods[["trees_per_capita"]] <-
	  neighborhoods[["tree_dbh.count"]] / neighborhoods[["population"]]

	ggplot(neighborhoods,
	aes(x=income, y=tree_dbh.mn, size = trees_per_capita, label=neighborhoods$zipcode)) +
	geom_point(color="green") + geom_smooth(method='lm',formula=y~x, show.legend =F) +
	geom_text(size=4, nudge_x = 1300, nudge_y = c(0.1, -0.1, -0.1)) +
	scale_size_continuous(range=c(0,10)) +

WalkerHarrison / trees_3.R

Created April 24, 2017 07:08

	# Zip codes shown on the map. The second group is the one flagged with
	# hood == 1; naming each vector once keeps the subset and the flag in sync.
	zips_other <- c(11239, 11206, 11212, 11224, 11221)
	zips_flagged <- c(11201, 11215, 11217, 11231, 11234)

	# Keep only trees in those ten zip codes and mark group membership as a
	# two-level factor (1 = flagged group, 0 = the rest).
	zoom <- subset(trees, zipcode %in% c(zips_other, zips_flagged))
	zoom$hood <- as.factor(ifelse(zoom$zipcode %in% zips_flagged, 1, 0))

	# Fetch a Google satellite basemap around the given centre and overlay one
	# small, mostly transparent point per tree, coloured by group.
	map <- get_map(location = c(lon = -73.95, lat = 40.64), zoom = 12,
	               maptype = "satellite", source = "google")
	# FALSE rather than F: F is an ordinary variable that can be reassigned.
	ggmap(map) +
	  geom_point(data = zoom, aes(x = longitude, y = latitude, col = hood),
	             size = 0.5, shape = 16, alpha = 0.1, show.legend = FALSE)

WalkerHarrison / trees_4.R

Created April 24, 2017 07:26

	# Flag rows whose spc_common contains "cherry" and rows whose status is "Dead".
	zoom[["cherry"]] <- grepl(pattern = "cherry", x = zoom[["spc_common"]])
	zoom[["dead"]] <- zoom[["status"]] == "Dead"

	# Cross-tabulate hood against each flag and against brch_shoe.
	table(zoom[["hood"]], zoom[["dead"]])
	table(zoom[["hood"]], zoom[["cherry"]])
	table(zoom[["hood"]], zoom[["brch_shoe"]])

WalkerHarrison / kernel_viz_1.R

Last active July 27, 2017 16:34

	library(ggplot2)
	# Fix the RNG seed so the simulated noise is reproducible.
	set.seed(1)

	# Simulated data: a sine wave whose amplitude grows with x^2, plus Gaussian
	# noise with standard deviation 500. The multiplication operators are
	# written out explicitly; without them the expression does not parse.
	x <- 1:100
	y <- x^2 * sin(2 * pi * x / 100) + 500 * rnorm(length(x))

	df <- data.frame(x, y)
	# Bandwidth handed to ksmooth().
	h <- 12
	# Normal-kernel regression smoother evaluated at 100 points; ksmooth()
	# returns a list with components x and y, which become the two columns.
	smoother <- data.frame(ksmooth(x, y, "normal", bandwidth = h, n.points = 100))

WalkerHarrison / kernel_viz_2.R

Last active July 27, 2017 16:40

	library(pracma)

	# Standard deviation of the Gaussian weights that ksmooth()'s "normal" kernel
	# applies for bandwidth h. The kernel is scaled so its quartiles sit at
	# +/- 0.25 * h, and erfinv(-0.5) * sqrt(2) is the lower quartile of the
	# standard normal, so sd = |1 / (quartile * 4 / h)|. The multiplication
	# operators are written out explicitly; without them the line fails.
	scale <- abs((erfinv(-0.5) * (2^0.5) * 4 / h)^-1)
	# Kernel weight of every x relative to the evaluation point x = 50.
	wt <- dnorm(x - 50, 0, scale)

	ggplot(df, aes(x, y, col = wt)) +
	geom_point(size = pmax(100*wt, 1)) +
	geom_line(data = smoother, aes(x, y), col = "black") +
	geom_point(data = smoother[x==50,], aes(x, y), size = 3,
	col = "black", shape = 21, fill = "white") +