Skip to content

Instantly share code, notes, and snippets.

View WalkerHarrison's full-sized avatar

Walker Harrison WalkerHarrison

View GitHub Profile
import json
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
# List the entries of the local 'Searches' directory.
files= os.listdir('Searches')
# Discard the first listed entry.
# NOTE(review): os.listdir() returns names in arbitrary order, so index 0 is
# not guaranteed to be the same entry everywhere — presumably this targets a
# hidden file such as .DS_Store; confirm and filter by name instead.
del files[0]
hours = [datetime.datetime.strptime(i, '%Y-%m-%d %H:%M:%S').hour for i in dates]
n, bins, patches = plt.hist(hours, 24, facecolor='blue', alpha=0.75)
plt.xticks([0,6,12,18], ['12 AM','6 AM', '12 PM', '6 PM'], fontsize=18)
plt.xlabel('Hour', fontsize=24)
plt.ylabel('Frequency', fontsize=24)
plt.gcf().set_size_inches(18.5, 10.5, forward=True)
plt.show()
# Count whitespace-separated words across all search strings
# (`searches` is built outside this excerpt).
combo = ' '.join(searches)
freqs = Counter(combo.split())

# Up to 40 most frequent words as (word, count) pairs, most frequent first.
top = freqs.most_common(40)

# Split the pairs into parallel lists. Iterating over `top` itself, rather than
# a fixed range(40), avoids an IndexError when fewer than 40 distinct words exist.
words = [word for word, _ in top]
counts = [count for _, count in top]
# One row per search: the query text alongside its timestamp
# (`searches` and `dates` are built outside this excerpt).
d = dict(search=searches, time=dates)
googled = pd.DataFrame(d)

# Start date, end date and a one-week stride, plus an empty list.
# NOTE(review): presumably a loop past this excerpt walks dt -> end in `step`
# increments and appends to `weekly`; confirm against the full script.
dt = datetime.datetime(year=2014, month=10, day=1)
end = datetime.datetime(year=2017, month=3, day=5)
step = datetime.timedelta(weeks=1)
weekly = list()
library(XML)
library(ggplot2)
library(ggmap)
library(RSocrata)
# Data sources: a Socrata CSV endpoint for the tree records and a zipatlas
# page (url_zips is not used in the lines visible here).
url_trees <- "https://data.cityofnewyork.us/resource/nwxe-4ae8.csv"
url_zips <- "http://zipatlas.com/us/ny/brooklyn/zip-code-comparison/median-household-income.htm"

# Download the tree records and keep only rows whose boroname is Brooklyn.
trees <- subset(read.socrata(url_trees), boroname == "Brooklyn")

# Per-zipcode mean and count of tree_dbh. aggregate() returns the two values
# as a matrix column; do.call(data.frame, ...) flattens it into the separate
# columns tree_dbh.mn and tree_dbh.count.
extras <- do.call(
  data.frame,
  aggregate(
    tree_dbh ~ zipcode,
    data = trees,
    FUN = function(x) c(mn = mean(x), count = length(x))
  )
)
# Attach the per-zipcode tree summary to the neighborhood table
# (`neighborhoods` is built outside this excerpt and is expected to carry
# `zipcode`, `income` and `population` columns).
neighborhoods <- merge(neighborhoods, extras, by = "zipcode")
neighborhoods$trees_per_capita <-
  neighborhoods$tree_dbh.count / neighborhoods$population

# Scatter of income against mean tree_dbh, point size scaled by trees per
# capita, with a linear fit and zipcode labels.
# - `label = zipcode` refers to the column through the data argument; using
#   `neighborhoods$zipcode` inside aes() bypasses ggplot2's data handling.
# - The chain ends here: a dangling `+` would fuse this plot with whatever
#   statement follows and make the script fail to run.
ggplot(
  neighborhoods,
  aes(x = income, y = tree_dbh.mn, size = trees_per_capita, label = zipcode)
) +
  geom_point(color = "green") +
  geom_smooth(method = "lm", formula = y ~ x, show.legend = FALSE) +
  geom_text(size = 4, nudge_x = 1300, nudge_y = c(0.1, -0.1, -0.1)) +
  scale_size_continuous(range = c(0, 10))
# The two groups of zip codes being compared. Defined once so the subset and
# the `hood` flag below cannot drift apart.
hood0_zips <- c(11239, 11206, 11212, 11224, 11221)
hood1_zips <- c(11201, 11215, 11217, 11231, 11234)

# Keep only trees in either group; hood is a factor: 1 for hood1_zips, else 0.
zoom <- subset(trees, zipcode %in% c(hood0_zips, hood1_zips))
zoom$hood <- as.factor(ifelse(zoom$zipcode %in% hood1_zips, 1, 0))

# Satellite basemap with one faint point per tree, coloured by group.
map <- get_map(
  location = c(lon = -73.95, lat = 40.64), zoom = 12,
  maptype = "satellite", source = "google"
)
ggmap(map) +
  geom_point(
    data = zoom, aes(x = longitude, y = latitude, col = hood),
    size = 0.5, shape = 16, alpha = 0.1, show.legend = FALSE
  )

# Flags: common species name contains "cherry" (case-sensitive); status is "Dead".
zoom$cherry <- grepl("cherry", zoom$spc_common)
zoom$dead <- zoom$status == "Dead"

# Cross-tabulate group against each flag and against the brch_shoe column.
table(zoom$hood, zoom$dead)
table(zoom$hood, zoom$cherry)
table(zoom$hood, zoom$brch_shoe)
library(ggplot2)
# Reproducible synthetic data: a sine-modulated quadratic trend plus Gaussian
# noise with standard deviation 500.
set.seed(1)
x <- seq_len(100)
y <- x^2 * sin(2 * pi * x / 100) + 500 * rnorm(length(x))
df <- data.frame(x = x, y = y)

# Normal-kernel regression smoother with bandwidth h, evaluated at 100 points;
# the list returned by ksmooth() becomes a two-column (x, y) data frame.
h <- 12
smoother <- as.data.frame(
  ksmooth(x, y, kernel = "normal", bandwidth = h, n.points = 100)
)
library(pracma)
# Standard deviation of the normal kernel that ksmooth() uses for bandwidth h.
# ksmooth() scales its kernels so their quartiles sit at +/- 0.25 * bandwidth,
# hence sd = 0.25 * h / qnorm(0.75). Base qnorm() replaces the earlier
# erfinv()-based expression, which computed the same quartile indirectly.
scale <- 0.25 * h / qnorm(0.75)

# Kernel weight of every x value relative to the evaluation point x = 50.
wt <- dnorm(x - 50, mean = 0, sd = scale)
ggplot(df, aes(x, y, col = wt)) +
geom_point(size = pmax(100*wt, 1)) +
geom_line(data = smoother, aes(x, y), col = "black") +
geom_point(data = smoother[x==50,], aes(x, y), size = 3,
col = "black", shape = 21, fill = "white") +