Created
March 7, 2012 20:33
-
-
Save JoFrhwld/1995931 to your computer and use it in GitHub Desktop.
Mapping Philadelphia High Schools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rgdal) #This may be a pain to get installed. The OGR functions come from it. | |
## install gdal, available here: http://www.gdal.org/ | |
## install proj.4, available here: http://trac.osgeo.org/proj/ | |
## For some reason, R did not expect libproj to be installed where it was,
## so I had to tell it to look in /usr/local/bin
library(ggplot2) | |
library(reshape2) | |
library(plyr) | |
gpclibPermit() | |
## School locations shapefile, downloaded from:
## http://www.opendataphilly.org/opendata/resource/38/schools/
setwd("~/PhiladelphiaSchools201201/Philadelphia Schools/")

## ogrListLayers(dsn = ".") lists the layers available in this directory;
## the one read here is "PhiladelphiaSchools201201".
## Read the layer and reproject it to longitude/latitude in one step.
schools.points <- spTransform(
  readOGR(dsn = ".", layer = "PhiladelphiaSchools201201"),
  CRS("+proj=longlat")
)

## Flatten to a plain data frame: the two coordinate columns come first,
## followed by the attribute table.
schools.df <- cbind(schools.points@coords, schools.points@data)
names(schools.df)[1:2] <- c("long", "lat")

## Narrow the facilities down to active District/Charter schools that either
## have "-12" in GRADE_ORG or are labelled "High School". The filter is this
## elaborate only to make the later string-matching merge easier.
## which() drops rows where the condition is NA, exactly as subset() does.
hschools.df <- schools.df[which(with(schools.df, {
  serves.high <- grepl("-12", GRADE_ORG) | GRADE_LEVE == "High School"
  district.or.charter <- INSTIT_TYP %in% c("District", "Charter")
  serves.high & FACIL_TYPE == "School" & ACTIVE == "y" & district.or.charter
})), , drop = FALSE]
## Graduation data, downloaded from:
## http://www.pde.state.pa.us/portal/server.pt/community/graduates/7426
grad <- read.delim("~/graduation.txt")

## Lookup table pairing one hand-written regular expression (`search`) with
## one school name from the graduation file (`School`).
## I had to create and modify these grep strings by hand,
## because there was no shared ID between the two data sets.
## The pairing is purely positional: the i-th pattern goes with the i-th
## distinct value of grad$School, taken in factor-level order.
school.search <- local({
  patterns <- c(
    "Palumbo", "ARISE",
    "Bartram.*John", "Bodine", "Bok",
    "Carroll", "Carver", "Central", "Architecture",
    "Communications Tech", "Community Academy",
    "Constitution", "Creative", "Delaware",
    "Dobbins", "Douglas", "Edison",
    "Esperanza", "Fels", "FitzSimons",
    "Frankford", "Benjamin", "Franklin.*L", "Towne",
    "Freire", "Furness", "Girard Academic", "Germantown", "Girls",
    "Gratz", "Hope", "Imhotep", "Kensington C[^u]",
    "Kensington Cu", "Kensington I",
    "Martin Luther", "Lamberton", "Lankenau",
    "Lincoln", "Bracetti", "Maritime",
    "Math, Science.*Community", "Mastbaum", "Masterman",
    "Mastery Charter", "Thomas.*Mastery",
    "Civics", "Motivation", "Multi-Cultural",
    ## NOTE(review): "Olney" is listed twice, so the two graduation-file
    ## entries at these positions share a single pattern -- confirm that
    ## this is intended and does not double-match facilities.
    "New Media", "Northeast", "Olney", "Olney",
    "Overbrook", "Parkway Northwest", "Parkway West", "Parkway.*Center City",
    "Robeson", "Penn.*William", "Philadelphia Academy",
    "Philadelphia Electrical", "Phila.*Business",
    "Learning.*N", "Learning.*S",
    "Military.*Elverson", "Military.*Leeds",
    "Prep.*Math.*Science", "Randolph",
    "Rhodes", "Roxborough", "Saul",
    "Sayre.*W", "Future", "Science.*Leader",
    "South Phila", "Strawberry Mansion", "Swenson",
    "University City", "Vaux", "Washington.*G",
    "West Philadelphia", "Widener", "World Communications",
    "YouthBuild"
  )

  ## levels() returns NULL for a character column, which is what
  ## read.delim() produces by default since R 4.0. Going through factor()
  ## yields the same level set whether School was read as a factor or as
  ## plain strings.
  school.names <- levels(factor(grad$School))

  ## data.frame() silently recycles when one length is a multiple of the
  ## other, which would scramble the positional pairing; fail loudly instead.
  if (length(patterns) != length(school.names)) {
    stop("Expected one search pattern per school in the graduation file: ",
         length(patterns), " patterns vs ", length(school.names), " schools",
         call. = FALSE)
  }

  data.frame(search = patterns, School = school.names)
})
## Given one (search, School) row of the lookup table, return the rows of
## `facilities` whose FACIL_NAME matches that row's regular expression.
facilities.matching <- function(pair, facilities) {
  facilities[grepl(pair$search, facilities$FACIL_NAME), ]
}

## Sanity check of the string matching: one list element per (search, School)
## pair holding the matched facilities, then the number of matches per pair.
found.list <- dlply(school.search, .(search, School),
                    facilities.matching,
                    facilities = hschools.df)
found.n <- ldply(found.list, nrow)

## The merged data for real: stack the matched facilities for every pair,
## then attach the graduation figures. join() is called without `by`, so it
## matches on whatever columns the two data frames have in common.
found.df <- join(
  ddply(school.search, .(search, School),
        facilities.matching,
        facilities = hschools.df),
  grad
)
## Load city limits shapefile
## http://www.opendataphilly.org/opendata/resource/8/city-limits/
setwd("~/phila-city_limits_shp/")

## Read the "city_limits" layer and reproject it to longitude/latitude.
city_limits.shp <- spTransform(
  readOGR(dsn = ".", layer = "city_limits"),
  CRS("+proj=longlat")
)

## fortify() is asked to use the "id" column as its region, so store each
## feature's row name under that column before converting to a data frame.
city_limits.shp@data[["id"]] <- row.names(city_limits.shp@data)
city_limits.df <- fortify(city_limits.shp, region = "id")
## Load zip code shapefile
## http://www.opendataphilly.org/opendata/resource/44/zip-codes/
setwd("~/phila-zipcodes_shp/")

## Read the "zipcodes" layer and reproject it to longitude/latitude.
zips.shp <- spTransform(
  readOGR(dsn = ".", layer = "zipcodes"),
  CRS("+proj=longlat")
)

## Tag every feature with its row name as "id", convert the polygons to a
## data frame keyed by that id, then join the attribute table back on.
## join() is called without `by`, so it matches on the shared columns.
zips.shp@data[["id"]] <- row.names(zips.shp@data)
zips.df <- join(fortify(zips.shp, region = "id"), zips.shp@data)
## Generate map of all data.
## One point per matched school, drawn over the zip code and city outlines:
## point size encodes Graduates, fill encodes Postsecondary / Graduates, and
## shape encodes INSTIT_TYP.
ggplot(found.df, aes(long, lat)) +
  ## Outline layers: thin grey zip codes, then the black city boundary.
  geom_polygon(data = zips.df, aes(group = group),
               fill = NA, color = "grey70", size = 0.2) +
  geom_polygon(data = city_limits.df, aes(group = group),
               fill = NA, color = "black") +
  geom_point(aes(size = Graduates,
                 fill = Postsecondary / Graduates,
                 shape = INSTIT_TYP)) +
  ## Shapes 24 and 21 are the fillable triangle and circle, so the `fill`
  ## aesthetic can carry the colour scale.
  scale_shape_manual(name = "School Type", values = c(24, 21)) +
  scale_fill_gradient2(high = "darkred", low = "darkblue", mid = "grey90",
                       midpoint = 0.5, name = "Postsecondary") +
  guides(fill = "colorbar") +
  ## scale_area() has been removed from ggplot2; scale_size_area() is its
  ## successor. It also maps a value of 0 to zero area.
  scale_size_area() +
  theme_bw() +
  coord_map() +
  ## opts()/theme_blank() have been removed from ggplot2 as well;
  ## theme()/element_blank() is the equivalent spelling.
  theme(panel.grid.major = element_blank())
## Map of the higher mode of data
## Same layers as the map of all data, restricted to schools whose
## Postsecondary / Graduates ratio exceeds 0.05.
## NOTE(review): the subset keeps ratios above 0.05, but the fill scale is
## limited to [0.5, 1]. Confirm that no retained school falls between the
## two values, or whether a 0.5 threshold was intended.
ggplot(subset(found.df, Postsecondary / Graduates > 0.05), aes(long, lat)) +
  ## Outline layers: thin grey zip codes, then the black city boundary.
  geom_polygon(data = zips.df, aes(group = group),
               fill = NA, color = "grey70", size = 0.2) +
  geom_polygon(data = city_limits.df, aes(group = group),
               fill = NA, color = "black") +
  geom_point(aes(size = Graduates,
                 fill = Postsecondary / Graduates,
                 shape = INSTIT_TYP)) +
  ## Shapes 24 and 21 are the fillable triangle and circle, so the `fill`
  ## aesthetic can carry the colour scale.
  scale_shape_manual(name = "School Type", values = c(24, 21)) +
  scale_fill_gradient2(high = "darkred", low = "darkblue", mid = "grey90",
                       midpoint = 0.75, name = "Postsecondary",
                       limits = c(0.5, 1)) +
  guides(fill = "colorbar") +
  ## scale_area() has been removed from ggplot2; scale_size_area() is its
  ## successor. It also maps a value of 0 to zero area.
  scale_size_area() +
  theme_bw() +
  coord_map() +
  ## opts()/theme_blank() have been removed from ggplot2 as well;
  ## theme()/element_blank() is the equivalent spelling.
  theme(panel.grid.major = element_blank())
## Dotplot
## Distribution of the Postsecondary / Graduates ratio, one panel per
## INSTIT_TYP: a count-scaled density curve with a dot plot on top.
ggplot(found.df, aes(x = Postsecondary / Graduates)) +
  geom_density(aes(y = ..count..)) +
  geom_dotplot() +
  facet_wrap(~ INSTIT_TYP) +
  ## The y axis gets no title and no breaks.
  scale_y_continuous(name = "", breaks = NULL) +
  theme_bw()
## Dotplot, excluding Postsecondary == 0 data
## Only rows with Postsecondary > 0 are kept; which() also drops rows where
## Postsecondary is NA, just as subset() does.
ggplot(found.df[which(found.df$Postsecondary > 0), , drop = FALSE],
       aes(x = Postsecondary / Graduates)) +
  geom_density(aes(y = ..count..)) +
  geom_dotplot() +
  facet_wrap(~ INSTIT_TYP) +
  ## The y axis gets no title and no breaks.
  scale_y_continuous(name = "", breaks = NULL) +
  theme_bw()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment