Skip to content

Instantly share code, notes, and snippets.

@JoFrhwld
Created March 7, 2012 20:33
Show Gist options
  • Save JoFrhwld/1995932 to your computer and use it in GitHub Desktop.
Save JoFrhwld/1995932 to your computer and use it in GitHub Desktop.
Mapping Philadelphia High Schools
library(rgdal) #This may be a pain to get installed. The OGR functions come from it.
## install gdal, available here: http://www.gdal.org/
## install proj.4, available here: http://trac.osgeo.org/proj/
## For some reason, R did not expect libproj to be installed where it was,
## so I had to tell it to look in /usr/local/bin
library(ggplot2)
library(reshape2)
library(plyr)
## NOTE(review): gpclibPermit() is not provided by any package attached with
## library() above -- it looks like it comes from maptools; confirm that it is
## on the search path before running.
gpclibPermit()

## School locations (point shapefile), downloaded from
## http://www.opendataphilly.org/opendata/resource/38/schools/
setwd("~/PhiladelphiaSchools201201/Philadelphia Schools/")
#ogrListLayers(dsn = ".")
# "PhiladelphiaSchools201201"

## Read the layer and reproject it to longitude/latitude in one step.
schools.points <- spTransform(
  readOGR(dsn = ".", layer = "PhiladelphiaSchools201201"),
  CRS("+proj=longlat")
)

## Flatten to an ordinary data frame: the two coordinate columns come first,
## followed by the attribute table.
schools.df <- cbind(schools.points@coords, schools.points@data)
names(schools.df)[1:2] <- c("long", "lat")

## Keep only active district or charter schools that serve grade 12 (or are
## labelled as high schools). Narrowing the set this far keeps the string
## matching against the graduation data manageable.
## which() drops rows where the condition is NA, as subset() would.
hschools.df <- schools.df[with(schools.df, which(
  (grepl("-12", GRADE_ORG) | GRADE_LEVE == "High School") &
    FACIL_TYPE == "School" &
    ACTIVE == "y" &
    INSTIT_TYP %in% c("District", "Charter")
)), ]
## Graduate and postsecondary counts per school, downloaded from
## http://www.pde.state.pa.us/portal/server.pt/community/graduates/7426
grad <- read.delim("~/graduation.txt")

## Distinct school names in sorted (factor-level) order. Going through
## factor() gives the same result whether read.delim() returned School as a
## factor (R < 4.0 default) or as a character vector (R >= 4.0 default);
## calling levels() on a character column would return NULL.
school.names <- levels(factor(grad$School))

## I had to create and modify these grep strings by hand,
## because there was no shared ID between the two data sets.
## The i-th pattern below is paired with the i-th entry of school.names, so
## the order of this vector must follow the sorted order of the school names.
## NOTE(review): "Olney" appears twice, so two School entries share one
## pattern and each will match every facility whose name contains "Olney" --
## confirm that this is intended.
search.patterns <- c(
  "Palumbo", "ARISE",
  "Bartram.*John", "Bodine", "Bok",
  "Carroll", "Carver", "Central", "Architecture",
  "Communications Tech", "Community Academy",
  "Constitution", "Creative", "Delaware",
  "Dobbins", "Douglas", "Edison",
  "Esperanza", "Fels", "FitzSimons",
  "Frankford", "Benjamin", "Franklin.*L", "Towne",
  "Freire", "Furness", "Girard Academic", "Germantown", "Girls",
  "Gratz", "Hope", "Imhotep", "Kensington C[^u]",
  "Kensington Cu", "Kensington I",
  "Martin Luther", "Lamberton", "Lankenau",
  "Lincoln", "Bracetti", "Maritime",
  "Math, Science.*Community", "Mastbaum", "Masterman",
  "Mastery Charter", "Thomas.*Mastery",
  "Civics", "Motivation", "Multi-Cultural",
  "New Media", "Northeast", "Olney", "Olney",
  "Overbrook", "Parkway Northwest", "Parkway West", "Parkway.*Center City",
  "Robeson", "Penn.*William", "Philadelphia Academy",
  "Philadelphia Electrical", "Phila.*Business",
  "Learning.*N", "Learning.*S",
  "Military.*Elverson", "Military.*Leeds",
  "Prep.*Math.*Science", "Randolph",
  "Rhodes", "Roxborough", "Saul",
  "Sayre.*W", "Future", "Science.*Leader",
  "South Phila", "Strawberry Mansion", "Swenson",
  "University City", "Vaux", "Washington.*G",
  "West Philadelphia", "Widener", "World Communications",
  "YouthBuild"
)

## Fail loudly if the hand-built list and the data have drifted apart, rather
## than letting data.frame() recycle or drop a column.
stopifnot(length(search.patterns) == length(school.names))

## One row per school: the regex to look for and the name used in `grad`.
school.search <- data.frame(search = search.patterns, School = school.names)
## Return the rows of `facilities` whose FACIL_NAME matches the regular
## expression held in `pattern.row$search`.
##   pattern.row  one-row piece of school.search (columns search, School)
##   facilities   data frame with a FACIL_NAME column
## Returns a (possibly empty) subset of `facilities`.
lookup.facilities <- function(pattern.row, facilities) {
  facilities[grep(pattern.row$search, facilities$FACIL_NAME), ]
}

## Sanity checking the string matching: one list element per
## (search, School) pair, holding the facilities that pattern matched.
found.list <- dlply(school.search, .(search, School), lookup.facilities,
                    facilities = hschools.df)
found.n <- ldply(found.list, nrow)

## Report every pair that did not match exactly one facility, so that a bad
## pattern is noticed instead of silently dropping or duplicating a school.
match.counts <- vapply(found.list, nrow, integer(1))
if (any(match.counts != 1L)) {
  warning(
    sprintf(
      "%d search pattern(s) did not match exactly one facility: %s",
      sum(match.counts != 1L),
      paste(names(match.counts)[match.counts != 1L], collapse = "; ")
    ),
    call. = FALSE
  )
}

## The for real merged data: matched facilities labelled with search/School,
## then joined to the graduation figures on their shared columns.
found.df <- ddply(school.search, .(search, School), lookup.facilities,
                  facilities = hschools.df)
found.df <- join(found.df, grad)
## Switch to `dir`, read `layer` from it, reproject to longitude/latitude and
## add an `id` column holding the attribute table's row names (the key that
## fortify() uses to label polygons).
## Note: the working directory is left at `dir` afterwards.
read.longlat.layer <- function(dir, layer) {
  setwd(dir)
  shp <- spTransform(readOGR(dsn = ".", layer = layer), CRS("+proj=longlat"))
  shp@data$id <- row.names(shp@data)
  shp
}

## Load city limits shapefile
## http://www.opendataphilly.org/opendata/resource/8/city-limits/
city_limits.shp <- read.longlat.layer("~/phila-city_limits_shp/", "city_limits")
city_limits.df <- fortify(city_limits.shp, region = "id")

## Load zip code shapefile
## http://www.opendataphilly.org/opendata/resource/44/zip-codes/
zips.shp <- read.longlat.layer("~/phila-zipcodes_shp/", "zipcodes")
zips.df <- fortify(zips.shp, region = "id")
## Re-attach the zip code attributes to the fortified polygon vertices.
zips.df <- join(zips.df, zips.shp@data)
## Map schools on top of the zip code and city limit outlines.
## Point size encodes Graduates, fill encodes Postsecondary/Graduates and
## shape encodes INSTIT_TYP.
##   schools   data frame with long, lat, Graduates, Postsecondary, INSTIT_TYP
##   zips      fortified zip code polygons (long, lat, group)
##   city      fortified city limit polygons (long, lat, group)
##   midpoint  Postsecondary/Graduates value drawn in the neutral colour
##   limits    optional c(lower, upper) range of the fill scale; NULL (the
##             default) lets ggplot2 take the range from the data
## Returns a ggplot object.
##
## scale_area(), opts() and theme_blank() no longer exist in ggplot2, so this
## uses their replacements scale_size_area(), theme() and element_blank().
## scale_size_area() makes point area proportional to Graduates.
school.map <- function(schools, zips, city, midpoint, limits = NULL) {
  ggplot(schools, aes(long, lat)) +
    geom_polygon(data = zips, aes(group = group),
                 fill = NA, color = "grey70", size = 0.2) +
    geom_polygon(data = city, aes(group = group),
                 fill = NA, color = "black") +
    geom_point(aes(size = Graduates, fill = Postsecondary/Graduates,
                   shape = INSTIT_TYP)) +
    ## Shapes 24 and 21 are the fillable triangle and circle, so the fill
    ## aesthetic is visible on them.
    scale_shape_manual(name = "School Type", values = c(24, 21)) +
    scale_fill_gradient2(high = "darkred", low = "darkblue", mid = "grey90",
                         midpoint = midpoint, name = "Postsecondary",
                         limits = limits) +
    guides(fill = "colorbar") +
    scale_size_area() +
    theme_bw() +
    coord_map() +
    theme(panel.grid.major = element_blank())
}

## Generate map of all data.
school.map(found.df, zips.df, city_limits.df, midpoint = 0.5)

## Map of the higher mode of data
school.map(subset(found.df, Postsecondary/Graduates > 0.05),
           zips.df, city_limits.df,
           midpoint = 0.75, limits = c(0.5, 1))
## Dot plot of Postsecondary/Graduates with a count-scaled density curve
## behind it, one panel per INSTIT_TYP. The y axis carries no title or breaks.
##   schools  data frame with Postsecondary, Graduates and INSTIT_TYP columns
## Returns a ggplot object.
postsecondary.dotplot <- function(schools) {
  ggplot(schools, aes(Postsecondary/Graduates)) +
    geom_density(aes(y = ..count..)) +
    geom_dotplot() +
    facet_wrap(~INSTIT_TYP) +
    theme_bw() +
    scale_y_continuous(name = "", breaks = NULL)
}

## Dotplot
postsecondary.dotplot(found.df)

## Dotplot, excluding Postsecondary == 0 data
postsecondary.dotplot(subset(found.df, Postsecondary > 0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment