Created
April 9, 2014 01:42
-
-
Save inkhorn/10217838 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(plyr) | |
library(ggplot2) | |
library(ggmap) | |
# Read the 2010 Ontario library statistics export.
libraries <- read.csv("ontario_library_stats_2010.csv")

# Flag First Nations libraries: 1 for them, 0 for every other service type.
libraries$isFN <- ifelse(
  libraries$Library.Service.Type == "First Nations Library",
  1,
  0
)

# 'Proportionate' versions of the raw measures: each of columns 20:142 is
# divided by column 13 and written to columns 143:265 (an offset of 123),
# named after the source column with a ".P" suffix.
# NOTE(review): column 13 is presumably Population..Resident. -- confirm
# against the CSV header; every range below is positional.
libraries[, 143:265] <- sapply(
  libraries[, 20:142],
  function(measure) measure / libraries[, 13]
)
names(libraries)[143:265] <- paste0(names(libraries)[20:142], ".P")

libraries$cardholders.per.resident <-
  libraries$X..of.Active.Library.Cardholders / libraries$Population..Resident.

# Min-max normed copies of the proportionate columns, written to 269:391
# (an offset of 126 from the ".P" columns).
# NOTE(review): this write needs columns 267-268 to exist already -- confirm
# the width of the CSV, otherwise the assignment leaves a hole.
libraries[, 269:391] <- sapply(libraries[, 143:265], function(measure) {
  lo <- min(measure, na.rm = TRUE)
  hi <- max(measure, na.rm = TRUE)
  (measure - lo) / (hi - lo)
})
# Build one summary row per proportionate revenue / expenditure column
# (described as such by the original author): a Kruskal-Wallis test for a
# group difference between First Nations and other libraries, plus the median
# of each group.
#
# 23:50 + 123 evaluates to 146:173, i.e. the ".P" copies of raw columns 23:50
# (the ".P" block sits 123 columns after the raw block). The medians are read
# from column i + 126, the min-max normed copy of the same measure, so they
# share a scale when plotted.
#
# The rows are built with lapply() and bound once, instead of growing a data
# frame with rbind() on every loop iteration.
results.rev <- do.call(rbind, lapply(23:50 + 123, function(i) {
  normed <- libraries[, i + 126]
  data.frame(
    variable = names(libraries)[i],
    position = i,
    pval = kruskal.test(libraries[, i] ~ libraries$isFN)$p.value,
    med.FN = median(normed[which(libraries$isFN == 1)], na.rm = TRUE),
    med.OTHER = median(normed[which(libraries$isFN == 0)], na.rm = TRUE)
  )
}))
# Keep only the variables with a significant group difference (p < .05),
# dropping those whose medians are zero in both groups.
results.rev <- subset(
  results.rev,
  !(med.FN == 0 & med.OTHER == 0) & pval < .05
)

# Dot plot of the two group medians per variable: red for First Nations
# libraries, black for the rest. The axis label now spells the min-max
# formula with balanced parentheses so it matches the normalisation used.
ggplot(results.rev) +
  geom_point(
    aes(x = variable, y = med.FN),
    colour = "red", size = 6, alpha = .5
  ) +
  geom_point(
    aes(x = variable, y = med.OTHER),
    colour = "black", size = 6, alpha = .5
  ) +
  coord_flip() +
  scale_y_continuous(
    name = "Normed Per-Resident Value \n((x-min(x))/(max(x)-min(x)))",
    limits = c(0, .25)
  ) +
  scale_x_discrete(name = "") +
  theme(
    axis.text.x = element_text(size = 14, colour = "black"),
    axis.text.y = element_text(size = 14, colour = "black"),
    axis.title.x = element_text(size = 17),
    plot.title = element_text(size = 17, face = "bold")
  ) +
  ggtitle("Costs and Revenues by Library Type\n(Red = First Nations, Black = Other)")
# Graphing section.
# Cardholders per resident: violin plot split by library type, followed by
# the quantiles of the same measure per group (rounded to 2 decimals).
ggplot(
  libraries,
  aes(
    factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    cardholders.per.resident
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  ggtitle("Cardholders per Resident Population by Library Type")

cardholders.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) round(quantile(grp$cardholders.per.resident, na.rm = TRUE), 2)
)
# Net "profit" per resident: proportionate operating revenues minus
# proportionate operating expenditures.
libraries$rev.minus.cost <-
  libraries$Total.Operating.Revenues.P - libraries$Total.Operating.Expenditures.P

# Violin plot zoomed to the -50..50 band. The zoom is done with
# coord_cartesian() rather than scale limits: scale limits discard
# out-of-range rows before the density is estimated, which changes the shape
# of the violins instead of merely cropping the view.
ggplot(
  libraries,
  aes(
    factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    rev.minus.cost
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  scale_y_continuous(name = "Net Profit per Resident Population") +
  coord_cartesian(ylim = c(-50, 50)) +
  ggtitle("Net Profit per Resident Population by Library Type\n(Zoomed in)")

# Quantiles of the same measure per library type, rounded to 2 decimals.
netprofit.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) round(quantile(grp$rev.minus.cost, na.rm = TRUE), 2)
)
# Resident population size by library type on a log10 axis.
# The y aesthetic reads column 13 by position.
# NOTE(review): presumably the same column as Population..Resident. used for
# the quantiles below -- confirm against the CSV header.
# comma() lives in the scales package, which this script never attaches, so
# it is called with an explicit namespace.
ggplot(
  libraries,
  aes(
    x = factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    y = libraries[, 13]
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  scale_y_log10(
    name = "Resident Population Size\n(log scaled)",
    breaks = c(
      25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000,
      100000, 250000, 500000, 1000000, 2500000
    ),
    labels = scales::comma
  ) +
  ggtitle("Distribution of Local Population Sizes by Library Type")

# Quantiles of the resident population per library type.
pop.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) quantile(grp$Population..Resident., na.rm = TRUE)
)
# Raw count of English titles in circulation by library type on a log10 axis.
# The y aesthetic reads column 66 by position.
# NOTE(review): presumably Titles.Held..Circulating...English.Language., the
# column used for the quantiles below -- confirm against the CSV header.
# comma() lives in the scales package, which this script never attaches, so
# it is called with an explicit namespace.
ggplot(
  libraries,
  aes(
    x = factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    y = libraries[, 66]
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  scale_y_log10(
    name = "# English Titles in Circulation\n(log scaled)",
    breaks = c(
      25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000,
      100000, 165000, 250000, 500000
    ),
    labels = scales::comma
  ) +
  ggtitle("Distribution of English Titles in Circulation by Library Type")

# Quantiles of the raw English-title counts per library type.
raw.eng.titles.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) {
    quantile(grp$Titles.Held..Circulating...English.Language., na.rm = TRUE)
  }
)
# English titles in circulation per resident, by library type.
# Column 189 is the ".P" copy of raw column 66 (66 + 123).
# NOTE(review): presumably Titles.Held..Circulating...English.Language..P,
# the column used for the quantiles below -- confirm against the CSV header.
ggplot(
  libraries,
  aes(
    x = factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    y = libraries[, 189]
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  scale_y_continuous(name = "# English Titles in Circulation / Local Pop. Size") +
  ggtitle("Distribution of English Titles in Circulation per Resident by Library Type")

# Quantiles of the per-resident English-title measure per library type.
eng.titles.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) {
    quantile(grp$Titles.Held..Circulating...English.Language..P, na.rm = TRUE)
  }
)
# Annual program attendance per resident, by library type.
# The y aesthetic reads column 252 by position.
# NOTE(review): presumably Annual.program.attendance.P, the column used for
# the quantiles below -- confirm against the CSV header.
ggplot(
  libraries,
  aes(
    x = factor(isFN, labels = c("Other Libraries", "First Nations Libraries")),
    y = libraries[, 252]
  )
) +
  geom_violin(fill = "darkgreen") +
  scale_x_discrete(name = "Library Type") +
  scale_y_continuous(name = "Annual Program Attendance / Local Pop. Size") +
  ggtitle("Distribution of Annual Program Attendance \nper Resident by Library Type")

# Quantiles of per-resident program attendance per library type.
prog.attendance.by.FN <- ddply(
  libraries,
  .(isFN.Library = factor(
    isFN,
    labels = c("Other Libraries", "First Nations Libraries")
  )),
  function(grp) quantile(grp$Annual.program.attendance.P, na.rm = TRUE)
)
# Mapping section.
# The postal-code table (zipcodeset) was taken from geocoder.ca; it has no
# header row, so the column names are assigned by hand.
postals <- read.csv("zipcodeset.txt", header = FALSE)
names(postals) <- c("Postal", "Lat", "Lon", "City", "Prov")

# Attach latitude / longitude to each library by postal code. all.x = TRUE
# keeps libraries whose postal code has no match (their Lat / Lon become NA).
libraries <- merge(
  libraries,
  postals[, c("Postal", "Lat", "Lon")],
  by.x = "Postal.Code",
  by.y = "Postal",
  all.x = TRUE
)

# First Nations libraries only, for the maps.
libraries.fn <- subset(libraries, isFN == 1)
# Base map of Ontario, reused by the three point overlays below.
ontario <- qmap("ontario", zoom = 5)

# Map 1: net profit per resident for every First Nations library.
ontario +
  geom_point(
    aes(x = Lon, y = Lat, colour = rev.minus.cost),
    data = libraries.fn,
    alpha = .7,
    size = 6
  ) +
  scale_colour_continuous(
    low = "red",
    high = "green",
    name = "Net Profit per \nResident Population"
  ) +
  ggtitle("Net Profit per Local Resident\nAmongst First Nations Libraries") +
  theme(plot.title = element_text(size = 20, face = "bold"))

# Map 2: the same measure, restricted to libraries running a net loss.
ontario +
  geom_point(
    aes(x = Lon, y = Lat, colour = rev.minus.cost),
    data = subset(libraries.fn, rev.minus.cost < 0),
    alpha = .7,
    size = 6
  ) +
  scale_colour_continuous(
    low = "darkred",
    high = "pink",
    name = "Net Profit per \nResident Population"
  ) +
  ggtitle("Net Profit per Local Resident\nAmongst First Nations Libraries\n(Only those with a net loss)") +
  theme(plot.title = element_text(size = 20, face = "bold"))

# Map 3: ratio of raw operating revenues to raw operating expenditures.
ontario +
  geom_point(
    aes(
      x = Lon,
      y = Lat,
      colour = Total.Operating.Revenues / Total.Operating.Expenditures
    ),
    data = libraries.fn,
    alpha = .7,
    size = 6
  ) +
  scale_colour_continuous(
    low = "red",
    high = "green",
    name = "Operating Revenue \nto Cost Ratio"
  ) +
  ggtitle("Operating Revenue to Cost Ratio\nAmongst First Nations Libraries") +
  theme(plot.title = element_text(size = 20, face = "bold"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment