Last active
May 31, 2016 21:41
-
-
Save RandomCriticalAnalysis/69ca6628df5ccae25fe25fce8d71450c to your computer and use it in GitHub Desktop.
for Anatoly (relevant code for education analysis)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the district-level inputs and join them into one data frame (glarge2).
#
# Test scores and most covariates taken from
# https://cepa.stanford.edu/seda/download?nid=1727&destination=node/1717
# Parents' average years of education computed using data from the census ACS
# (via the Dept. of Education EDGE service)
# http://nces.ed.gov/programs/edge/demographicACS.aspx
pm <- read.csv("cepa_pooled_means.csv", stringsAsFactors = FALSE)
cve <- read.csv("cepa_covariates_from_excel.csv", stringsAsFactors = FALSE)

# merge() defaults to an inner join: only districts found in both files remain.
glarge <- merge(cve, pm, by.x = "leaid", by.y = "nces_district_id")

ed <- read.csv("nces_parent_ed_levels.csv", stringsAsFactors = FALSE)

# The numeric district id is read from character 8 onward of GeoId.
# NOTE(review): presumably the first 7 characters are a fixed geography
# prefix -- confirm against the EDGE file layout.
ed$nces_district_id <- as.integer(substr(ed$GeoId, 8, 20))

# Estimated average years of education: each attainment column is divided by
# 100 (i.e. treated as a percentage) and weighted by an assumed number of
# years of schooling for that level (8, 10, 12, 13, 14, 16, 20).
ed$avg_years_education <- with(
  ed,
  (ed_less_than_9th_grade / 100 * 8) +
    (ed_9th_12th_grade / 100 * 10) +
    (ed_hs_grad / 100 * 12) +
    (ed_some_college / 100 * 13) +
    (ed_assoc_degree / 100 * 14) +
    (ed_bach_degree / 100 * 16) +
    (ed_grad_prof_degree / 100 * 20)
)

# Attach the education estimates to the merged score/covariate table.
glarge2 <- merge(glarge, ed, by.x = "leaid", by.y = "nces_district_id")
# Copy the long column names into short aliases for ease of typing.
# The original columns are left in place; these are plain copies.

# Share of adults with a BA, overall and by group.
glarge2$ba_all <- glarge2$X..of.adults.with.ba...all.
glarge2$ba_wht <- glarge2$X..of.adults.with.ba...wht.
glarge2$ba_blk <- glarge2$X..of.adults.with.ba...blk.
glarge2$ba_hsp <- glarge2$X..of.adults.with.ba...hsp.

# Racial/ethnic composition of the district.
glarge2$pct_wht <- glarge2$percent.whites.in.the.district
glarge2$pct_blk <- glarge2$percent.blacks.in.the.district
glarge2$pct_hsp <- glarge2$percent.hispanics.in.the.district
glarge2$pct_asn <- glarge2$percent.asians.in.the.district
glarge2$percent_na <- glarge2$percent.native.americans.in.the.district

# Student population (grades 3-8 enrollment); used later as a weight and as
# the point size in the plots.
glarge2$spop <- glarge2$Total.Enrollment..Grades.3.8

# Combined Hispanic + Black + Native American share. A missing value in any
# of the three components makes the sum NA for that district.
glarge2$pct_urm <- glarge2$pct_hsp + glarge2$pct_blk + glarge2$percent_na
# Enrollment-weighted OLS of pooled test scores on district racial/ethnic
# composition and estimated average years of adult education (linear terms
# only, no interactions). The fitted model is kept so it can be inspected
# with summary(sire_ed_fit).
sire_ed_fit <- lm(
  pooled_score ~ pct_wht + pct_blk + pct_hsp + pct_asn + percent_na +
    avg_years_education,
  data = glarge2,
  weights = spop,
  # na.exclude makes predict() pad rows dropped for missing data with NA, so
  # the predictions line up row-for-row with glarge2.
  na.action = na.exclude
)
glarge2$score_SIRE_ed <- predict(sire_ed_fit)

# Three Connecticut districts that are highlighted and labelled in the plots.
ctd <- subset(
  glarge2,
  LEA.Name %in% c(
    "DARIEN SCHOOL DISTRICT",
    "BRIDGEPORT SCHOOL DISTRICT",
    "GREENWICH SCHOOL DISTRICT"
  ) & State.Abbreviation == "CT"
)

# Compute correlation coefficients between predicted and actual scores.
# print() is explicit so the values are also shown when the script is run
# through source(), which does not auto-print top-level expressions.
# wtd.cor() is not attached in the visible part of this file.
# NOTE(review): it presumably comes from the 'weights' package -- confirm.
print(wtd.cor(glarge2$score_SIRE_ed, glarge2$pooled_score, glarge2$spop))

# score_SIRE_ed contains NA for every row the model excluded; without
# use = "complete.obs" a single NA would make cor() return NA.
print(cor(glarge2$score_SIRE_ed, glarge2$pooled_score, use = "complete.obs"))
# Scatter plot of actual vs. model-predicted district test scores, with the
# districts in `ctd` highlighted in green (sized by enrollment) and labelled.
# Relies on ggplot2 and ggrepel being attached; neither is loaded in the
# visible part of this file.
# NOTE: the two r values in the annotations are hard-coded text; they are not
# recomputed from the data and must be updated by hand if the inputs change.
pred_vs_actual_plot <- ggplot(glarge2, aes(score_SIRE_ed, pooled_score)) +
  geom_point(color = "blue", alpha = 0.2) +
  geom_smooth() +
  geom_point(data = ctd, color = "green", aes(size = spop)) +
  geom_label_repel(data = ctd, aes(label = LEA.Name)) +
  xlab(paste0(
    "predicted district test scores\n",
    "using race/ethnicity and estimated average years of education of adults, 25+ years old\n",
    " with OLS, linear terms, no interactions, etc"
  )) +
  ylab("actual test scores") +
  annotate("text", x = -0.65, y = 1, size = 8, label = "(weighted) r = .89") +
  annotate("text", x = -0.7, y = 0.8, size = 8, label = "(unweighted) r = .83")

# Explicit print so the figure is also drawn when the script is source()d.
print(pred_vs_actual_plot)
# Connecticut only: actual test scores against total per-pupil expenditures.
ct_only <- subset(glarge2, State.Abbreviation == "CT")

# scales provides the `dollar` label formatter used on the x axis.
library(scales)

# All CT districts in blue (sized by enrollment) with a smoother; the
# districts in `ctd` are overplotted in green and labelled.
ct_spending_plot <- ggplot(
  ct_only,
  aes(Total.PP.Expenditures..Tot.Exp.Enrl, pooled_score)
) +
  geom_point(color = "blue", alpha = 0.4, aes(size = spop)) +
  geom_smooth() +
  geom_point(data = ctd, color = "green", aes(size = spop)) +
  geom_label_repel(data = ctd, aes(label = LEA.Name)) +
  scale_x_continuous(labels = dollar) +
  xlab("total expenditures per pupil") +
  ylab("actual test scores") +
  ggtitle("CT school districts test scores by total expenditures per pupil")

# Explicit print so the figure is also drawn when the script is source()d.
print(ct_spending_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment