Skip to content

Instantly share code, notes, and snippets.

@jbryer
Last active August 29, 2015 14:03
Show Gist options
  • Save jbryer/11ad6956dff589ec4f3c to your computer and use it in GitHub Desktop.
Save jbryer/11ad6956dff589ec4f3c to your computer and use it in GitHub Desktop.
require(ipeds)
require(ggplot2)
require(reshape2)
require(scales)
data(surveys)
View(surveys)
# Directory
ipedsHelp('HD', 2012)
schools <- getIPEDSSurvey('HD', 2012)
head(schools)
# Graduates
ipedsHelp('C_A', 2012)
grads <- data.frame()
for(i in seq(2000, 2012)) {
tmp <- getIPEDSSurvey('C_A', i)
if(!'ctotalw' %in% names(tmp)) {
# Somewhere around 2007/2008
tmp$ctotalm <- tmp$crace15
tmp$ctotalw <- tmp$crace16
}
tmp <- tmp[,c('unitid','cipcode','awlevel','ctotalw','ctotalm')]
tmp$Year <- i
grads <- rbind(grads, tmp)
}
# CIP Codes
# Mathematics: http://nces.ed.gov/ipeds/cipcode/cipdetail.aspx?y=55&cipid=88406
# CIS: http://nces.ed.gov/ipeds/cipcode/cipdetail.aspx?y=55&cipid=88073
# NOTE 27.99xx is Mathematics and Statistics Other, leaving in math
cipcode <- ifelse(grads$cipcode < 10,
paste0('0', formatC(grads$cipcode, digits=5, format='fg', flag='#')),
formatC(grads$cipcode, digits=6, format='fg', flag='#'))
grads.math <- grads[substr(grads$cipcode, 1, 5) %in% c('27.01','27.03','27.99'),
c('Year','unitid','cipcode','awlevel','ctotalw','ctotalm')]
grads.stat <- grads[substr(grads$cipcode, 1, 5) %in% c('27.05'),
c('Year','unitid','cipcode','awlevel','ctotalw','ctotalm')]
grads.cis <- grads[substr(grads$cipcode, 1, 2) %in% c('11'),
c('Year','unitid','cipcode','awlevel','ctotalw','ctotalm')]
grads.all <- grads[,c('Year','unitid','cipcode','awlevel','ctotalw','ctotalm')]
names(grads.math) <- names(grads.stat) <- names(grads.cis) <- names(grads.all) <-
c('Year','school','cipcode','level','nfemales','nmales')
grads.math$subject <- 'Math'
grads.stat$subject <- 'Stats'
grads.cis$subject <- 'CIS'
grads.all$subject <- 'All'
grads2 <- rbind(grads.math, grads.stat, grads.cis, grads.all)
grads2 <- grads2[grads2$level == 5,] # Only look at Bachelor's Degree
grads.sum <- cbind(aggregate(grads2$nfemales, by=list(grads2$subject, grads2$Year), FUN=sum),
aggregate(grads2$nmales, by=list(grads2$subject, grads2$Year), FUN=sum)[,3] )
names(grads.sum) <- c('Subject', 'Year', 'nFemales', 'nMales')
grads.sum$Total <- apply(grads.sum[,c('nFemales','nMales')], 1, sum)
grads.sum$Female <- grads.sum$nFemales / grads.sum$Total
grads.sum$Male <- grads.sum$nMales / grads.sum$Total
head(grads.sum)
grads.sum.melt <- melt(grads.sum[,c('Subject','Year','Female','Male')], id=c('Subject','Year'))
grads.tot.melt <- melt(grads.sum[,c('Subject','Year','Total')], id=c('Subject','Year'))
ggplot(grads.tot.melt[grads.tot.melt$Subject != 'All',],
aes(x=factor(Year), y=value, color=Subject, group=Subject)) +
geom_path(stat='identity') +
xlab('Year') + ylab('Number of Graduates')
ggplot(grads.tot.melt[grads.tot.melt$Subject != 'All',],
aes(x=factor(Year), y=value, fill=Subject)) +
geom_bar(stat='identity', position='dodge') +
xlab('Year') + ylab('Number of Graduates') +
ggtitle('Number of Baccalaureate Degrees Awarded by Year')
grads.female <- grads.sum.melt[grads.sum.melt$variable == 'Female',]
ggplot(grads.female,
aes(x=factor(Year), y=value, group=Subject, color=Subject)) +
geom_path(stat='identity', alpha=.5) +
geom_text(data=grads.female[grads.female$Subject != 'Stats',],
aes(label=paste0(prettyNum(value*100, digits=3), '%')), size=4, vjust=-1) +
geom_text(data=grads.female[grads.female$Subject == 'Stats',],
aes(label=paste0(prettyNum(value*100, digits=3), '%')), size=4, vjust=1.1) +
scale_y_continuous(labels=percent, limits=c(0,1)) +
xlab('Year') + ylab('Percent Female Graduates') +
ggtitle(paste0('Percent of Female Baccalaureate Degrees Awarded\n',
'by Year for CIS, Math, and Statistics Majors')) +
annotate('text', x='2000', y=0, size=3, hjust=0,
label='Data Source: Integrated Postsecondary Education Data System')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment