Last active
December 11, 2015 23:58
-
-
Save Btibert3/4680195 to your computer and use it in GitHub Desktop.
US News National Rankings - Yield List
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ############################################################################### | |
| ## Use R to scrape the US News College List and look at yield distribution | |
| ## @brocktibert | |
| ## jan 2013 | |
| ############################################################################### | |
| ## load the necessary packages | |
| require(XML) | |
| require(RCurl) | |
## ---- Inputs ---------------------------------------------------------------
## URL of the page that holds the US News table (shortened link)
URL <- "http://goo.gl/39SFG"

## the benchmark yield rate (a proportion, not a percent) that you want to
## compare against the US News schools
BM <- 0.32

## ---- Read the table from the website --------------------------------------
## readHTMLTable() returns a list with one entry per <table> on the page;
## when this script was written the page contained a single table
tables <- readHTMLTable(URL)

## fail with a clear message instead of an opaque "subscript out of bounds"
## if the page no longer exposes a parsable table
if (length(tables) == 0L || is.null(tables[[1]])) {
  stop("No HTML table could be read from ", URL, call. = FALSE)
}

## ---- Get the rankings into a basic data frame -----------------------------
df <- tables[[1]]

## column 1 is the school name; columns 2 and 3 are counts that arrive as
## text with thousands separators, so strip the commas before converting
df[, 1] <- as.character(df[, 1])
df[, 2] <- as.numeric(gsub(",", "", as.character(df[, 2])))
df[, 3] <- as.numeric(gsub(",", "", as.character(df[, 3])))
names(df) <- c("school", "admits", "enroll", "yield")

## recompute yield from the counts rather than parsing the scraped column
df$yield <- df$enroll / df$admits

## how many schools are listed
nrow(df)

## summarise the data
summary(df)

## ---- Look at the basic distribution ---------------------------------------
## one-percentage-point bins across the full 0-1 yield range
hist(
  df$yield,
  breaks = seq(from = 0, to = 1, by = 0.01),
  xlab = "Yield %",
  ylab = "# US News Schools",
  main = "Yield Distribution of US News National",
  col = "red"
)

## plot a vertical line at the benchmark yield for your school
abline(v = BM)
## What is the percentile rank of your school?
## Adapted from the function at http://goo.gl/2aiZZ
##
## perc.rank(x, xo)
##   x  : numeric vector of reference values
##   xo : single value to rank against `x`
## Returns the percentage (0-100) of the non-missing values in `x` that are
## less than or equal to `xo`, or NA when `x` has no non-missing values.
perc.rank <- function(x, xo) {
  # Drop missing values first: indexing with a logical vector that contains
  # NA keeps an NA element for each one, which would otherwise be counted
  # as "<= xo" and inflate the rank.
  x <- x[!is.na(x)]

  # Nothing to rank against; avoid returning 0 / 0.
  if (length(x) == 0L) {
    return(NA_real_)
  }

  sum(x <= xo) / length(x) * 100
}
## percentile rank of the benchmark yield among the scraped yields
RANKR <- perc.rank(df$yield, BM)

## build the annotation, rounding the rank to a whole number
LB <- paste0("Percentile Rank from Input is : ", round(RANKR, 0))

## write the annotation onto the current plot at (0.8, 10) in plot coordinates
text(x = 0.8, y = 10, labels = LB)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment