This is a quick document aimed at highlighting the basics of what you might want to do using MongoDB and R. I am coming at this, almost completely, from a SQL mindset.
The easiest way to install, I believe, is
| ############################################################################### | |
| ## Compare the two 2012/13 Stanley Cup Teams based on basic data | |
| ############################################################################### | |
| ## load the basics | |
| setwd() | |
| ## load the packages | |
| library(XML) | |
| library(RCurl) |
| ####### | |
| ## about | |
| ####### | |
| # searches the ENTIRE document for a class | |
| # assumes that the class you want is homogeneous across whole document | |
| ## //* search the entire document | |
| ## can search by classes | |
| ## a subsequent // allows us to further search the match |
| ## function to get data | |
| getCPI = function(cost1 = 1000, year1 = 2009, year2 = 2013) { | |
| library(RCurl) | |
| library(XML) | |
| ## endpoint | |
| EP = "http://data.bls.gov/cgi-bin/cpicalc.pl?" | |
| ## build the URL | |
| URL = paste0(EP, | |
| "cost1=", cost1, "&", | |
| "year1=", year1, "&", |
| # Title | |
| Here is some text | |
| ## A header 2 | |
| Some more text | |
| ## A chart at header 2 |
| structure(list(variable = structure(c(1L, 3L, 2L), .Label = c("Men", | |
| "Total", "Women"), class = "factor"), sends = c(100, 150, 250 | |
| ), opens = c(75, 75, 150), clicks = c(25, 25, 50)), .Names = c("variable", | |
| "sends", "opens", "clicks"), row.names = c(NA, -3L), class = "data.frame") |
| drop table if exists games; | |
| drop table if exists teams; | |
| drop table if exists events; | |
| drop table if exists events_players; | |
| drop table if exists events_penaltybox; | |
| drop table if exists players; | |
| drop table if exists stats_skaters_summary; | |
| drop table if exists stats_skaters_timeonice; | |
| drop table if exists stats_skaters_faceoff; |
| # http://stackoverflow.com/questions/9455437/parse-jsonp-with-r/9463929#9463929 | |
| # Fetch a JSONP payload, strip its callback wrapper, and parse the JSON inside. | |
| library(rjson) | |
| j <- readLines('http://live.nhl.com/GameData/20112012/2011020908/Roster.jsonp', warn = FALSE) | |
| # readLines() returns one element per line; collapse them so the wrapper is | |
| # stripped from the payload as a whole and fromJSON() receives a single string | |
| j <- paste(j, collapse = "\n") | |
| j <- sub('[^\\{]*', '', j) # remove function name and opening parenthesis | |
| j <- sub('\\)[[:space:]]*;?[[:space:]]*$', '', j) # remove closing parenthesis (and an optional trailing semicolon) | |
| res <- fromJSON(j) |
| ## load the packages | |
| library(XML) | |
| library(RCurl) | |
| library(plyr) | |
| library(ggplot2) | |
| library(reshape2) | |
| library(stringr) | |
| ## the page | |
| URL = "http://www.hockey-reference.com/leagues/NHL_2014_standings.html" |
| c("order_no", "run_no", "student_id", "last_name", "first_name", | |
| "mi", "street1", "street2", "street3", "city", "state", "zip", | |
| "country", "county_code", "post_del", "post_corr", "email", "dob", | |
| "gender", "ethnicity", "grad_year", "hs_code", "geomarket", "tbd", | |
| "major1", "major2", "major3", "major4", "major5", "ap1", "ap2", | |
| "ap3", "ap4", "ap5", "ap6", "ap7", "ap8", "ap9", "ap10", "satsub1", | |
| "satsub2", "satsub3", "satsub4", "satsub5", "satsub6", "satsub7", | |
| "satsub8", "satsub9", "satsub10", "name_source", "update_date", | |
| "homeschool", "low_ses", "hs_cluster", "en_cluster", "TBD_1", | |
| "TBD_2", "TBD_3", "TBD_4", "TBD_5", "TBD_6", "TBD_7", "TBD_8", |