Last active
February 15, 2017 20:40
-
-
Save mGalarnyk/5c56dfa3c883c73549e4174289ede883 to your computer and use it in GitHub Desktop.
Getting and Cleaning Data Quiz 1 from Johns Hopkins Coursera for the blog post https://medium.com/@GalarnykMichael/getting-and-cleaning-data-jhu-coursera-course-3-c3635747858b#.270anhem0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quiz data.table code Week 1
# 1.
# fread() needs the curl package to read from a URL on a Mac:
# install.packages("curl")

# Read the housing survey CSV straight from the course URL into a data.table.
housing <- data.table::fread("https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv")

# VAL says how much the property is worth (stored as a category code).
# NOTE(review): code 24 is presumably the top value bracket -- confirm
# against the survey code book.
# .N is the number of rows that match the filter in `i`.
housing[VAL == 24, .N]
# 3. Download the Excel spreadsheet on Natural Gas Acquisition Program here:
# https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FDATA.gov_NGAP.xlsx
# Read rows 18-23 and columns 7-15 into R and assign the result to a
# variable called: dat
# install.packages("xlsx")
# setwd("~/Desktop/datasciencecoursera/3_Getting_and_Cleaning_Data")
# housing <- data.table::fread("https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FDATA.gov_NGAP.xlsx")
fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FDATA.gov_NGAP.xlsx"

# Build the local path once so the download and the read are guaranteed to
# refer to the same file (saved in the current working directory).
destFile <- file.path(getwd(), "getdata%2Fdata%2FDATA.gov_NGAP.xlsx")

# method = "curl" shells out to an external curl binary, which must be on PATH.
download.file(fileUrl, destfile = destFile, method = "curl")

# Only the requested cell range of the first sheet is read.
dat <- xlsx::read.xlsx(
  file = destFile,
  sheetIndex = 1,
  rowIndex = 18:23,
  colIndex = 7:15
)

# Quiz answer: sum of the element-wise product Zip * Ext, ignoring NAs.
sum(dat$Zip * dat$Ext, na.rm = TRUE)
# 4.
# Read the XML data on Baltimore restaurants from here:
# https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Frestaurants.xml
# How many restaurants have zipcode 21231?
# install.packages("XML")
fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Frestaurants.xml"

# The document is fetched over plain http, so rewrite only the URL scheme.
# The pattern is anchored so it can never strip an "s" from anywhere else
# in the URL.
httpURL <- sub("^https://", "http://", fileURL)

# useInternalNodes = TRUE (spelled out, no partial argument matching) gives
# the internal node representation that the XPath query below operates on.
doc <- XML::xmlTreeParse(httpURL, useInternalNodes = TRUE)
rootNode <- XML::xmlRoot(doc)

# Collect the text of every <zipcode> element in the document.
zipcodes <- XML::xpathSApply(rootNode, "//zipcode", XML::xmlValue)
xmlZipcodeDT <- data.table::data.table(zipcode = zipcodes)

# Zip codes are compared as strings because xmlValue returns character data.
xmlZipcodeDT[zipcode == "21231", .N]
# 5.
# The American Community Survey distributes downloadable
# data about United States communities. Download the 2006 microdata survey about
# housing for the state of Idaho using download.file() from here:
# https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06pid.csv
# Load the data into a data.table called DT and compute the mean of
# pwgtp15 broken down by SEX.
# Note: this script lets fread() read the URL directly instead of calling
# download.file() first.
DT <- data.table::fread("https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06pid.csv")

# Grouped mean using data.table's `by`; the original notes mark this form
# as the fastest.
DT[, mean(pwgtp15), by = SEX]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment