This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open a Firefox browser session.
br = webdriver.Firefox()
# Load the recipe listing page for one year. Each year has its own numeric ID
# in the URL, e.g. 1997 has the ID 14486.
# NOTE(review): `yearurls` and `i` are defined outside this excerpt.
br.get('https://www.allrecipes.com/recipes/' + str(yearurls[i]))
# Locate the recipe grid with the By-based locator, matching the
# find_elements call below; the find_element_by_* helpers are deprecated in
# Selenium 4.
html_list = br.find_element(By.ID, "grid")
# All top 20 recipes have hearts associated with them. Inside the heart is
# the unique ID number for the given recipe.
urls = html_list.find_elements(By.CLASS_NAME, "favorite")
for i, e in enumerate(urls): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scrape_recipe(br, year, idnumber): | |
#This is called when user wants to scrape for specific recipe site | |
#Try functions were used to prevent any one element from stopping the operation | |
#recipe title | |
try: | |
rtitle = br.find_element_by_tag_name('h1').text | |
except: | |
rtitle = 'NA' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pymongo | |
import pandas as pd | |
# Connection to Mongo DB and import recipe and ingredients collections as Pandas | |
# Create the MongoDB client and report whether the connection succeeded.
try:
    conn = pymongo.MongoClient()
    print("Connected successfully!!!")
except pymongo.errors.ConnectionFailure as e:
    # The "as" binding and the print() call form parse on both Python 2.6+
    # and Python 3; the comma form and the print statement are Python 2 only.
    print("Could not connect to MongoDB: %s" % e)
# Bare expression: presumably echoes the client when run interactively
# (e.g. in a notebook). NOTE(review): `conn` is unbound here if the
# connection attempt above failed.
conn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Loading packages | |
library(dplyr); library(ggplot2); library(RColorBrewer); library(rworldmap) | |
#Loading datasets | |
# Indicator data from subbeddata.csv, keeping strings as character vectors.
indicators <- read.csv(
  file = "subbeddata.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Indicators.csv read from an absolute local path with read.csv() defaults.
allindicators <- read.csv(
  file = "/Users/YannickMac/Dropbox/Applications/Data science/NYCDSA/Data_visualization_project/world-development-indicators/Indicators.csv"
)
counts <- indicators %>% | |
group_by(IndicatorCode, IndicatorName) %>% | |
summarise(NumCountries = n_distinct(CountryName), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny); library(maps); library(mapproj); library(dplyr); library(plotly); library(googleVis); library(car) | |
data("county.fips") | |
#Join data by county identification number (fips) to the county data in the maps package
# Read the CSV file at path `x` and left-join it onto `county.fips`, matching
# the `fips` column of `county.fips` to the `FIPS` column of the CSV.
# Returns the result of the join.
flipjoin <- function(x) {
  county_data <- read.csv(file = x)
  left_join(x = county.fips, y = county_data, by = c("fips" = "FIPS"))
}
#Data wrangling for map plot | |
health2 = flipjoin("data/health.csv") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# server.R | |
library(shiny); library(maps); library(mapproj); library(shinydashboard); library(plotly); library(DT); source("helpers.R") | |
shinyServer( | |
function(input, output) { | |
output$map <- renderPlot({ | |
args <- switch(input$var, | |
"Percent Adult Obese 2009" = list(health2$PCT_OBESE_ADULTS09, "darkgreen", "% Obese"), | |
"Percent Adult Obese 2010" = list(health2$PCT_OBESE_ADULTS10, "darkgreen", "% Obese"), | |
"Percent Adult Diabetic 2009" = list(health2$PCT_DIABETES_ADULTS09, "darkred", "% Diabetic"), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny); library(shinydashboard); library(plotly); source("helpers.R"); library(DT) | |
shinyUI(dashboardPage( | |
dashboardHeader(title = "Food and health demographics in the USA", titleWidth = 400), | |
dashboardSidebar( | |
sidebarUserPanel("Yannick Kimmel", image = "Yannick.jpg"), | |
sidebarMenu( | |
menuItem("Map", tabName = "mappanel", icon = icon("map")), | |
menuItem("Trends", tabName = "trends", icon = icon("line-chart")), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(xgboost); library(methods); library(pROC); library(caret); library(xgboost); library(readr); library(plyr); library(dplyr) | |
library(tidyr); library(dummies); library(doMC); registerDoMC(cores = 4) | |
#Read in the data | |
#higgs.___.full is raw data | |
# Raw training and test sets. `header = TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned, whereas `TRUE` is a reserved word.
higgs.train.full <- read.csv("./data/training.csv", header = TRUE)
higgs.test.full <- read.csv("./data/test.csv", header = TRUE)
# EventId column of the test set, kept as its own vector.
higgs.testId <- higgs.test.full$EventId
############################################# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grid for the parameter search | |
#The guidelines for how to tune parameters are commented below and are taken from
# Owen Zhang http://www.slideshare.net/OwenZhang2/tips-for-data-science-competitions
xgb_grid_1 = expand.grid( | |
eta = c(.5, 1, 1.5), #[2-10]/num trees | |
max_depth = c(4, 6, 8), #Start with 6 | |
nrounds = 100, #Fix at 100 | |
gamma = 0, #Usually ok to leave at 0 | |
colsample_bytree = c(.3, .5, .7), #.3 - .5 | |
min_child_weight = 1 #start with 1/sqrt(eventrate) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Predicting training data | |
# Wrap the training features in an xgb.DMatrix, attaching a 0/1 label and
# `scaled.weight` as the weights.
# NOTE(review): the DMatrix label marks "X0" as 1, while the ROC labels below
# mark "X1" as 1 -- confirm which class is meant to be the positive one.
xgmat.train <- xgb.DMatrix(
  data = as.matrix(higgs.train.dummy),
  label = as.numeric(higgs.labels == "X0"),
  weight = scaled.weight
)
# Predictions of `bst` on the training matrix.
xgboostTrainPred <- predict(bst, newdata = xgmat.train)
# 0/1 response for the ROC curve: 1 where the label is "X1", 0 otherwise.
labels <- ifelse(as.character(higgs.labels) == "X1", 1, 0)
# ROC object for the training predictions, plotted with the threshold printed.
auc <- roc(labels, xgboostTrainPred)
plot(auc, print.thres = TRUE)
######## From the graph, we can tell the best threshold is 0.002 |
OlderNewer