Yankim’s gists

Yankim / top20list

Last active May 4, 2021 20:04

	# Open a Firefox session and load the listing page for one year.
	# The URL suffix is the ID number for the year, example 1997 has ID of 14486.
	br = webdriver.Firefox()
	br.get('https://www.allrecipes.com/recipes/' + str(yearurls[i]))

	# Locate the element with id "grid". find_element(By.ID, ...) replaces
	# find_element_by_id, which was removed in Selenium 4.3, and matches the
	# By-based lookup used on the next line.
	html_list = br.find_element(By.ID, "grid")

	# All top 20 recipes have hearts associated with them (class "favorite").
	# Inside the heart is the unique ID number for the given recipe.
	urls = html_list.find_elements(By.CLASS_NAME, "favorite")

	for i, e in enumerate(urls):

Yankim / scrape_recipe

Last active May 5, 2021 16:24

	def scrape_recipe(br, year, idnumber):
	#This is called when user wants to scrape for specific recipe site
	#Try functions were used to prevent any one element from stopping the operation

	#recipe title
	try:
	rtitle = br.find_element_by_tag_name('h1').text
	except:
	rtitle = 'NA'

Yankim / scrapingproject.py

Last active May 9, 2018 15:43

	import numpy as np
	import pymongo
	import pandas as pd
	# Connection to Mongo DB and import recipe and ingredients collections as Pandas
	# Open a client against MongoDB using MongoClient's default host/port and
	# report whether the server is actually reachable.
	# `conn` is pre-bound so the trailing expression never hits an unbound name.
	conn = None
	try:
		conn = pymongo.MongoClient()
		# In PyMongo 3+ the constructor returns without contacting the server,
		# so issue a cheap ping to force a round trip before claiming success.
		conn.admin.command('ping')
		print("Connected successfully!!!")
	except pymongo.errors.ConnectionFailure as e:
		# `as e` and print(...) are valid on both Python 2.6+ and Python 3.
		print("Could not connect to MongoDB: %s" % e)
	conn

Yankim / ResAndDev

Last active September 19, 2016 13:10

	#Loading packages
	library(dplyr); library(ggplot2); library(RColorBrewer); library(rworldmap)

	# Read the two indicator tables from CSV.
	# `indicators` keeps text columns as character vectors; `allindicators`
	# is read with read.csv's defaults.
	indicators <- read.csv(
	  "subbeddata.csv",
	  header = TRUE,
	  stringsAsFactors = FALSE
	)
	# NOTE(review): absolute path is specific to one machine - confirm before reuse.
	allindicators <- read.csv(
	  "/Users/YannickMac/Dropbox/Applications/Data science/NYCDSA/Data_visualization_project/world-development-indicators/Indicators.csv"
	)

	counts <- indicators %>%
	group_by(IndicatorCode, IndicatorName) %>%
	summarise(NumCountries = n_distinct(CountryName),

Yankim / helpers.R

Last active September 19, 2016 13:49

	library(shiny); library(maps); library(mapproj); library(dplyr); library(plotly); library(googleVis); library(car)
	data("county.fips")
	# Read the CSV at path `x` and left-join it onto `county.fips` (the county
	# table from the maps package), matching the `fips` column of `county.fips`
	# to the `FIPS` column of the CSV. Returns the joined data frame.
	flipjoin <- function(x) {
	  county_stats <- read.csv(x)
	  left_join(x = county.fips, y = county_stats, by = c("fips" = "FIPS"))
	}

	# Data for the map plot: health.csv joined to the county FIPS table
	health2 <- flipjoin("data/health.csv")

Yankim / server.R

Last active September 19, 2016 14:10

	# server.R
	library(shiny); library(maps); library(mapproj); library(shinydashboard); library(plotly); library(DT); source("helpers.R")

	shinyServer(
	function(input, output) {
	output$map <- renderPlot({
	args <- switch(input$var,
	"Percent Adult Obese 2009" = list(health2$PCT_OBESE_ADULTS09, "darkgreen", "% Obese"),
	"Percent Adult Obese 2010" = list(health2$PCT_OBESE_ADULTS10, "darkgreen", "% Obese"),
	"Percent Adult Diabetic 2009" = list(health2$PCT_DIABETES_ADULTS09, "darkred", "% Diabetic"),

Yankim / ui.R

Last active September 19, 2016 13:13

	library(shiny); library(shinydashboard); library(plotly); source("helpers.R"); library(DT)

	shinyUI(dashboardPage(
	dashboardHeader(title = "Food and health demographics in the USA", titleWidth = 400),
	dashboardSidebar(
	sidebarUserPanel("Yannick Kimmel", image = "Yannick.jpg"),

	sidebarMenu(
	menuItem("Map", tabName = "mappanel", icon = icon("map")),
	menuItem("Trends", tabName = "trends", icon = icon("line-chart")),

Yankim / XGBoostHiggsBoson.R

Last active September 19, 2016 13:17

	library(xgboost); library(methods); library(pROC); library(caret); library(xgboost); library(readr); library(plyr); library(dplyr)
	library(tidyr); library(dummies); library(doMC); registerDoMC(cores = 4)

	# Read in the data
	# higgs.___.full is raw data
	# `header = TRUE` is spelled out because `T` is an ordinary variable that can
	# be reassigned, whereas `TRUE` is a reserved word.
	higgs.train.full <- read.csv('./data/training.csv', header = TRUE)
	higgs.test.full <- read.csv('./data/test.csv', header = TRUE)
	# Keep the EventId column of the test set as its own vector.
	higgs.testId <- higgs.test.full$EventId

	#############################################

Yankim / XGBoostHiggs2.R

Created September 19, 2016 13:24

	# Grid for the parameter search
	# The guidelines for how to tune parameters are commented below and are taken from
	# Owen Zhang http://www.slideshare.net/OwenZhang2/tips-for-data-science-competitions
	xgb_grid_1 = expand.grid(
	eta = c(.5, 1, 1.5), #[2-10]/num trees
	max_depth = c(4, 6, 8), #Start with 6
	nrounds = 100, #Fix at 100
	gamma = 0, #Usually ok to leave at 0
	colsample_bytree = c(.3, .5, .7), #.3 - .5
	min_child_weight = 1 #start with 1/sqrt(eventrate)

Yankim / XGBoostHiggs3.R

Created September 19, 2016 13:28

	# Score the training set with the fitted booster `bst`
	# NOTE(review): the DMatrix label is 1 where higgs.labels == "X0", while the
	# ROC labels below are 1 where higgs.labels == "X1" - confirm this is intended.
	xgmat.train <- xgb.DMatrix(
	  data = as.matrix(higgs.train.dummy),
	  label = as.numeric(higgs.labels == "X0"),
	  weight = scaled.weight
	)
	xgboostTrainPred <- predict(bst, newdata = xgmat.train)

	# 0/1 response for the ROC curve: 1 where the label is "X1", else 0
	labels <- ifelse(as.character(higgs.labels) == "X1", 1, 0)

	# ROC curve of the training predictions, with the threshold printed on the plot
	auc <- roc(response = labels, predictor = xgboostTrainPred)
	plot(auc, print.thres = TRUE)
	######## From the graph, we can tell the best threshold is 0.002

Yannick Kimmel Yankim