Skip to content

Instantly share code, notes, and snippets.

View Yankim's full-sized avatar

Yannick Kimmel Yankim

View GitHub Profile
@Yankim
Yankim / helpers.R
Last active September 19, 2016 13:49
library(shiny); library(maps); library(mapproj); library(dplyr); library(plotly); library(googleVis); library(car)
data("county.fips")
# Join data by county identification number (fips) to the county data in the maps package
# Read a CSV of county-level values and join it onto the maps package's
# county.fips lookup table by FIPS code.
#
# Args:
#   x: path to a CSV file containing a `FIPS` column.
# Returns:
#   A data frame: `county.fips` left-joined with the CSV contents, so every
#   map polygon keeps a row even when the CSV has no matching county.
flipjoin <- function(x) {
  y <- read.csv(x)
  left_join(county.fips, y, by = c("fips" = "FIPS"))
}
# Data wrangling for the map plot: county-level health metrics joined to
# the maps package's FIPS table via flipjoin().
health2 <- flipjoin("data/health.csv")
@Yankim
Yankim / ResAndDev
Last active September 19, 2016 13:10
#Loading packages
library(dplyr); library(ggplot2); library(RColorBrewer); library(rworldmap)
#Loading datasets
# Pre-subset of the World Bank development indicators used by this analysis.
indicators = read.csv("subbeddata.csv", header = TRUE, stringsAsFactors = FALSE)
# NOTE(review): hard-coded absolute user path — this breaks on any other
# machine; consider a path relative to the project root.
allindicators = read.csv("/Users/YannickMac/Dropbox/Applications/Data science/NYCDSA/Data_visualization_project/world-development-indicators/Indicators.csv")
counts <- indicators %>%
group_by(IndicatorCode, IndicatorName) %>%
summarise(NumCountries = n_distinct(CountryName),
import numpy as np
import pymongo
import pandas as pd
# Connection to Mongo DB and import recipe and ingredients collections as Pandas.
# Modernized from Python 2 (`print` statement, `except X, e`) to Python 3
# syntax; Python 2 is end-of-life. The original trailing bare `conn`
# expression was dead code (it only echoed the client repr in a REPL and
# raised NameError when the connection had failed), so it is removed.
try:
    conn = pymongo.MongoClient()
    print("Connected successfully!!!")
except pymongo.errors.ConnectionFailure as e:
    # Report and continue; `conn` stays unbound if the connection failed.
    print("Could not connect to MongoDB: %s" % e)
def scrape_recipe(br, year, idnumber):
    # Scrape fields from the recipe page currently loaded in the Selenium
    # driver `br`.
    # NOTE(review): `year` and `idnumber` are unused in the visible portion;
    # the function appears truncated by the gist scrape — confirm the rest.
    # This is called when user wants to scrape for specific recipe site.
    # Per-element try/except so one missing element doesn't abort the run.
    # recipe title (falls back to the string 'NA' when absent)
    try:
        rtitle = br.find_element_by_tag_name('h1').text
    # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
    # `except Exception` would be safer.
    except:
        rtitle = 'NA'
br = webdriver.Firefox() #open firefox
# NOTE(review): `yearurls` and index `i` are defined outside this chunk —
# presumably an enclosing loop over year pages; confirm against full script.
br.get('https://www.allrecipes.com/recipes/'+str(yearurls[i]))
###ID number for year, example 1997 has ID of 14486
# Grab the results grid, then every "favorite" (heart) element inside it.
html_list = br.find_element_by_id("grid")
urls = html_list.find_elements(By.CLASS_NAME, "favorite")
#All top 20 recipes have hearts associated with them. Inside
#the heart is the unique ID number for the given recipe.
for i, e in enumerate(urls):