This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from PIL import Image | |
import pathlib | |
PATH_ = pathlib.Path(__file__).parent | |
broken_images=[] | |
def check_images(PATH_): | |
for pic_class in os.listdir(PATH_): | |
for pic in os.listdir(f'{PATH_}/{pic_class}'): | |
try: | |
img = Image.open(f'{PATH_}/{pic_class}/{pic}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Generate County map with a few var of interest | |
percent_map <- function(var, color, legend.title, min = 0, max = 100, name = "") { | |
# generate vector of fill colors for map | |
shades <- colorRampPalette(c("white", color))(100) | |
# constrain gradient to percents that occur between min and max | |
var <- pmax(var, min) | |
var <- pmin(var, max) | |
percents <- as.integer(cut(var, 100, | |
include.lowest = TRUE, ordered = TRUE)) | |
fills <- shades[percents] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stepwise regression was used to predict the obesity rate. Starting from the saturated model,
# variables not found significant were removed by backward stepwise selection.
# Drop the first three columns of fulldb before modelling
# (presumably identifier columns -- confirm against how fulldb is built).
data <- fulldb[, -c(1:3)]
# Keep only rows with no missing values so that both models below are
# fitted on exactly the same observations.
complete.data <- data[complete.cases(data), ]
# Upper bound of the search: every remaining column as a predictor.
model.saturated <- lm(PCT_OBESE_ADULTS10 ~ ., data = complete.data)
# Lower bound of the search: intercept only.
model.empty <- lm(PCT_OBESE_ADULTS10 ~ 1, data = complete.data)
scope <- list(lower = formula(model.empty), upper = formula(model.saturated))
# Backward elimination starting from the saturated model; k = 2 is the
# AIC penalty per parameter.
backwardAIC <- step(model.saturated, scope, direction = "backward", k = 2)
#Used to predict obesity rate with multiple linear regression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Generate County map with a few var of interest | |
percent_map <- function(var, color, legend.title, min = 0, max = 100, name = "") { | |
# generate vector of fill colors for map | |
shades <- colorRampPalette(c("white", color))(100) | |
# constrain gradient to percents that occur between min and max | |
var <- pmax(var, min) | |
var <- pmin(var, max) | |
percents <- as.integer(cut(var, 100, | |
include.lowest = TRUE, ordered = TRUE)) | |
fills <- shades[percents] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Predicting the test data | |
# Score the test set with the fitted booster `bst`.
xgmat.test <- xgb.DMatrix(as.matrix(higgs.test.dummy))
xgboostTestPred <- predict(bst, newdata = xgmat.test)
# Default every event to "s", then relabel scores >= `threshold` as "b".
# The vector is sized from the predictions rather than a hard-coded 550000
# so the labels always line up with the rows that were actually scored.
predicted <- rep("s", length(xgboostTestPred))
predicted[xgboostTestPred >= threshold] <- "b"
# Rank of each prediction; ties are broken at random, so the result is not
# reproducible unless the RNG seed is fixed beforehand.
weightRank <- rank(xgboostTestPred, ties.method = "random")
# Write the raw scores and the event ids (the "Submissions" directory must
# already exist).
write.csv(as.data.frame(xgboostTestPred), "Submissions/xgboost_prob.csv")
write.csv(as.data.frame(higgs.testId), "Submissions/EventID.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Predicting training data | |
# Score the training set and plot its ROC curve with the threshold marked.
xgmat.train <- xgb.DMatrix(
  as.matrix(higgs.train.dummy),
  label = as.numeric(higgs.labels == "X0"),
  weight = scaled.weight
)
xgboostTrainPred <- predict(bst, newdata = xgmat.train)
# NOTE(review): the DMatrix label above is 1 for "X0", while the ROC response
# below is 1 for "X1" -- confirm that this opposite coding is intended.
labels <- ifelse(as.character(higgs.labels) == "X1", 1, 0)
auc <- roc(labels, xgboostTrainPred)
plot(auc, print.thres = TRUE)
######## From the graph, we can tell the best threshold is 0.002 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grid for the parameter search | |
# The guidelines for how to tune the parameters are given in the comments below and are taken from
# Owen Zhang: http://www.slideshare.net/OwenZhang2/tips-for-data-science-competitions
xgb_grid_1 = expand.grid( | |
eta = c(.5, 1, 1.5), #[2-10]/num trees | |
max_depth = c(4, 6, 8), #Start with 6 | |
nrounds = 100, #Fix at 100 | |
gamma = 0, #Usually ok to leave at 0 | |
colsample_bytree = c(.3, .5, .7), #.3 - .5 | |
min_child_weight = 1 #start with 1/sqrt(eventrate) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(xgboost); library(methods); library(pROC); library(caret); library(xgboost); library(readr); library(plyr); library(dplyr) | |
library(tidyr); library(dummies); library(doMC); registerDoMC(cores = 4) | |
#Read in the data | |
#higgs.___.full is raw data | |
# Load the raw training and test tables; the first row holds column names.
higgs.train.full <- read.csv("./data/training.csv", header = TRUE)
higgs.test.full <- read.csv("./data/test.csv", header = TRUE)
# Keep the test event ids in their own vector.
higgs.testId <- higgs.test.full$EventId
############################################# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny); library(shinydashboard); library(plotly); source("helpers.R"); library(DT) | |
shinyUI(dashboardPage( | |
dashboardHeader(title = "Food and health demographics in the USA", titleWidth = 400), | |
dashboardSidebar( | |
sidebarUserPanel("Yannick Kimmel", image = "Yannick.jpg"), | |
sidebarMenu( | |
menuItem("Map", tabName = "mappanel", icon = icon("map")), | |
menuItem("Trends", tabName = "trends", icon = icon("line-chart")), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# server.R | |
library(shiny); library(maps); library(mapproj); library(shinydashboard); library(plotly); library(DT); source("helpers.R") | |
shinyServer( | |
function(input, output) { | |
output$map <- renderPlot({ | |
args <- switch(input$var, | |
"Percent Adult Obese 2009" = list(health2$PCT_OBESE_ADULTS09, "darkgreen", "% Obese"), | |
"Percent Adult Obese 2010" = list(health2$PCT_OBESE_ADULTS10, "darkgreen", "% Obese"), | |
"Percent Adult Diabetic 2009" = list(health2$PCT_DIABETES_ADULTS09, "darkred", "% Diabetic"), |
NewerOlder