Last active
June 28, 2016 12:56
-
-
Save arthurwuhoo/35316ec276bcac7c8c49c5486e3ad8b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
odds <- read_csv("https://www.dropbox.com/s/qmymoyj6jl0owvf/horse-racing.csv?dl=1") | |
head(odds) | |
odds$Odds = sapply(odds$Odds, function(ratio) eval(parse(text = ratio))) | |
# HORSE TABLE --------------------------------------------------------------------------------------------------------- | |
odds$Horse = factor(odds$Horse) | |
horses = levels(odds$Horse) | |
horses = data.frame(horse_id = 1:length(horses), | |
horse_name = horses) | |
odds$horse_id <- as.integer(odds$Horse) | |
odds$Horse <- NULL | |
head(odds) | |
# LOCATION TABLE ------------------------------------------------------------------------------------------------------ | |
odds$Location = factor(odds$Location) | |
locations = levels(odds$Location) | |
locations = data.frame(location_id = 1:length(locations), | |
location_name = locations) | |
odds$location_id <- as.integer(odds$Location) | |
odds$Location <- NULL | |
head(odds) | |
# BOOKMAKER TABLE ----------------------------------------------------------------------------------------------------- | |
bookmakers <- unique(odds[, "Bookmaker", drop = FALSE]) | |
# | |
names(bookmakers) <- "bookmaker_name" | |
bookmakers$bookmaker_id <- 1:nrow(bookmakers) | |
# | |
bookmakers <- bookmakers[, 2:1] | |
odds = merge(odds, bookmakers, by.x = "Bookmaker", by.y = "bookmaker_name") | |
odds$Bookmaker <- NULL | |
# RACE TABLE ---------------------------------------------------------------------------------------------------------- | |
# The races table will capture both the race name and race time. | |
races <- unique(odds[, c("Race", "Time")]) | |
# | |
names(races) <- c("race_name", "race_time") | |
races$race_id <- 1:nrow(races) | |
# | |
races <- races[, c(3, 2, 1)] | |
odds = merge(odds, races, by.x = c("Race", "Time"), by.y = c("race_name", "race_time")) | |
odds$Race <- NULL | |
odds$Time <- NULL | |
head(odds) | |
# WRITE CSV ----------------------------------------------------------------------------------------------------------- | |
write.table(horses, "horse-racing-horses.csv", row.names = FALSE, col.names = FALSE) | |
write.table(locations, "horse-racing-locations.csv", row.names = FALSE, col.names = FALSE) | |
write.table(bookmakers, "horse-racing-bookmakers.csv", row.names = FALSE, col.names = FALSE) | |
write.table(races, "horse-racing-races.csv", row.names = FALSE, col.names = FALSE) | |
write.table(odds, "horse-racing-odds.csv", row.names = FALSE, col.names = FALSE) | |
# CREATE A DATABASE --------------------------------------------------------------------------------------------------- | |
# Go and do the SQL stuff here... | |
# SQL FROM R ---------------------------------------------------------------------------------------------------------- | |
library(RSQLite) | |
db <- dbConnect(dbDriver("SQLite"), dbname = "horse-racing.db") | |
dbListTables(db) | |
dbReadTable(db, "tb_horses") | |
results <- dbSendQuery(db, "SELECT * FROM tb_races ORDER BY race_time;") | |
dbFetch(results, n = -1) | |
dbClearResult(results) | |
dbDisconnect(db) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment