Skip to content

Instantly share code, notes, and snippets.

@arthurwuhoo
Last active June 28, 2016 12:56
Show Gist options
  • Save arthurwuhoo/35316ec276bcac7c8c49c5486e3ad8b0 to your computer and use it in GitHub Desktop.
Save arthurwuhoo/35316ec276bcac7c8c49c5486e3ad8b0 to your computer and use it in GitHub Desktop.
library(readr)
odds <- read_csv("https://www.dropbox.com/s/qmymoyj6jl0owvf/horse-racing.csv?dl=1")
head(odds)
odds$Odds = sapply(odds$Odds, function(ratio) eval(parse(text = ratio)))
# HORSE TABLE ---------------------------------------------------------------------------------------------------------
odds$Horse = factor(odds$Horse)
horses = levels(odds$Horse)
horses = data.frame(horse_id = 1:length(horses),
horse_name = horses)
odds$horse_id <- as.integer(odds$Horse)
odds$Horse <- NULL
head(odds)
# LOCATION TABLE ------------------------------------------------------------------------------------------------------
odds$Location = factor(odds$Location)
locations = levels(odds$Location)
locations = data.frame(location_id = 1:length(locations),
location_name = locations)
odds$location_id <- as.integer(odds$Location)
odds$Location <- NULL
head(odds)
# BOOKMAKER TABLE -----------------------------------------------------------------------------------------------------
bookmakers <- unique(odds[, "Bookmaker", drop = FALSE])
#
names(bookmakers) <- "bookmaker_name"
bookmakers$bookmaker_id <- 1:nrow(bookmakers)
#
bookmakers <- bookmakers[, 2:1]
odds = merge(odds, bookmakers, by.x = "Bookmaker", by.y = "bookmaker_name")
odds$Bookmaker <- NULL
# RACE TABLE ----------------------------------------------------------------------------------------------------------
# The races table will capture both the race name and race time.
races <- unique(odds[, c("Race", "Time")])
#
names(races) <- c("race_name", "race_time")
races$race_id <- 1:nrow(races)
#
races <- races[, c(3, 2, 1)]
odds = merge(odds, races, by.x = c("Race", "Time"), by.y = c("race_name", "race_time"))
odds$Race <- NULL
odds$Time <- NULL
head(odds)
# WRITE CSV -----------------------------------------------------------------------------------------------------------
write.table(horses, "horse-racing-horses.csv", row.names = FALSE, col.names = FALSE)
write.table(locations, "horse-racing-locations.csv", row.names = FALSE, col.names = FALSE)
write.table(bookmakers, "horse-racing-bookmakers.csv", row.names = FALSE, col.names = FALSE)
write.table(races, "horse-racing-races.csv", row.names = FALSE, col.names = FALSE)
write.table(odds, "horse-racing-odds.csv", row.names = FALSE, col.names = FALSE)
# CREATE A DATABASE ---------------------------------------------------------------------------------------------------
# Go and do the SQL stuff here...
# SQL FROM R ----------------------------------------------------------------------------------------------------------
library(RSQLite)
db <- dbConnect(dbDriver("SQLite"), dbname = "horse-racing.db")
dbListTables(db)
dbReadTable(db, "tb_horses")
results <- dbSendQuery(db, "SELECT * FROM tb_races ORDER BY race_time;")
dbFetch(results, n = -1)
dbClearResult(results)
dbDisconnect(db)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment