Skip to content

Instantly share code, notes, and snippets.

@Tadge-Analytics
Created November 30, 2021 23:25
Show Gist options
  • Save Tadge-Analytics/22079c9f5af4ec385427506a498ca37b to your computer and use it in GitHub Desktop.
Save Tadge-Analytics/22079c9f5af4ec385427506a498ca37b to your computer and use it in GitHub Desktop.
library(tidyverse)
library(readxl)
###################################################################
path_to_workbook <- "top_female_poker_players_and_events.xlsx"
workbook_sheets <- excel_sheets(path_to_workbook)
all_sheets_imported <-
workbook_sheets %>%
tibble(sheet = .) %>%
mutate(sheet_data = map(sheet, ~read_excel(path_to_workbook, sheet = .x, col_types = "text") %>% janitor::clean_names()),
sheet = snakecase::to_snake_case(sheet))
map2(all_sheets_imported$sheet, all_sheets_imported$sheet_data, ~assign(.x, .y, envir = .GlobalEnv))
rm(list = setdiff(ls(), all_sheets_imported$sheet))
###################################################################
# here, I make three distinct "tidy" tables (unique_players, unique_events, all_events_only_req_info)
# just to "get my head around" the data properly
unique_events <-
top_100_poker_events %>%
distinct(event_name, event_date, event_country) %>%
mutate(event_date = openxlsx::convertToDate(event_date),
event_id = row_number(), .before = 1)
unique_players <-
top_100 %>%
select(player_id, name, country, all_time_money_usd, position) %>%
mutate(player_id = as.numeric(player_id),
all_time_money_usd = as.numeric(all_time_money_usd),
position = parse_number(position))
all_events_only_req_info <-
top_100_poker_events %>%
mutate(event_date = openxlsx::convertToDate(event_date)) %>%
inner_join(unique_events, by = c("event_name", "event_date", "event_country")) %>%
select(-event_name, -event_country, -source, -last_updated) %>%
mutate(player_place = parse_number(player_place),
was_event_winner = player_place == 1, # Create a column to count when the player finished 1st in an event
across(c(prize_usd, player_id), ~as.numeric(.x)),
prize_usd = replace_na(prize_usd, 0)) # Replace any nulls in prize_usd with zero
###################################################################
# generate aggregations
aggregated_player_stats <-
all_events_only_req_info %>%
inner_join(unique_events %>% select(-event_date), by = "event_id") %>%
group_by(player_id) %>%
summarise(first_event = min(event_date), # Find the dates of the players first and last events
latest_event = max(event_date),
career_length_yrs = round(as.numeric((latest_event - first_event))/365, 1), # calculate the length of poker career in years (with decimals)
total_prize_money = sum(prize_usd), # Total prize money
biggest_win = max(prize_usd), # Their biggest win
total_events = n(), # Number of events they've taken part in
events_won = sum(was_event_winner) %>% replace_na(0),
percentage_of_events_won = paste0(sprintf("%.1f", 100*(events_won/total_events)), "%"), # The percentage of events they've won
countries_visited = n_distinct(event_country) # The distinct count of the countries played in
)
# Reduce the data to name, number of events, total prize money, biggest win, percentage won, countries visited, career length
unique_players_with_stats <-
unique_players %>%
inner_join(aggregated_player_stats, by = "player_id")
# save to csv
unique_players_with_stats %>%
write_csv("output.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment