Created
July 5, 2021 08:25
-
-
Save tonyelhabr/4685f96e7fe4d48427b77544d3f84867 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
dir_data <- file.path('inst', 'extdata', 'json') | |
paths <- | |
fs::dir_ls(dir_data, regexp = 'json$') | |
paths | |
n_path <- paths %>% length() | |
n_path | |
# paths <- 'inst/extdata/json/8-2020-1485311.json' | |
# paths <- 'inst/extdata/json/8-2019-1375927.json' | |
# Reference: https://adv-r.hadley.nz/function-operators.html | |
.print_every <- function(f, n = 1, max = NULL, format = 'Function call {cli::bg_black(i)} of {max}.') { | |
force(f) | |
force(n) | |
i <- 0 | |
function(...) { | |
i <<- i + 1 | |
if (i %% n == 0) { | |
cat(glue::glue(format), sep = '\n') | |
} | |
f(...) | |
} | |
} | |
.parse_init <- function(path, rgx = '([0-9]+)-([0-9]+)-([0-9]+)') { | |
init <- | |
path %>% | |
jsonlite::read_json() %>% | |
enframe() | |
name <- path %>% basename() %>% tools::file_path_sans_ext() | |
.f_replace <- function(i) { | |
name %>% str_replace(rgx, sprintf('\\%d', i)) %>% as.integer() | |
} | |
list( | |
competition_id = .f_replace(1), | |
season_id = .f_replace(2), | |
match_id = .f_replace(3), | |
init = init | |
) | |
} | |
.parse_meta <- function(lst = NULL, path = NULL, ...) { | |
if(is.null(lst)) { | |
lst <- .parse_init(path, ...) | |
} | |
meta_init <- | |
lst$init %>% | |
rowwise() %>% | |
filter(length(value) == 1L) %>% | |
ungroup() %>% | |
pivot_wider(names_from = name, values_from = value) %>% | |
janitor::clean_names() | |
meta <- | |
meta_init %>% | |
unnest(cols = names(meta_init)) %>% | |
mutate(match_id = lst$match_id) %>% | |
relocate(competition_id, season_id, match_id) | |
meta | |
} | |
.parse_events <- function(lst = NULL, path = NULL, ...) { | |
if(is.null(lst)) { | |
lst <- .parse_init(path, ...) | |
} | |
events <- | |
lst$init %>% | |
filter(name == 'events') %>% | |
select(value) %>% | |
unnest(value) %>% | |
unnest_wider(value) | |
players <- | |
lst$init %>% | |
filter(name == 'playerIdNameDictionary') %>% | |
select(value) %>% | |
unnest_longer(value, indices_to = 'player_id', values_to = 'player_name') %>% | |
mutate(across(player_id, as.integer)) | |
.unnest_side <- function(side) { | |
lst$init %>% | |
filter(name == !!side) %>% | |
select(value) %>% | |
unnest_wider(value) %>% | |
janitor::clean_names() %>% | |
select(team_id, team_name = name, side = field) | |
} | |
teams <- c('home', 'away') %>% map_dfr(.unnest_side) | |
events_clean_init1 <- | |
events %>% | |
janitor::clean_names() %>% | |
hoist(period, 'period' = 'value', 'period_name' = 'displayName') %>% | |
hoist(type, 'type' = 'value', 'type_name' = 'displayName') %>% | |
hoist(outcome_type, 'outcome_type' = 'value', 'outcome_type_name' = 'displayName') | |
if(any('cardType' == names(events))) { | |
# There's some weird thing here with `cardType` compared to the other elements that are hoisted. | |
# Since `cardType` is NULL by default while the others are always named lists, automatically renaming | |
# a hoisted element to the same name of the column that is being hoisted drops the column; one way | |
# to avoid this is to temporarily name the hoisted column to something different (in this case `cardTypex`). | |
events_clean_init1 <- | |
events_clean_init1 %>% | |
hoist(card_type, 'card_typex' = 'value', 'card_type_name' = 'displayName') %>% | |
rename(cardtype = card_typex) | |
} | |
events_clean_init2 <- | |
events_clean_init1 %>% | |
left_join(teams, by = 'team_id') %>% | |
left_join(players, by = 'player_id') %>% | |
mutate(competition_id = lst$competition_id, season_id = lst$season_id, match_id = lst$match_id) %>% | |
relocate(competition_id, season_id, match_id) | |
events_clean <- | |
events_clean_init2 %>% | |
select(-c(qualifiers, satisfied_events_types)) %>% | |
arrange(minute, event_id) | |
events_clean | |
} | |
.parse_match <- function(path) { | |
lst <- .parse_init(path) | |
meta <- .parse_meta(lst) | |
events <- .parse_events(lst) | |
# res <- | |
# events %>% | |
# bind_cols( | |
# meta %>% select(league_id, season_id, match_id) | |
# ) | |
# list(events = events, meta = meta) | |
events | |
} | |
f_v <- .print_every(.parse_match, max = n_path) | |
res <- | |
paths %>% | |
tibble(path = .) %>% | |
mutate(data = map(path, f_v)) %>% | |
select(data) %>% | |
unnest(data) | |
res | |
res %>% | |
filter(type_name %>% str_detect('Sub')) %>% | |
relocate(player_name) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment