Created
June 20, 2020 23:11
-
-
Save ozjimbob/863a0f553b89e695f524e735cc4b340f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(purrr) | |
library(readxl) | |
library(jsonlite) | |
# Read your spreadsheet | |
input <- read_excel("Book1-2.xlsx") | |
# Take your column C and put it into a list, by interpreting the JSON | |
data <- map(input$context,fromJSON) | |
# As far as I can tell, this 'data' now matches what you have | |
# This is a utility function that turns that weird 23x23 data frame | |
# into a single row with containing all the data | |
# with no NA values | |
proc <- function(x){ | |
frst <- function(x){first(na.omit(x))} | |
y <- x %>% summarise_all(lst(frst)) | |
names(y)<- names(x) | |
y | |
} | |
# This is the main function | |
# This takes one list entry and looks at all the fields | |
# If it's a "normal" field, it adds it as a column to the output | |
# If it is that nested data frame, then it takes it apart using the | |
# "proc" function above and then adds all the columns to the output | |
proc_row <- function(x){ | |
out = list() # | |
for(i in 1:length(x)){ | |
this_name = names(x[i]) | |
if(this_name != "custom"){ | |
full_contents = x[[i]] | |
for(j in 1:length(full_contents)){ | |
sub_name = names(full_contents[j]) | |
sub_data = full_contents[[j]] | |
complete_name = paste0(this_name,"_",sub_name) | |
if(length(sub_data)==0){ | |
sub_data=NA | |
} | |
out[complete_name]=sub_data | |
} | |
}else{ | |
this_df <- x[[i]]$dimensions # Get the contents | |
this_df <- as.list(proc(this_df)) # Take it apart so it's just ONE row and not 23 | |
for(j in 1:length(this_df)){ # For each column | |
this_name2 <- paste0("custom_",names(this_df[j])) | |
this_data2 <- this_df[[j]] | |
if(length(this_data2)==0){ | |
this_data2=NA | |
} | |
out[this_name2]<- this_data2 # Add it to our output | |
} | |
} | |
} | |
as_tibble(out) # Turn our output list into a little data frame | |
} | |
# And here we run it all - apply that "proc_row" function over the input list | |
final <- data %>% map_df(proc_row) | |
print(final) | |
# NOTE; | |
# The input data has fields and subfields - the column names are written as field_subfield to enable this to work | |
# If data is missing anything in a field, then it stores NA in the output | |
# All this only works if every row of the input has the same total number of fields | |
# If a given entry has a different number of fields, it will fail - but that is to be expected, because | |
# you can't store things in a table if they have differnet numbers of columns | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment