Last active
September 6, 2017 03:21
-
-
Save dantonnoriega/3d9d4346864436a80b93e3bb5db79b00 to your computer and use it in GitHub Desktop.
Parse UTM CSV export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# script UTM | |
# functions | |
get_utm <- function(x) { | |
# empty frame | |
ef <- structure(list(cf_affiliate_id = character(1), utm_source = character(1), | |
utm_medium = character(1), utm_campaign = character(1), utm_term = character(1), | |
utm_content = character(1), webinar_delay = character(1), | |
purchase_product_ids = character(1), purchase_taxamo_transaction_key = character(1), | |
purchase_payment_method_nonce = character(1), purchase_stripe_customer_token = character(1), | |
upsell = character(1), purchase_product_id = character(1), | |
purchase_stripe_customer_id = character(1)), .Names = c("cf_affiliate_id", | |
"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content", | |
"webinar_delay", "purchase_product_ids", "purchase_taxamo_transaction_key", | |
"purchase_payment_method_nonce", "purchase_stripe_customer_token", | |
"upsell", "purchase_product_id", "purchase_stripe_customer_id" | |
), row.names = integer(1), class = c("tbl_df", "tbl", "data.frame")) | |
if(is.na(x)) return(ef) | |
y <- rev(gregexpr(",[ ]+:[^\\>]*\\>\\{", x, perl = T)[[1]])[1] | |
z <- gregexpr("\\}", x, perl = T)[[1]][1] | |
# purchase | |
a <- trimws(substring(x, y + 1, z)) | |
a <- substring(a, gregexpr("\\{", a, perl = T)[[1]][1], gregexpr("\\}", a, perl = T)[[1]][1]) | |
# outer | |
l <- substring(x, 1, y - 1) | |
r <- substring(x, z + 1, nchar(x)) | |
b <- paste0(l, r) | |
# skip fail is cool | |
# https://stackoverflow.com/questions/35347537/using-strsplit-in-r-ignoring-anything-in-parentheses | |
parse_vec <- function(x) { | |
x %>% | |
gsub('\\"', "'", .) %>% | |
strsplit(., '\\[[^\\]]+,(*SKIP)(*FAIL)|,\\s*', perl=TRUE) %>% | |
'[['(1) %>% | |
lapply(. %>% strsplit('>') %>% '[['(1)) %>% | |
lapply(., gsub, pattern = "[':{}]", replacement = "") %>% | |
do.call('rbind', .) %>% | |
as.data.frame() | |
} | |
a <- parse_vec(a) %>% | |
dplyr::mutate(V2 = replace(V2, V2 == "", NA_character_)) %>% | |
tibble::column_to_rownames('V1') %>% | |
t() %>% | |
tibble::as_tibble() %>% | |
setNames(paste0('purchase_', names(.))) | |
b <- parse_vec(b) %>% | |
dplyr::mutate(V2 = replace(V2, V2 == "", NA_character_)) %>% | |
tibble::column_to_rownames('V1') %>% | |
t() %>% | |
tibble::as_tibble() | |
dplyr::bind_cols(b, a) | |
} | |
# PARSE AND CLEAN DATA ------------------------------------------------ | |
# get data | |
f <- "/Users/danton/Downloads/346768002859784295.csv" | |
v <- readr::read_csv(f) | |
nms <- names(v) %>% | |
gsub("[ ]", "_", .) %>% | |
tolower() | |
v <- setNames(v, nms) | |
# parse additional info | |
p <- v[['additional_info']] | |
k <- p %>% | |
lapply(get_utm) %>% | |
dplyr::bind_rows() %>% | |
dplyr::mutate(purchase_product_ids = gsub("\\[|\\]", "", purchase_product_ids)) %>% | |
tidyr::separate(purchase_product_ids, c('purhcase_product_id1', 'purhcase_product_id2')) %>% | |
dplyr::mutate_at(dplyr::vars(-purchase_stripe_customer_id), dplyr::funs(tolower(.))) %>% | |
dplyr::mutate_all(dplyr::funs(replace(., . == "", NA))) | |
p <- p %>% | |
lapply(. %>% | |
gsub(pattern = '\\"', replacement = "'", .) %>% | |
'[['(1)) | |
# drop additional_info and bind the parse utm vals | |
# variables of interest | |
# - utm_medium | |
# - utm_campaign | |
# - utm_source | |
# - product_names | |
# - original_amount_cents | |
q <- v %>% | |
dplyr::select(-additional_info) %>% | |
dplyr::bind_cols(., k) %>% | |
dplyr::group_by(product_names, utm_source, utm_medium, utm_campaign) %>% | |
dplyr::summarize(price_dollars = sum(original_amount_cents)/100) | |
# export | |
readr::write_delim(q, "~/Desktop/utm-filtered-sum.psv", delim = '|') | |
readr::write_csv(q, "~/Desktop/utm-filtered-sum.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment