Last active
September 1, 2016 21:34
-
-
Save wmcraver/ebdcde1f17e593dff39c56a98f0f0456 to your computer and use it in GitHub Desktop.
Used to extract analytics traffic from Page Category CSV files. Files are by day and are read in, filtered for specific information and then stored in a data frame.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages this script depends on.
needed <- c("lubridate", "dplyr", "data.table", "stringr")

# Return TRUE when `pkg` appears in the first column (package names) of
# installed.packages(), FALSE otherwise.
is.installed <- function(pkg) {
  is.element(pkg, installed.packages()[, 1])
}

# Install whatever is missing and report what is already present.
# A plain if/else is used rather than ifelse(): ifelse() tries to store the
# value of the chosen branch, and install.packages() returns NULL, which makes
# ifelse() fail with "replacement has length zero" right after the install.
for (p in seq_along(needed)) {
  if (!is.installed(needed[p])) {
    install.packages(needed[p], dependencies = TRUE)
  } else {
    print(paste0(needed[p], " is already installed"))
  }
}

library(lubridate)
library(dplyr)
library(data.table) # used for the %like% function
library(stringr)
# Set the working directory to the folder that contains the CSV files
setwd(".../PageCategory/Extracts")

# Set the sequence to a date range (one entry per day, inclusive)
seqdates <- seq(ymd("20150601"), ymd("20160630"), by = "day")

# Read the extract for a single day, keep only the rows whose Page.Url matches
# "show-your-card", and tag them with the day in a trailing `dt` column.
#
# day: a length-one Date; the file read is "PageCategory-<YYYYMMDD>.csv" in
#      the working directory.
# Returns a data frame (possibly with zero rows). read.csv() still raises an
# error when the file for `day` does not exist.
read_daily_extract <- function(day) {
  extract_file <- paste0("PageCategory-", format(day, "%Y%m%d"), ".csv")
  daily <- read.csv(extract_file, stringsAsFactors = FALSE)
  matched <- daily %>% filter(Page.Url %like% "show-your-card")
  # rep() keeps this valid when nothing matched: assigning a length-one value
  # to a column of a zero-row data frame is an error in base R.
  matched$dt <- rep(day, nrow(matched))
  matched
}

# Collect the per-day pieces in a list and bind them once at the end instead
# of growing the data frame with rbind() on every iteration.
# Indexing with seqdates[i] keeps the Date class of each element.
daily_pieces <- lapply(
  seq_along(seqdates),
  function(i) read_daily_extract(seqdates[i])
)
datCombined <- do.call(rbind, daily_pieces)
# Rename the columns by position.
# NOTE(review): this assumes the combined data has exactly these ten columns
# in this order (nine from the CSV plus the appended date) - confirm against
# the extract layout.
colnames(datCombined) <- c(
  "PAGENAME", "CONTENTCAT", "PAGEURL", "PAGEVIEWS", "SESSIONS",
  "REVENUE", "BOUNCERATE", "UNIQUEVISITORS", "BOOKINGS", "DATE"
)

# Strip the dollar sign (and any thousands separators) from the Revenue column
# and convert to numeric. Removing the characters explicitly, rather than
# dropping the first character, keeps values such as "$1,234.56" from becoming
# NA and leaves values without a leading "$" intact.
datCombined$REVENUE <- as.numeric(gsub("[$,]", "", datCombined$REVENUE))

# Update the filename and uncomment if you need the file written to the working directory
#write.csv(datCombined, "ShowYourCard.csv")
# Summarize the data by grouping on URL and totaling the revenue, highest
# total first. Columns are selected by name rather than by position so the
# summary does not silently pick the wrong columns if the layout changes.
# sum() is left without na.rm, so a URL with any NA revenue totals to NA.
datSummary <- datCombined %>%
  select(PAGEURL, REVENUE) %>%
  group_by(PAGEURL) %>%
  summarize(Revenue = sum(REVENUE)) %>%
  arrange(desc(Revenue))

# Update the filename and uncomment if you need the summary file written to the working directory
#write.csv(datSummary, "ShowYourCard-Summary.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment