Skip to content

Instantly share code, notes, and snippets.

@admariner
Forked from MarkEdmondson1234/compareXMLtoGA.R
Created November 11, 2023 00:25
Show Gist options
  • Save admariner/8ad2e72d9aa7cf22ef7e4327acedaef2 to your computer and use it in GitHub Desktop.
Save admariner/8ad2e72d9aa7cf22ef7e4327acedaef2 to your computer and use it in GitHub Desktop.
Find pages with zero pageviews by comparing the URLs listed in a sitemap.xml with the page paths recorded in Google Analytics.
library(googleAnalyticsR)
library(xml2)
library(dplyr)
## authenticate before any Google Analytics call is made
ga_auth()
## reporting window for the URLs to test: from 30 days ago until today
dates <- Sys.Date() - c(30, 0)
## Google Analytics View ID to query
id <- 11111111
## function to get sitemap URLs
##
## Reads the XML document given by `sitemap` (anything `read_xml()` accepts)
## and returns the text of every element named `field` as a character vector.
##
## sitemap: location of the sitemap (or sitemap index) to read
## field:   name of the XML element whose text is collected, default "loc"
##
## Returns a character vector (possibly empty), or NULL after emitting a
## message when the document cannot be read or parsed.
get_sitemap <- function(sitemap, field = "loc") {
  tryCatch(
    {
      doc <- read_xml(sitemap)
      ## match on local-name() so the sitemap's default XML namespace does
      ## not have to be declared in the XPath expression
      nodes <- xml_find_all(doc, sprintf(".//*[local-name() = '%s']", field))
      xml_text(nodes, trim = TRUE)
    },
    error = function(e) {
      ## keep going with the remaining sitemaps, but say why this one failed
      message("Problem with sitemap:", sitemap, " (", conditionMessage(e), ")")
      NULL
    }
  )
}
## Google SEO filter: source must be exactly "google" AND
## medium must be exactly "organic"
google_seo <- filter_clause_ga4(
  filters = list(
    dim_filter(dimension = "source", operator = "EXACT", expressions = "google"),
    dim_filter(dimension = "medium", operator = "EXACT", expressions = "organic")
  ),
  operator = "AND"
)
## fetch pageviews and total events per page path for the view and
## date range configured above, restricted by the Google SEO filter
pages <- google_analytics_4(
  viewId = id,
  date_range = dates,
  metrics = c("pageviews", "totalEvents"),
  dimensions = "pagePath",
  dim_filters = google_seo,
  anti_sample = TRUE,
  max = -1
)
## get the sitemap index file
url_si <- "http://www.example.com/sitemap.xml"
sitemap_index <- get_sitemap(url_si)
## get all the sitemaps (maybe you only need the call above if you have no sitemap index)
many_sitemaps <- lapply(sitemap_index, get_sitemap)
## all the urls in all the sitemaps; sitemaps that failed come back as NULL
## and are dropped by unlist(), URLs listed more than once are kept once
all_urls <- unique(unlist(many_sitemaps, use.names = FALSE))
if (length(all_urls) == 0) {
  stop("No URLs found via sitemap: ", url_si, call. = FALSE)
}
## Compare and get the URLs that are in XML but not in Google Analytics
url_paths <- urltools::path(all_urls)
## NOTE(review): a URL without a path component (the site root) presumably
## parses to NA; map it to "" so the joined path is "/" rather than "/NA"
url_paths[is.na(url_paths)] <- ""
## one row per sitemap URL plus the page path used as the join key
sitemap_urls <- tibble(all_urls = all_urls, path = paste0("/", url_paths))
sitemap_not_in_ga <- anti_join(sitemap_urls, pages, by = c(path = "pagePath"))
## write out to CSV, creating the output directory if it is missing
out_file <- "./data/sitemap_urls_not_in_ga.csv"
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
write.csv(sitemap_not_in_ga, file = out_file, row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment