Skip to content

Instantly share code, notes, and snippets.

@stephlocke
Created December 22, 2017 19:03
Show Gist options
  • Save stephlocke/e122e3258e88ba24adca36d0f5ad8cda to your computer and use it in GitHub Desktop.
Save stephlocke/e122e3258e88ba24adca36d0f5ad8cda to your computer and use it in GitHub Desktop.
xml processing
library(xml2)
library(tidyverse)
## Extract any number of xml files
## NB: `pattern` is a regular expression, not a glob, so match a literal
## ".xml" suffix instead of "*.xml" (which would also match e.g. "a.xml.bak")
xml_files <- list.files(pattern = "\\.xml$")
## Fail early with a clear message rather than with a missing-column error later
if (length(xml_files) == 0) {
  stop("No .xml files found in ", getwd(), call. = FALSE)
}
highleveldata <- xml_files %>%
  map(read_xml) %>%
  ## Convert to a list to process
  map(as_list) %>%
  ## Simplify list structure
  map(flatten) %>%
  ## Turn into one big data frame (as_tibble replaces the deprecated as_data_frame)
  map_df(as_tibble) %>%
  ## Handle missing request info
  mutate(Request = ifelse(Request == "NULL", NA, Request))
## Convert embedded XML into XML nodes
## The fragment `x` is wrapped in a single <node> root element before being
## handed to as_xml_document(); an NA fragment is replaced by a placeholder
## <CustomerId>NA</CustomerId> element.
xml_handler <- function(x) {
  fragment <- ifelse(is.na(x), "<CustomerId>NA</CustomerId>", x)
  wrapped <- paste0("<node>", fragment, "</node>")
  as_xml_document(wrapped)
}
requestdata <- highleveldata %>%
  ## Extract Requests as a vector
  pluck("Request") %>%
  ## Parse each embedded request with xml_handler()
  map(xml_handler) %>%
  ## Convert to a list and simplify the list structure
  map(as_list) %>%
  map(flatten) %>%
  ## Stack the requests into one data frame
  ## (as_tibble replaces the deprecated as_data_frame)
  map_df(as_tibble)
## Only combine the two dataframes if the number of rows matches, i.e. the
## request parsing didn't screw anything up. Stop with an explicit message
## otherwise, instead of leaving `alldata` undefined (or stale from an
## earlier run) for the View() call below.
if (nrow(highleveldata) != nrow(requestdata)) {
  stop(
    "Row count mismatch: highleveldata has ", nrow(highleveldata),
    " rows but requestdata has ", nrow(requestdata),
    call. = FALSE
  )
}
alldata <- cbind(highleveldata, requestdata)
## Et voila
View(alldata)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment