Skip to content

Instantly share code, notes, and snippets.

@bhive01
Last active September 22, 2015 17:55
Show Gist options
  • Save bhive01/880b5100ac4de59a03a5 to your computer and use it in GitHub Desktop.
Save bhive01/880b5100ac4de59a03a5 to your computer and use it in GitHub Desktop.
require(tidyr)
require(dplyr)
#require(ggplot2)
# from: https://twitter.com/JennyBryan/status/646047312830050304
# Split a string into its individual characters with separate(). The
# zero-width regex "(?!^)" matches at every position except the very start
# of the string, so each match is a cut point between characters.
# `into` supplies the names of the new columns and is documented by tidyr as
# a character vector, so the numbers are converted explicitly; the resulting
# columns are still named "1" through "6".
separate(data.frame(x = "howdy"), x, into = as.character(1:6), sep = "(?!^)")
# works with more than one word
separate(
  data.frame(x = c("howdy", "snake")),
  x,
  into = as.character(1:6),
  sep = "(?!^)"
)
# from https://groups.google.com/forum/#!topic/ggplot2/wdeLGVdp-bE
# Example data: 48 rows (12 months x 4 years) plus nine columns of random
# integers drawn from 1:100, named with one letter and one digit
# (p1-p3, f1-f3, g1-g3).
df <- data.frame(
  month = rep(1:12, 4),
  year = rep(2010:2013, each = 12),
  p1 = sample(1:100, 48, replace = TRUE),
  p2 = sample(1:100, 48, replace = TRUE),
  p3 = sample(1:100, 48, replace = TRUE),
  f1 = sample(1:100, 48, replace = TRUE),
  f2 = sample(1:100, 48, replace = TRUE),
  f3 = sample(1:100, 48, replace = TRUE),
  g1 = sample(1:100, 48, replace = TRUE),
  g2 = sample(1:100, 48, replace = TRUE),
  g3 = sample(1:100, 48, replace = TRUE)
)

# Reshape to long form, then split each key ("p1", "f2", ...) into its letter
# ("alpha") and its digit ("numeric").
#
# Why the earlier version failed: the pattern was passed as
# `split = "(?!^)"`, but the argument separate() uses for the separator is
# `sep`. The pattern was therefore ignored and the default separator (runs of
# non-alphanumeric characters) was used. Keys such as "p1" contain no such
# characters, so nothing was split; the symptom was
#   Warning message: Too few values at 432 locations: 1, 2, 3, ...
# with `alpha` holding the whole key ("p1") and `numeric` being <NA>.
#
# A numeric `sep` is interpreted as a character position, so `sep = 1` cuts
# every key after its first character and yields exactly two pieces per row.
long.df <-
  df %>%
  # "[0-9]$" selects the columns whose name ends in a digit (p1 ... g3);
  # month and year contain no digits and are kept as identifier columns.
  gather(., key, value, matches("[0-9]$")) %>%
  separate(., col = key, into = c("alpha", "numeric"), sep = 1)
# Breaking the pipeline down step by step to see where it goes wrong.

# Step 1: reshape to long form only, without the separate() step.
long.df <- gather(df, key, value, matches("*[0-9]"))

# Step 2: split every key with stringi directly, using the same zero-width
# pattern (a cut at every position except the start of the string).
require(stringi)
asdf <- stri_split_regex(str = long.df$key, pattern = "(?!^)") # works fine

# Step 3: call up simplifyPieces, the compiled routine looked up by the name
# 'tidyr_simplifyPieces' in package tidyr, on the list of split pieces.
# NOTE(review): the 2 is presumably the number of output columns and the last
# argument a fill-from-the-left flag -- confirm against the tidyr sources.
# Works, but warns for every row about excessive length.
simp <- .Call(
  "tidyr_simplifyPieces",
  PACKAGE = "tidyr",
  asdf,
  2,
  fillLeft = TRUE
)

# Step 4: tack the simplified pieces onto the long data.
# This works too... it is not clear why the piped version above does not.
c(long.df, simp$strings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment