trinker · June 25, 2018 13:26 · drhmoosavi · Jun 26, 2018 · trinker · Jun 27, 2018
diff --git a/Mutlistep Cleaning Regex: Substitution & Extract Portion Before Separator.R b/Mutlistep Cleaning Regex: Substitution & Extract Portion Before Separator.R
 library(tidyverse)

 # Build a tidy table from matrix `M`: the row names become an `id` column,
 # the ids are shortened/zero-padded, split into their three `_`-separated
 # parts, and every distinct `S` value gets an integer `group` index.
 as.data.frame(M, stringsAsFactors = FALSE) %>%
   rownames_to_column(var = 'id') %>%
   # Inner gsub() shortens the prefix; outer gsub() inserts a '0' after the
   # leading 'S' when exactly two digits precede the first underscore.
   mutate(id = gsub(
     '(^S)(\\d{2})(_)', '\\10\\2\\3',
     gsub('SuperSMART_', 'S', id),
     perl = TRUE
   )) %>%
   # Keep the full `id` next to its pieces (remove = FALSE).
   separate(col = id, into = c('S', 'R', 'p'), sep = '_', remove = FALSE) %>%
   mutate(group = group_indices(., S))

 ##             id    S  R    p x group
 ## 1 S003_T1_p555 S003 T1 p555 1     1
 ## 2 S003_T2_p456 S003 T2 p456 2     1
 ## 3 S004_T3_p785 S004 T3 p785 3     2
 ## 4 S004_T4_p426 S004 T4 p426 4     2
 ## 5 S027_T1_p112 S027 T1 p112 5     3
 ## 6 S027_T2_p414 S027 T2 p414 6     3
 ## 7 S042_T3_p155 S042 T3 p155 7     4
 ## 8 S042_T5_p775 S042 T5 p775 8     4


 ## If you really want it as a function:
 #' Normalize sample ids held in row names and derive grouping columns
 #'
 #' Converts `m` to a data frame, moves its row names into an `id` column,
 #' rewrites the ids, splits them on `_` into `S`, `R` and `p`, and adds an
 #' integer `group` index per distinct `S`.
 #'
 #' Relies on `%>%` being attached by the caller (e.g. `library(tidyverse)`).
 #'
 #' @param m Object coercible with `as.data.frame()` whose row names carry the
 #'   ids. Presumably a matrix with row names such as `SuperSMART_03_T1_p555`
 #'   (TODO confirm against callers).
 #' @param ... Currently unused; accepted so that extra arguments are ignored
 #'   instead of raising an "unused argument" error.
 #' @return A data frame with columns `id`, `S`, `R`, `p`, the original
 #'   columns of `m`, and `group`.
 normalize_data <- function(m, ...) {
   as.data.frame(m, stringsAsFactors = FALSE) %>%
     tibble::rownames_to_column('id') %>%
     dplyr::mutate(
       # Shorten the 'SuperSMART_' prefix to a bare 'S'.
       id = gsub('SuperSMART_', 'S', id),
       # gsub() only supports back-references \1-\9, so '\\10' is group 1
       # followed by a literal '0': 'S27_' becomes 'S027_'.
       id = gsub('(^S)(\\d{2})(_)', '\\10\\2\\3', id, perl = TRUE)
     ) %>%
     # remove = FALSE keeps the full `id` next to its three pieces.
     tidyr::separate(id, into = c('S', 'R', 'p'), sep = '_', remove = FALSE) %>%
     # NOTE(review): passing grouping columns to group_indices() is
     # deprecated since dplyr 1.0.0 -- confirm before upgrading dplyr.
     dplyr::mutate(., group = dplyr::group_indices(., S))
 }
	library(tidyverse)

	# Build a tidy table from matrix `M`: the row names become an `id` column,
	# the ids are shortened/zero-padded, split into their three `_`-separated
	# parts, and every distinct `S` value gets an integer `group` index.
	as.data.frame(M, stringsAsFactors = FALSE) %>%
		rownames_to_column(var = 'id') %>%
		# Inner gsub() shortens the prefix; outer gsub() inserts a '0' after the
		# leading 'S' when exactly two digits precede the first underscore.
		mutate(id = gsub(
			'(^S)(\\d{2})(_)', '\\10\\2\\3',
			gsub('SuperSMART_', 'S', id),
			perl = TRUE
		)) %>%
		# Keep the full `id` next to its pieces (remove = FALSE).
		separate(col = id, into = c('S', 'R', 'p'), sep = '_', remove = FALSE) %>%
		mutate(group = group_indices(., S))

	## id S R p x group
	## 1 S003_T1_p555 S003 T1 p555 1 1
	## 2 S003_T2_p456 S003 T2 p456 2 1
	## 3 S004_T3_p785 S004 T3 p785 3 2
	## 4 S004_T4_p426 S004 T4 p426 4 2
	## 5 S027_T1_p112 S027 T1 p112 5 3
	## 6 S027_T2_p414 S027 T2 p414 6 3
	## 7 S042_T3_p155 S042 T3 p155 7 4
	## 8 S042_T5_p775 S042 T5 p775 8 4


	## If you really want it as a function:
	#' Normalize sample ids held in row names and derive grouping columns
	#'
	#' Converts `m` to a data frame, moves its row names into an `id` column,
	#' rewrites the ids, splits them on `_` into `S`, `R` and `p`, and adds an
	#' integer `group` index per distinct `S`.
	#'
	#' Relies on `%>%` being attached by the caller (e.g. `library(tidyverse)`).
	#'
	#' @param m Object coercible with `as.data.frame()` whose row names carry the
	#'   ids. Presumably a matrix with row names such as `SuperSMART_03_T1_p555`
	#'   (TODO confirm against callers).
	#' @param ... Currently unused; accepted so that extra arguments are ignored
	#'   instead of raising an "unused argument" error.
	#' @return A data frame with columns `id`, `S`, `R`, `p`, the original
	#'   columns of `m`, and `group`.
	normalize_data <- function(m, ...) {
		as.data.frame(m, stringsAsFactors = FALSE) %>%
			tibble::rownames_to_column('id') %>%
			dplyr::mutate(
				# Shorten the 'SuperSMART_' prefix to a bare 'S'.
				id = gsub('SuperSMART_', 'S', id),
				# gsub() only supports back-references \1-\9, so '\\10' is group 1
				# followed by a literal '0': 'S27_' becomes 'S027_'.
				id = gsub('(^S)(\\d{2})(_)', '\\10\\2\\3', id, perl = TRUE)
			) %>%
			# remove = FALSE keeps the full `id` next to its three pieces.
			tidyr::separate(id, into = c('S', 'R', 'p'), sep = '_', remove = FALSE) %>%
			# NOTE(review): passing grouping columns to group_indices() is
			# deprecated since dplyr 1.0.0 -- confirm before upgrading dplyr.
			dplyr::mutate(., group = dplyr::group_indices(., S))
	}