Last active
November 1, 2021 22:42
-
-
Save jvelezmagic/216eee1c1711ce7aa722fbe2b159b76c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Cydar batch correction" | |
author: "" | |
date: "`r Sys.Date()`" | |
output: | |
html_document: | |
toc: true | |
toc_float: true | |
toc_depth: 3 | |
code_download: true | |
code_folding: show | |
df_print: paged | |
params: | |
fcs_dir: | |
label: "FCS directory" | |
value: !r here::here() | |
output_dir: | |
label: "Output directory for results" | |
value : !r here::here("batch_corrected") | |
output_suffix: | |
label: "Output suffix (e.g., my-sample-name{value}.fcs)" | |
value: "_batchCorrected" | |
metadata_sep: | |
label: "Filename metadata separator" | |
value: "_" | |
batch_position: | |
label: "Batch position after splitting filename by the metadata separator" | |
value: 1 | |
condition_position: | |
label: "Condition position after splitting filename by the metadata separator" | |
value: 2 | |
control_string: | |
label: "String to identify which filenames are control samples" | |
value: "Sp" | |
fcs_glob: | |
label: "Glob to find files to use in batch correction" | |
value: "*.fcs" | |
plots_in_tabs: | |
label: "Should the report split comparisson plots by markes in tabs?" | |
value: TRUE | |
cytofkit_params: !r list(comp = FALSE, transformMethod = "arcsinh", mergeMethod = "all", sampleSeed = 1234) | |
cydar_params: !r list(mode = "range") | |
--- | |
```{r bcwc-setup, include=FALSE} | |
knitr::opts_chunk$set( | |
echo = TRUE, | |
message = FALSE, | |
warning = FALSE, | |
fig.align = "center" | |
) | |
``` | |
## Libraries | |
```{r bcwc-libraries} | |
library(fs) | |
library(tidyverse) | |
library(cytofkit2) | |
library(cydar) | |
library(flowCore) | |
``` | |
## Identify FCS files | |
```{r bcwc-identify-fcs-files} | |
fcs_complete_files <- params$fcs_dir |> | |
fs::dir_ls(glob = params$fcs_glob) | |
``` | |
```{r bcwc-show-files, echo=FALSE} | |
fcs_complete_files %>% | |
fs::path_file() %>% | |
tibble::tibble( | |
`FCS used` = . | |
) | |
``` | |
## Read FCS files and merge them | |
```{r bcwc-read-and-merge-data} | |
merged_data <- purrr::exec( | |
.fn = cytof_exprsMerge, | |
fcsFiles = fcs_complete_files, | |
!!!params$cytofkit_params | |
) %>% | |
as.data.frame() | |
merged_data %>% | |
str() | |
``` | |
## Prepare data for batch correction | |
```{r bcwc-prepare-data-for-batch-correction} | |
rownames_merged_data <- rownames(merged_data) | |
batch_data_prep <- tibble::tibble( | |
fcs_filename = fs::path_file(fcs_complete_files) | |
) %>% | |
dplyr::mutate( | |
sample = fs::path_ext_remove(fcs_filename), | |
metadata_info <- stringr::str_split( | |
string = sample, | |
pattern = params$metadata_sep | |
), | |
batch = purrr::map_chr( | |
.x = metadata_info, | |
.f = dplyr::nth, | |
params$batch_position | |
), | |
batch = stringr::str_c( | |
batch, | |
params$metadata_sep | |
), | |
condition = purrr::map_chr( | |
.x = metadata_info, | |
.f = dplyr::nth, | |
params$condition_position | |
), | |
condition = dplyr::case_when( | |
stringr::str_detect(condition, params$control_string) ~ "Control", | |
TRUE ~ "Sample" | |
), | |
condition = factor( | |
x = condition, | |
levels = c("Control", "Sample") | |
), | |
sample_data = purrr::map( | |
.x = sample, | |
.f = ~ merged_data[grep(.x, rownames_merged_data), ] | |
) | |
) %>% | |
dplyr::arrange(dplyr::desc(condition)) %>% | |
dplyr::select(sample, batch, condition, sample_data) %>% | |
dplyr::group_by(batch) %>% | |
dplyr::summarize( | |
data = list(sample_data), | |
data = purrr::map( | |
.x = data, | |
.f = ~ { | |
purrr::set_names(x = .x, nm = sample) %>% | |
list() %>% | |
purrr::set_names(nm = dplyr::cur_group()) | |
} | |
), | |
composition = list(condition), | |
composition = purrr::map( | |
.x = composition, | |
.f = ~ { | |
purrr::set_names(x = .x, nm = condition) %>% | |
list() %>% | |
purrr::set_names(nm = dplyr::cur_group()) | |
} | |
) | |
) %>% | |
identity() | |
batch_data_prep %>% | |
dplyr::slice_head(n = 1) %>% | |
dplyr::glimpse(width = 80) | |
``` | |
## Run batch normalization | |
```{r bcwc-run-batch-normalization} | |
batch_x <- unlist(batch_data_prep$data, recursive = FALSE) | |
batch_comp <- unlist(batch_data_prep$composition, recursive = FALSE) | |
batch_corrected_results <- purrr::exec( | |
.fn = cydar::normalizeBatch, | |
batch.x = batch_x, | |
batch.comp = batch_comp, | |
!!!params$cydar_params | |
) | |
batch_corrected_results %>% | |
dplyr::first() %>% | |
str() | |
``` | |
## Save results | |
```{r bcwc-save-results} | |
fs::dir_create(params$output_dir) | |
batch_corrected_results %>% | |
purrr::walk( | |
.f = ~ { | |
flowframe <- new( | |
"flowFrame", | |
exprs = as.matrix(.x[[1]]) | |
) | |
flowframe_name <- fs::path( | |
params$output_dir, | |
stringr::str_c(names(.x)[[1]], params$output_suffix), | |
ext = "fcs" | |
) | |
flowCore::write.FCS( | |
x = flowframe, | |
filename = flowframe_name | |
) | |
} | |
) | |
``` | |
## Compare batch corrections {.tabset .tabset-pills} | |
```{r bcwc-compare-batch-corrections, results='asis', echo=FALSE} | |
cols <- names(batch_corrected_results) %>% | |
length() %>% | |
scales::hue_pal()(.) | |
for (k in 1:ncol(batch_x[[1]][[1]])) { | |
collected <- list() | |
for (i in 1:length(batch_x)) { | |
collected[[names(batch_x)[i]]] <- batch_x[[i]][[1]][, k] | |
} | |
collected2 <- list() | |
for (i in 1:length(batch_x)) { | |
collected2[[names(batch_x)[i]]] <- batch_corrected_results[[i]][[1]][, k] | |
} | |
if (params$plots_in_tabs) { | |
cat("\n") | |
cat("###", colnames(batch_corrected_results[[i]][[1]])[k], "\n") | |
} | |
par(mfrow = c(1, 2)) | |
multiIntHist(collected, main = colnames(batch_corrected_results[[i]][[1]])[k], cols = cols) | |
legend( | |
"topright", | |
legend = names(collected), | |
pch = 16, | |
col = cols | |
) | |
multiIntHist(collected2, main = "Corrected", cols = cols) | |
legend( | |
"topright", | |
legend = names(collected2), | |
pch = 16, | |
col = cols | |
) | |
if (params$plots_in_tabs) { | |
cat("\n") | |
} | |
} | |
``` | |
```{r bcwc-plots-to-pdf, echo=FALSE, results='hide'} | |
pdf(fs::path(params$output_dir, "batch_corrected.pdf"), width = 10) | |
cols <- names(batch_corrected_results) %>% | |
length() %>% | |
scales::hue_pal()(.) | |
for (k in 1:ncol(batch_x[[1]][[1]])) { | |
collected <- list() | |
for (i in 1:length(batch_x)) { | |
collected[[names(batch_x)[i]]] <- batch_x[[i]][[1]][, k] | |
} | |
collected2 <- list() | |
for (i in 1:length(batch_x)) { | |
collected2[[names(batch_x)[i]]] <- batch_corrected_results[[i]][[1]][, k] | |
} | |
par(mfrow = c(1, 2)) | |
multiIntHist(collected, main = colnames(batch_corrected_results[[i]][[1]])[k], cols = cols) | |
legend( | |
"topright", | |
legend = names(collected), | |
pch = 16, | |
col = cols | |
) | |
multiIntHist(collected2, main = "Corrected", cols = cols) | |
legend( | |
"topright", | |
legend = names(collected2), | |
pch = 16, | |
col = cols | |
) | |
} | |
dev.off() | |
``` | |
## Document reproducibility | |
### Code used to render this document | |
```{r bcwc-toy-call, echo=FALSE, results='asis', class.source = 'fold-show', fig.width=10} | |
cat("```r", "\n") | |
rlang::call2( | |
.fn = expr(rmarkdown::render), | |
input = "batch-correction-with-cydar.Rmd", | |
params = list( | |
fcs_dir = params$fcs_dir, | |
output_dir = params$output_dir, | |
output_suffix = params$output_suffix, | |
metadata_sep = params$metadata_sep, | |
batch_position = params$batch_position, | |
condition_position = params$condition_position, | |
control_string = params$control_string, | |
fcs_glob = params$fcs_glob, | |
plots_in_tabs = params$plots_in_tabs, | |
cytofkit_params = params$cytofkit_params, | |
cydar_params = params$cydar_params | |
) | |
) |> | |
rlang::expr_text(width = 20) |> | |
styler::style_text() | |
cat("```") | |
``` | |
### Session info | |
Cydar batch correction was built using `r R.version.string`. | |
```{r bcwc-print-options, echo=FALSE} | |
knitr::opts_chunk$set(rows.print = 1000) | |
``` | |
```{r bcwc-session-info, echo=FALSE} | |
pkgs <- c( | |
"fs", | |
"tidyverse", | |
"cytofkit2", | |
"cydar", | |
"purrr", | |
"stringr", | |
"flowCore", | |
"here", | |
"tibble", | |
"dplyr", | |
"scales", | |
"styler", | |
"rlang" | |
) | |
pkgs |> | |
sessioninfo::package_info(dependencies = FALSE) |> | |
tibble::as_tibble() |> | |
dplyr::transmute( | |
Package = package, | |
Version = ondiskversion, | |
Source = stringr::str_replace(source, "@", "\\\\@"), | |
Source = stringr::str_remove(Source, "\\\\@[a-f0-9]*") | |
) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment