milescsmith · July 26, 2022 22:21
diff --git a/tirosh_module_scoring.R b/tirosh_module_scoring.R
 utils::globalVariables("where")

 tirosh_score_modules <- function(
  expr_obj, # rows == genes, columns == samples
  module_list, # named list
  breaks = 25, # int
  num_ctrls = 100, # int
  parallel = FALSE # bool
 ) {

  if (parallel == TRUE){
    map_func = furrr::future_map
    imap_dfr_func = furrr::future_imap_dfr
    imap_func = furrr::future_imap
  } else {
    map_func = purrr::map
    imap_dfr_func = purrr::imap_dfr
    imap_func = purrr::imap
  }
  
  expr_obj <- expr_obj[!base::duplicated(rowMeans(expr_obj)), ]
  data_avg <- Matrix::rowMeans(x = expr_obj)
  data_avg <- data_avg[order(data_avg)]
  data_cut <-
    ggplot2::cut_number(
      x = data_avg + rnorm(n = length(data_avg)) / 1e30,
      n = num_ctrls,
      labels = FALSE,
      right = FALSE
    )

  names(x = data_cut) <- names(x = data_avg)

  ctrl_use <-
    imap_func(
      .x = module_list,
      .f = \(module_features, index){
        module_features_use <- intersect(module_features, rownames(expr_obj))
            map_func(
              .x = module_features_use,
              .f = \(feature){
                names(
                  x = sample(
                    x = data_cut[which(x = data_cut == data_cut[feature])],
                    size = num_ctrls,
                    replace = FALSE
                    )
                  )
                }) |>
              unlist() |>
              unique()
            })
  
  message("found bins")
  ctrl_scores <-
    imap_dfr_func(
      .x = ctrl_use,
      .f = \(features, index){
        expr_obj |>
          tibble::as_tibble(rownames = "gene") |>
          dplyr::filter(gene %in% features) |>
          dplyr::summarise(
            dplyr::across(
              where(is.numeric),
              mean
              )
            ) |>
          tidyr::pivot_longer(cols = everything()) |>
          tibble::deframe()
      })
  
  message("scored control bins")
  feature_scores <-
    imap_dfr_func(
      .x = module_list,
      .f = \(features, index) {
        expr_obj |>
          tibble::as_tibble(rownames = "gene") |>
          dplyr::filter(gene %in% features) |>
          dplyr::summarise(
            dplyr::across(
              where(is.numeric),
              mean
            )
          ) |>
          tidyr::pivot_longer(cols = everything()) |>
          tibble::deframe()
  })
  message("scored modules")
  module_scores <- feature_scores - ctrl_scores

  module_scores <-
    module_scores |>
    magrittr::set_rownames(names(module_list)) |>
    magrittr::set_colnames(colnames(expr_obj)) |>
    as.data.frame()

  module_scores
 }
	utils::globalVariables("where")

	tirosh_score_modules <- function(
	expr_obj, # rows == genes, columns == samples
	module_list, # named list
	breaks = 25, # int
	num_ctrls = 100, # int
	parallel = FALSE # bool
	) {

	if (parallel == TRUE){
	map_func = furrr::future_map
	imap_dfr_func = furrr::future_imap_dfr
	imap_func = furrr::future_imap
	} else {
	map_func = purrr::map
	imap_dfr_func = purrr::imap_dfr
	imap_func = purrr::imap
	}

	expr_obj <- expr_obj[!base::duplicated(rowMeans(expr_obj)), ]
	data_avg <- Matrix::rowMeans(x = expr_obj)
	data_avg <- data_avg[order(data_avg)]
	data_cut <-
	ggplot2::cut_number(
	x = data_avg + rnorm(n = length(data_avg)) / 1e30,
	n = num_ctrls,
	labels = FALSE,
	right = FALSE
	)

	names(x = data_cut) <- names(x = data_avg)

	ctrl_use <-
	imap_func(
	.x = module_list,
	.f = \(module_features, index){
	module_features_use <- intersect(module_features, rownames(expr_obj))
	map_func(
	.x = module_features_use,
	.f = \(feature){
	names(
	x = sample(
	x = data_cut[which(x = data_cut == data_cut[feature])],
	size = num_ctrls,
	replace = FALSE
	)
	)
	}) \|>
	unlist() \|>
	unique()
	})

	message("found bins")
	ctrl_scores <-
	imap_dfr_func(
	.x = ctrl_use,
	.f = \(features, index){
	expr_obj \|>
	tibble::as_tibble(rownames = "gene") \|>
	dplyr::filter(gene %in% features) \|>
	dplyr::summarise(
	dplyr::across(
	where(is.numeric),
	mean
	)
	) \|>
	tidyr::pivot_longer(cols = everything()) \|>
	tibble::deframe()
	})

	message("scored control bins")
	feature_scores <-
	imap_dfr_func(
	.x = module_list,
	.f = \(features, index) {
	expr_obj \|>
	tibble::as_tibble(rownames = "gene") \|>
	dplyr::filter(gene %in% features) \|>
	dplyr::summarise(
	dplyr::across(
	where(is.numeric),
	mean
	)
	) \|>
	tidyr::pivot_longer(cols = everything()) \|>
	tibble::deframe()
	})
	message("scored modules")
	module_scores <- feature_scores - ctrl_scores

	module_scores <-
	module_scores \|>
	magrittr::set_rownames(names(module_list)) \|>
	magrittr::set_colnames(colnames(expr_obj)) \|>
	as.data.frame()

	module_scores
	}
No results found