Some reminders that are useful when administering a server
# List all files with that extension 1GB or larger
find . -type f -size +1G -print | grep -E "\\.lif$|\\.lifext$" > lif_files.txt
# Print combined size of all files in that file in GB
awk '
#' Summarise the runs of a vector as start/end index ranges
#'
#' Runs are determined with `rle()`, i.e. maximal stretches of consecutive
#' equal values. The run values themselves are not returned, only positions.
#'
#' @param x A vector accepted by `rle()`. May be empty, in which case a
#'   zero-row data frame is returned instead of an error.
#' @return A data frame with one row per run and the columns `run` (running
#'   index of the run), `start` and `end` (1-based, inclusive positions of the
#'   run in `x`) and `length` (number of elements in the run).
get_rle_ranges <- function(x) {
  run_lengths <- rle(x)$lengths
  ends <- cumsum(run_lengths)
  # Derive starts from ends and lengths rather than from c(1, ...): this stays
  # zero-length for empty input, so data.frame() gets columns of equal length.
  starts <- ends - run_lengths + 1
  lens <- (ends - starts) + 1
  data.frame(
    run = seq_along(run_lengths),
    start = starts,
    end = ends,
    length = lens
  )
}
Some reminders that are useful when administering a server
# List all files with that extension 1GB or larger
find . -type f -size +1G -print | grep -E "\\.lif$|\\.lifext$" > lif_files.txt
# Print combined size of all files in that file in GB
awk '
{| # Say we have a SingleCellExperiment "se" | |
| nf <- se$norm.factors | |
| ls <- se$lib.size | |
| # effective library size | |
| els <- ls * nf | |
| # size factors centered around 1 | |
| geomeans <- exp(mean(log(els))) | |
| sf <- els / geomeans |
| library(tidyverse) | |
| library(ReactomePA) | |
| library(biomaRt) | |
| r_human <- ReactomePA:::get_Reactome_DATA("human") | |
| # This package maps to Entrez so we need the conversion table to Ensembl IDs | |
| human_entrez2ensembl <- getBM( | |
| attributes = c("entrezgene_id", "ensembl_gene_id"), | |
| mart = mart_human |
| # Retrieve GO terms in R using AnnotationDbi and org.Mm.eg.db | |
| library(org.Mm.eg.db) | |
| library(AnnotationDbi) | |
| all_go <- AnnotationDbi::select( | |
| org.Mm.eg.db, | |
| keys = keys(org.Mm.eg.db, keytype = "ENTREZID"), | |
| columns = c("GO", "ONTOLOGY", "ENSEMBL"), | |
| keytype = "ENTREZID" |
| pkgs <- c("KEGGREST", "org.Mm.eg.db", "tidyverse", "AnnotationDbi") | |
| invisible(lapply(pkgs, function(x) suppressPackageStartupMessages(library(x, character.only = TRUE)))) | |
| pathway_id_2_name <- | |
| keggList("pathway", "mmu") %>% | |
| enframe(name = "pathway_id", value = "pathway_name") %>% | |
| mutate(pathway_id = gsub("mmu", "", pathway_id), | |
| pathway_name = gsub(" - .*", "", pathway_name)) | |
| pathway_2_entrez <- |
| #!/bin/bash | |
| # Read existing md5sums and use to confirm integrity of fastq files | |
| #SBATCH --nodes=1 | |
| #SBATCH --cpus-per-task=36 | |
| #SBATCH --partition=normal | |
| #SBATCH --time=08:00:00 | |
| #SBATCH [email protected] | |
| #SBATCH --job-name=md5checker |
| # Install R infrastructure | |
| pkg_install <- c("reticulate", "zellkonverter") | |
| BiocManager::install(pkg_install, update = FALSE) | |
| # Install conda itself via the reticulate package, takes time... | |
| reticulate::install_miniconda() | |
| # Create an environment for velocity with a specific python version that is required | |
| # to install a downgraded matplotlib, see below | |
| reticulate::conda_create(envname = "velocity", python_version = "3.8.19") |
| geom_segment(data=metadata(sce)$grid.df, | |
| mapping=aes(x=start.1, y=start.2, xend=end.1, yend=end.2), | |
| size=1, | |
| arrow=arrow(length=unit(0.1, "inches"), type="closed"), inherit.aes = FALSE) |
| #' Read loom files into R as CsparseMatrix, depending on Matrix and hdf5r | |
| #' @param file path to the loom file on disk | |
| #' @param include.ambiguous logical, whether to also read the "ambiguous" counts from velocyto, default FALSE | |
| #' | |
| readLoomMatrices <- function(file, include.ambiguous=FALSE) { | |
| require(Matrix) | |
| require(hdf5r) | |
| engine='hdf5r' |