Some reminders useful when administering a server
# List all .lif and .lifext files larger than 1 GiB
find . -type f -size +1G -print | grep -E "\\.lif$|\\.lifext$" > lif_files.txt
# Print combined size of all files in that file in GB
awk '
{
#' Locate the runs of identical consecutive values in a vector
#'
#' @param x An atomic vector, as accepted by `rle()`.
#' @return A data.frame with one row per run and the columns `run`
#'   (run index), `start` and `end` (first and last position of the run
#'   in `x`) and `length` (number of elements in the run). A zero-length
#'   `x` gives a zero-row data.frame.
get_rle_ranges <- function(x) {
  rle_lengths <- rle(x)$lengths
  ends <- cumsum(rle_lengths)
  # Derive starts from ends and lengths rather than by shifting `ends`:
  # the shifted form always prepends a 1 and therefore breaks on empty input.
  starts <- ends - rle_lengths + 1
  lens <- (ends - starts) + 1
  data.frame(
    run = seq_along(rle_lengths),
    start = starts,
    end = ends,
    length = lens
  )
}
Some reminders useful when administering a server
# List all .lif and .lifext files larger than 1 GiB
find . -type f -size +1G -print | grep -E "\\.lif$|\\.lifext$" > lif_files.txt
# Print combined size of all files in that file in GB
awk '
{
# Say we have a SingleCellExperiment "se" that provides norm.factors
# and lib.size via `$`
nf <- se$norm.factors
ls <- se$lib.size
# effective library size
els <- ls * nf
# size factors centered around 1: dividing by the geometric mean of the
# effective library sizes makes the geometric mean of sf equal to 1
geomeans <- exp(mean(log(els)))
sf <- els / geomeans
library(tidyverse) | |
library(ReactomePA) | |
library(biomaRt) | |
r_human <- ReactomePA:::get_Reactome_DATA("human") | |
# This package maps to Entrez so we need the conversion table to Ensembl IDs | |
human_entrez2ensembl <- getBM( | |
attributes = c("entrezgene_id", "ensembl_gene_id"), | |
mart = mart_human |
# Retrieve GO terms in R using AnnotationDbi and org.Mm.eg.db | |
library(org.Mm.eg.db) | |
library(AnnotationDbi) | |
all_go <- AnnotationDbi::select( | |
org.Mm.eg.db, | |
keys = keys(org.Mm.eg.db, keytype = "ENTREZID"), | |
columns = c("GO", "ONTOLOGY", "ENSEMBL"), | |
keytype = "ENTREZID" |
# Attach all required packages in one go, silencing their startup messages
pkgs <- c("KEGGREST", "org.Mm.eg.db", "tidyverse", "AnnotationDbi")
invisible(lapply(pkgs, function(x) {
  suppressPackageStartupMessages(library(x, character.only = TRUE))
}))

# Lookup table from KEGG pathway id to pathway name for the "mmu" organism:
# the "mmu" text is removed from the ids and everything from " - " onwards
# is removed from the names
pathway_id_2_name <-
  keggList("pathway", "mmu") %>%
  enframe(name = "pathway_id", value = "pathway_name") %>%
  mutate(
    pathway_id = gsub("mmu", "", pathway_id),
    pathway_name = gsub(" - .*", "", pathway_name)
  )
pathway_2_entrez <- |
#!/bin/bash | |
# Read existing md5sums and use to confirm integrity of fastq files | |
#SBATCH --nodes=1 | |
#SBATCH --cpus-per-task=36 | |
#SBATCH --partition=normal | |
#SBATCH --time=08:00:00 | |
#SBATCH [email protected] | |
#SBATCH --job-name=md5checker |
# Install R infrastructure
pkg_install <- c("reticulate", "zellkonverter")
BiocManager::install(pkg_install, update = FALSE)

# Install conda itself via the reticulate package, takes time...
reticulate::install_miniconda()

# Create an environment for velocity with a specific python version that is required
# to install a downgraded matplotlib, see below
reticulate::conda_create(envname = "velocity", python_version = "3.8.19")
# One closed-head arrow per row of metadata(sce)$grid.df, running from
# (start.1, start.2) to (end.1, end.2); inherit.aes = FALSE keeps the
# aesthetics of the enclosing plot from being applied to this layer.
geom_segment(
  data = metadata(sce)$grid.df,
  mapping = aes(x = start.1, y = start.2, xend = end.1, yend = end.2),
  # `size` for line width is deprecated since ggplot2 3.4.0
  linewidth = 1,
  arrow = arrow(length = unit(0.1, "inches"), type = "closed"),
  inherit.aes = FALSE
)
#' Read loom files into R as CsparseMatrix, depending on Matrix and hdf5r | |
#' @param file path to the loom file on disk
#' @param include.ambiguous logical, whether to also read the "ambiguous" counts from velocyto, default FALSE | |
#' | |
readLoomMatrices <- function(file, include.ambiguous=FALSE) { | |
require(Matrix) | |
require(hdf5r) | |
engine='hdf5r' |