Skip to content

Instantly share code, notes, and snippets.

View ATpoint's full-sized avatar

Alexander Bender ATpoint

  • Germany
  • 08:47 (UTC +02:00)
View GitHub Profile
@ATpoint
ATpoint / misc_utils.R
Last active May 5, 2025 08:51
Functio
#' Tabulate the runs of consecutive equal values in a vector
#'
#' Runs are determined with `rle()`. For every run the 1-based start and end
#' position within `x` and the run length are reported.
#'
#' @param x A vector accepted by `rle()`.
#' @return A data.frame with one row per run and the columns `run` (running
#'   index of the run), `start`, `end` (positions in `x`) and `length`.
#'   A zero-length `x` gives a data.frame with zero rows.
get_rle_ranges <- function(x) {
  run_lengths <- rle(x)$lengths
  ends <- cumsum(run_lengths)
  # Derive the starts from ends and lengths instead of prepending a literal 1:
  # the prepended form has length 1 even when there are no runs, which makes
  # data.frame() fail on empty input because the column lengths differ.
  starts <- ends - run_lengths + 1
  data.frame(
    run = seq_along(run_lengths),
    start = starts,
    end = ends,
    length = ends - starts + 1
  )
}
@ATpoint
ATpoint / servering.md
Created April 3, 2025 12:18
Reminders for checking files

Some useful reminders for administering a server

# List all .lif and .lifext files larger than 1 GB

find . -type f -size +1G -print | grep -E "\\.lif$|\\.lifext$" > lif_files.txt

# Print the combined size (in GB) of all files listed in lif_files.txt
awk '
 {
@ATpoint
ATpoint / nf2sf.R
Created January 22, 2025 09:38
Convert TMM-based normalization factor and library size from edgeR to DESeq2-compatible 1-centered size factors
# Say we have a SingleCellExperiment "se"
# (assumes se$norm.factors and se$lib.size hold the edgeR normalization
# factors and library sizes, one value per sample/cell -- TODO confirm)
nf <- se$norm.factors
ls <- se$lib.size
# effective library size: library size scaled by its normalization factor
els <- ls * nf
# size factors centered around 1: divide each effective library size by the
# geometric mean of all effective library sizes (exp of the mean of the logs),
# so the resulting size factors have a geometric mean of 1
geomeans <- exp(mean(log(els)))
sf <- els / geomeans
@ATpoint
ATpoint / retrieveREACTOME.R
Created December 19, 2024 13:00
Get REACTOME terms and associated genes independent of the REACTOME website via ReactomePA
library(tidyverse)
library(ReactomePA)
library(biomaRt)
r_human <- ReactomePA:::get_Reactome_DATA("human")
# This package maps to Entrez so we need the conversion table to Ensembl IDs
human_entrez2ensembl <- getBM(
attributes = c("entrezgene_id", "ensembl_gene_id"),
mart = mart_human
@ATpoint
ATpoint / retrieveGO.R
Created November 26, 2024 10:33
Get GO terms using annotation packages
# Retrieve GO terms in R using AnnotationDbi and org.Mm.eg.db
library(org.Mm.eg.db)
library(AnnotationDbi)
all_go <- AnnotationDbi::select(
org.Mm.eg.db,
keys = keys(org.Mm.eg.db, keytype = "ENTREZID"),
columns = c("GO", "ONTOLOGY", "ENSEMBL"),
keytype = "ENTREZID"
@ATpoint
ATpoint / retrieveKEGG.R
Last active January 24, 2025 13:46
Retrieve human-readable KEGG pathway name to Ensembl gene ID mapping table
pkgs <- c("KEGGREST", "org.Mm.eg.db", "tidyverse", "AnnotationDbi")
invisible(lapply(pkgs, function(x) suppressPackageStartupMessages(library(x, character.only = TRUE))))
pathway_id_2_name <-
keggList("pathway", "mmu") %>%
enframe(name = "pathway_id", value = "pathway_name") %>%
mutate(pathway_id = gsub("mmu", "", pathway_id),
pathway_name = gsub(" - .*", "", pathway_name))
pathway_2_entrez <-
@ATpoint
ATpoint / compare_md5.sh
Last active October 9, 2024 09:00
Compare md5sums in a file (output of md5sum) with current md5 of these files
#!/bin/bash
# Read existing md5sums and use to confirm integrity of fastq files
#SBATCH --nodes=1
#SBATCH --cpus-per-task=36
#SBATCH --partition=normal
#SBATCH --time=08:00:00
#SBATCH [email protected]
#SBATCH --job-name=md5checker
@ATpoint
ATpoint / velocity_windows.R
Created October 8, 2024 08:35
Get scvelo going on Windows using reticulate and conda
# Install R infrastructure
pkg_install <- c("reticulate", "zellkonverter")
BiocManager::install(pkg_install, update = FALSE)
# Install conda itself via the reticulate package, takes time...
reticulate::install_miniconda()
# Create an environment for velocity with a specific python version that is required
# to install a downgraded matplotlib, see below
reticulate::conda_create(envname = "velocity", python_version = "3.8.19")
geom_segment(data=metadata(sce)$grid.df,
mapping=aes(x=start.1, y=start.2, xend=end.1, yend=end.2),
size=1,
arrow=arrow(length=unit(0.1, "inches"), type="closed"), inherit.aes = FALSE)
@ATpoint
ATpoint / readLoomMatrices.R
Created January 23, 2024 08:17
Standalone function to read loom files from velocyto into R as CsparseMatrix, adapted from velocyto.R.
#' Read loom files into R as CsparseMatrix, depending on Matrix and hdf5r
#' @param file path to the loom file on disk
#' @param include.ambiguous logical, whether to also read the "ambiguous" counts from velocyto, default FALSE
#'
readLoomMatrices <- function(file, include.ambiguous=FALSE) {
require(Matrix)
require(hdf5r)
engine='hdf5r'