slavailn’s gists

slavailn / biomaRt_paralogs,R

Created May 17, 2021 05:00

Get paralogs for the list of genes using biomaRt

	# Get paralogs for the list of genes

	library(biomaRt)
	human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
	gene_id <- c("TPM1", "BOD1", "ADAP1")
	results <- getBM(attributes = c("ensembl_gene_id",
	"external_gene_name",
	"hsapiens_paralog_ensembl_gene",
	"hsapiens_paralog_associated_gene_name"),
	filters = "external_gene_name",

slavailn / remove_carriage.sh

Created April 29, 2021 22:54

Remove carriage returns from the windows file for the use in linux

slavailn / geneSCF.sh

Last active January 12, 2025 04:35

geneSCF_analysis

	#! /bin/bash

	# Kegg enrichment
	~/programs/geneSCF-master-source-v1.1-p2/geneSCF -m='update' -i='sig_ids.txt' -t='gid' -db='KEGG' -org='mmu' -p='yes' -bg=20000 -o='.'

	# Prepare GO database
	~/programs/geneSCF-master-source-v1.1-p2/prepare_database -db=GO_all -org=mgi

	# Run encrichment against cellular component (CC), biological process (BP), molecular function (MF)
	~/programs/geneSCF-master-source-v1.1-p2/geneSCF -m=normal -i=sig_ids.txt -o=. -t=gid -db=GO_CC -bg=20000 --plot=yes -org=mgi

slavailn / plot_coverage.R

Created August 9, 2020 00:31

plot gene/transcript read coverage in R

slavailn / cluster_sidebar.R

Created July 10, 2020 00:16

How to attach a side bar with cluster labels to a heatmap (pheatmap). Just a little snippet.

	library(pheatmap)

	# We need to use cutree inside pheatmap()
	# functio, capture the returned object into a variable,
	# and than extract the tree, cut it again to the same
	# number of clusters and reorder.
	# Resulting vector is converted to factor and used in the annotation data frame
	# to create cluster labels

	x <- some_matrix # matrix to cluster and show as a heatmap

slavailn / gage_wikipath.R

Created June 28, 2020 00:27

Run GAGE analysis against wikipathways (modify as neccessary)

	library(gage)
	library(org.Hs.eg.db)
	library(pathview)
	library(rWikiPathways)
	library(DESeq2)

	## Get annotation from csv file (obtained via biomaRt)
	annot <- read.table("../annotation/annotation.csv", sep = ",", header = T,
	quote = "\"", fill = T, stringsAsFactors = F)

slavailn / download_with_geoquery.R

Created June 25, 2020 07:27

Dowload data from GEO with GEOquery (short note)

	library(GEOquery)

	# Get GSE
	gse <- getGEO("GSE147507",GSEMatrix=FALSE)
	head(Meta(gse))

	# names of all the GSM objects contained in the GSE
	names(GSMList(gse))

	# and get the first GSM object on the list

slavailn / get_longest_transcript.R

Created March 2, 2020 21:46

Get longest transcript for a set of select genes and write them out as fasta

	library("GenomicFeatures")
	library("DESeq2")
	library("TxDb.Hsapiens.UCSC.hg19.knownGene")
	library("BSgenome.Hsapiens.UCSC.hg19")
	library("biomaRt")

	setwd("<working_dir>")

	# Assuming we are working with DESeq2 projects
	# get all genes expressed in the project

slavailn / SPIAcalc.R

Created August 12, 2019 21:52

Function to calculate SPIA scores for DESeq2 results table

	resFile <- "DESeq2_result_name" # has to have entrez ids as annotations
	compName <- "comparison" # Name of the comparison

	calcSPIA <- function(resFile, compName)
	{
	res <- read.table(resFile, header = T, sep = "\t", fill=T, quote = "\"", stringsAsFactors = F)

	# Select differentially expressed genes (adjusted p-value < 0.05)
	# This is a named vector, where names are entrez ids, and values are log2 fold changes
	ids <- res[res$padj < 0.05,]$entrezgene_id

slavailn / collect_HISAT2_mapping_stats.sh

Created June 8, 2019 00:15

Collect HISAT2 mapping statistics from multiple summary files located in the current directory and organized them into a table

	#! /bin/bash

	# 22498713 reads; of these:
	# 22498713 (100.00%) were unpaired; of these:
	# 1754404 (7.80%) aligned 0 times
	# 17667104 (78.52%) aligned exactly 1 time
	# 3077205 (13.68%) aligned >1 times
	# 92.20% overall alignment rate

	echo -e "sampleID\ttotal_reads\tunmapped\taligned_one_time\taligned_multiple\talignment_rate"

slava ilnytskyy slavailn