Skip to content

Instantly share code, notes, and snippets.

View slavailn's full-sized avatar

slava ilnytskyy slavailn

View GitHub Profile
@slavailn
slavailn / biomaRt_paralogs,R
Created May 17, 2021 05:00
Get paralogs for the list of genes using biomaRt
# Get paralogs for the list of genes
library(biomaRt)
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
gene_id <- c("TPM1", "BOD1", "ADAP1")
results <- getBM(attributes = c("ensembl_gene_id",
"external_gene_name",
"hsapiens_paralog_ensembl_gene",
"hsapiens_paralog_associated_gene_name"),
filters = "external_gene_name",
@slavailn
slavailn / remove_carriage.sh
Created April 29, 2021 22:54
Remove carriage returns from the windows file for the use in linux
#! /bin/bash
sed -i 's/\r//g' file.txt
@slavailn
slavailn / geneSCF.sh
Last active January 12, 2025 04:35
geneSCF_analysis
#! /bin/bash
# Kegg enrichment
~/programs/geneSCF-master-source-v1.1-p2/geneSCF -m='update' -i='sig_ids.txt' -t='gid' -db='KEGG' -org='mmu' -p='yes' -bg=20000 -o='.'
# Prepare GO database
~/programs/geneSCF-master-source-v1.1-p2/prepare_database -db=GO_all -org=mgi
# Run encrichment against cellular component (CC), biological process (BP), molecular function (MF)
~/programs/geneSCF-master-source-v1.1-p2/geneSCF -m=normal -i=sig_ids.txt -o=. -t=gid -db=GO_CC -bg=20000 --plot=yes -org=mgi
@slavailn
slavailn / plot_coverage.R
Created August 9, 2020 00:31
plot gene/transcript read coverage in R
library("wiggleplotr")
library("dplyr")
library("GenomicRanges")
library("GenomicFeatures")
library("EnsDb.Mmusculus.v79")
library("ensembldb")
setwd(<working_dir>)
list.files()
@slavailn
slavailn / cluster_sidebar.R
Created July 10, 2020 00:16
How to attach a side bar with cluster labels to a heatmap (pheatmap). Just a little snippet.
library(pheatmap)
# We need to use cutree inside pheatmap()
# functio, capture the returned object into a variable,
# and than extract the tree, cut it again to the same
# number of clusters and reorder.
# Resulting vector is converted to factor and used in the annotation data frame
# to create cluster labels
x <- some_matrix # matrix to cluster and show as a heatmap
@slavailn
slavailn / gage_wikipath.R
Created June 28, 2020 00:27
Run GAGE analysis against wikipathways (modify as neccessary)
library(gage)
library(org.Hs.eg.db)
library(pathview)
library(rWikiPathways)
library(DESeq2)
## Get annotation from csv file (obtained via biomaRt)
annot <- read.table("../annotation/annotation.csv", sep = ",", header = T,
quote = "\"", fill = T, stringsAsFactors = F)
@slavailn
slavailn / download_with_geoquery.R
Created June 25, 2020 07:27
Dowload data from GEO with GEOquery (short note)
library(GEOquery)
# Get GSE
gse <- getGEO("GSE147507",GSEMatrix=FALSE)
head(Meta(gse))
# names of all the GSM objects contained in the GSE
names(GSMList(gse))
# and get the first GSM object on the list
@slavailn
slavailn / get_longest_transcript.R
Created March 2, 2020 21:46
Get longest transcript for a set of select genes and write them out as fasta
library("GenomicFeatures")
library("DESeq2")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("BSgenome.Hsapiens.UCSC.hg19")
library("biomaRt")
setwd("<working_dir>")
# Assuming we are working with DESeq2 projects
# get all genes expressed in the project
@slavailn
slavailn / SPIAcalc.R
Created August 12, 2019 21:52
Function to calculate SPIA scores for DESeq2 results table
resFile <- "DESeq2_result_name" # has to have entrez ids as annotations
compName <- "comparison" # Name of the comparison
calcSPIA <- function(resFile, compName)
{
res <- read.table(resFile, header = T, sep = "\t", fill=T, quote = "\"", stringsAsFactors = F)
# Select differentially expressed genes (adjusted p-value < 0.05)
# This is a named vector, where names are entrez ids, and values are log2 fold changes
ids <- res[res$padj < 0.05,]$entrezgene_id
@slavailn
slavailn / collect_HISAT2_mapping_stats.sh
Created June 8, 2019 00:15
Collect HISAT2 mapping statistics from multiple summary files located in the current directory and organized them into a table
#! /bin/bash
# 22498713 reads; of these:
# 22498713 (100.00%) were unpaired; of these:
# 1754404 (7.80%) aligned 0 times
# 17667104 (78.52%) aligned exactly 1 time
# 3077205 (13.68%) aligned >1 times
# 92.20% overall alignment rate
echo -e "sampleID\ttotal_reads\tunmapped\taligned_one_time\taligned_multiple\talignment_rate"