Skip to content

Instantly share code, notes, and snippets.

View mbk0asis's full-sized avatar

Byungkuk Min mbk0asis

  • Korea Research Institute of Bioscience and Biotechnology (KRIBB)
  • Daejeon, S.Korea
  • 01:11 (UTC +09:00)
View GitHub Profile
@mbk0asis
mbk0asis / INDEL profiler
Created May 31, 2018 04:58
To profile the INDEL patterns in amplicon sequncing reads
printf "\nINDEL.profiler\n\n usage: ./INDEL.profiler.sh READs.fasta Amplicon.seq.fasta\n\n"
cat $1 | \
while read l; do
printf " \n"
read L
echo $l $L | sed 's/ /\n/g' | cat - $2 | muscle -quiet | fasta_formatter | \
while read c; do
printf "\n"$c"\nDEL:\n"
read d;
# counting RNA-seq reads
# similar results with htseq-count w/ "UNION"
featureCounts -T 35 -p -t exon -g rmsk_id -a L1.ERV1.ERVK.ERVL.MaLR.gtf -o featureCounts.results \
sample1.bam sample2.bam sample3.bam
source("https://bioconductor.org/biocLite.R")
biocLite("BSgenome.Mmusculus.UCSC.mm10")
setwd("~/BIO2/MBDseq_Mouse_Muscle_Tcell_JungHo")
library(BSgenome.Mmusculus.UCSC.mm10)
chrs <- names(Mmusculus)[1:21]
CGs <- lapply(chrs, function(x) start(matchPattern("CG", Mmusculus[[x]])))
# index the genome
$ bwa index hs37d5.fa
# create a genome dictionary file
$ picard-tools CreateSequenceDictionary R=genome.fa O=genome.dict
# align reads using 'bwa mem'
$ls -1 *.gz | cut -d. -f1 | sort | uniq | while read l; do bwa mem -t 40 ~/00-NGS/Annotation/HipSci/hs37d5.fa $l.1.val.1.fq.gz $l.2.val.2.fq.gz > $l.PE.sam ; done &
# sam to bam
####################################################
# #
# to report base composition at all mapped sites #
# #
####################################################
# only when NM tag is missing in BAM
$ samtools calmd BAM ref.fasta | samtools view -b - -o BAM.NMtag.bam
# bam-readcount (use "-l" for regions of interest, "-w 0" for no warning)
library(ggplot2)
library(ggpmisc)
library(ggpubr)
library(ggrepel)
library(reshape2)
setwd("~/BIO2/DAM_ID/DamID_2018/SE/mm9")
setwd("~/BIO2/DAM_ID/RNAseq_shDnmt1/mm9")
te = "LINE.5kb"
@mbk0asis
mbk0asis / TCGAbiolinks - DESeq2 pipeline
Created May 20, 2019 08:20
TCGAbiolinks - DESeq2 pipeline
BiocUpgrade()
source("http://bioconductor.org/biocLite.R")
biocLite(c("DESeq2","gplots","pcaExplorer","bovine.db","calibrate","AnnotationFuncs","gage","Rtsne","ggrepel"))
#library(TCGAbiolinks)
library(DESeq2)
library(pcaExplorer)
library(ggfortify)
library(ggplot2)
library(gplots)
#!/bin/bash
# Vatiants calling using 'bcftools mpileup - htslib'
export LC_ALL=C
printf "*** SNVcounter\n
*** 'samtools, bcftools, and bedtools' must be installed to run this script\n
*** Prepare 'reference fasta' and 'SNV list' in BED format\n
*** merge consecutive SNVs using\n
*** 'sort -k1,1 -k2,2n SNV.bed | mergeBed -c 4 -o collapse'\n
library(reshape2)
library(ggplot2)
library(ggpubr)
library(tidyverse)
library(gridExtra)
library(Biobase)
library(GEOquery)
library(limma)
library(matrixStats)