aseetharam / NeuPIMs_v2.0.def

Created June 20, 2025 16:58

	Bootstrap: docker
	From: ubuntu:22.04 # Ubuntu-based for compatibility with older GLIBC

	# Image metadata. The Description names the base OS, so it must agree with the From: line.
	%labels
	Author Geoffrey Lentner and Arun Seetharam
	Description "Apptainer image for NeuPIMs with Ubuntu 22.04, custom CMake, Conan 1.57.0"

	%environment

	export LANG=C

aseetharam / hifiasm_v0.20.0-r639.def

Created October 15, 2024 13:43

Singularity recipe for HiFiasm, a fast and accurate assembler designed for HiFi reads produced by PacBio. Building this fresh will get you the latest version.

	BootStrap: docker
	From: ubuntu:20.04

	%help
	HiFiasm is a fast and accurate assembler designed for HiFi reads produced by PacBio.

	%labels
	Maintainer Arun Seetharam
	Version 0.1

aseetharam / geneBed.sh

Last active March 30, 2023 16:44 — forked from dinovski/geneBed.sh

Add padding upstream and/or downstream of TSS coordinates

	#!/bin/bash
	# Locate the external tools and set up the working paths used by this script.

	# Directory that holds the bedtools executable. Resolved in two steps so that
	# a missing bedtools leaves BEDTOOLS empty (instead of running dirname with no
	# operand) and so that an install path containing spaces stays intact.
	BEDTOOLS=$(command -v bedtools) && BEDTOOLS=$(dirname "$BEDTOOLS")
	# Full paths to featureCounts and R; each is empty when the tool is not on PATH.
	FEATURECOUNTS=$(command -v featureCounts)
	R=$(command -v R)

	# IDIR is the directory the script was started from; DIR is the output
	# directory located directly below it.
	IDIR=$(pwd)
	DIR=$(pwd)/GeneBed_output

	# Name of the annotation GTF file.
	GTF=gencode.vM30.annotation.gtf

aseetharam / bedtools_cheatsheet.md

Last active January 5, 2021 16:44 — forked from raivivek/bedtools_cheatsheet.md

Bedtools cheatsheet

Bedtools Cheatsheet

General:

Tools	Description
flank	Create new intervals from the flanks of existing intervals.
slop	Adjust the size of intervals.
shift	Adjust the position of intervals.
subtract	Remove intervals based on overlaps b/w two files.

aseetharam / read_limit

Created September 29, 2017 14:18

	canu \
	-d /project/rw_genome/arun_test/20170928_canu_b \
	-p 20170928_canu_b \
	genomeSize=770m \
	maxMemory=1096g \
	maxThreads=40 \
	minReadLength=1000 \
	corOutCoverage=35 \
	gridOptionsJobName=canu_as-b \
	corMhapFilterThreshold=0.0000000002 \

aseetharam / mem_run

Created September 29, 2017 14:16

	canu \
	-d /project/rw_genome/arun_test/20170928_canu_c \
	-p 20170928_canu_c genomeSize=770m \
	maxMemory=128g \
	maxThreads=16 \
	gridOptionsJobName=canu_as1 \
	gridOptionsExecutive="--mem-per-cpu=5g --time=2:00:00" \
	gridOptionsCORMHAP="--mem-per-cpu=5g" \
	gridOptionsCORMHAP="--time=1:00:00" \
	gridOptionsOBTMHAP="--mem-per-cpu=5g --time=1:00:00" \

aseetharam / spacers_3.sh

Last active April 25, 2016 14:08

third version

	# here is an example to write separate files based on number of occurrences of spacers:
	# perl collects every 32-base spacer that follows the repeat on a line and prints
	# them space-separated on one output line (followed by a tab); awk then keeps only
	# the lines with exactly two fields, i.e. the lines that carry two spacers.
	perl -ne 'while ($_ =~ m/GTGTTCCCCGCGCCAGCGGGGATAAACC([ATCG]{32})/g) {push(@matches, $1)}if(@matches){ print "@matches\t";undef @matches; print "\n"}' example.list | awk 'NF==2'
	GGTAACTTGCCGGAGGGCAGCGACCAGTTTAA GATGCACAGCCTGTTGCCATTCCGCCTCCTGT
	GCAACTCGGTCGCCGCATACACTATTCTCAGA GGAAAGCCTCTTTCCTTTGTTTACGATATTGC
	GGTTTTGCGCCATTCGATGGTGTCCGGGATCT GCGGCCCACGCTGGTTTGCCCCAGCAGGCGAA
	GCGCTGATTTCTTAATGTGATCGGTAGCACGT AAAAAATTATATTGACGCGGCGAGTTATAATA
	GTGCTCCAGTGGCTTCTGTTTCTATCAGCTGT GGGTGAACACTATCCCATATCACCAGCTCACC
	GGAATATTCAGCGATTTGCCCGAGCTTGCGAG GCGTGCCGCCCCCAGCAACAATACGCTACTGA

	# see the last part (awk 'NF==2'), here you are specifying that the number of fields should be exactly 2, you can change this to 1 to-

aseetharam / spacers_2.sh

Last active April 23, 2016 13:07

	## find the spacers
	# grep prints every match as "<line number>:<repeat><32 characters>"; sed swaps
	# the ":<repeat>" part for a tab, and awk writes each pair as a fasta record
	# with the line number as the sequence id.
	grep -one "GTGTTCCCCGCGCCAGCGGGGATAAACC.\{32\}" example.list | \
	sed 's/:GTGTTCCCCGCGCCAGCGGGGATAAACC/\t/g' | \
	awk '{print ">"$1"\n"$2}' > example_spacers.fa
	# find the crispr sequences, extract the 32 bases spacers adjacent to it and print them as fasta sequence
	# multiple spacers from the same sequence are printed with the same sequence id
	# to count:
	grep -c ">" example_spacers.fa
	# will give you total number of spacers
	grep ">" example_spacers.fa | sort -u | wc -l
	# will give you the number of distinct sequence ids, i.e. input lines with at least one spacer

aseetharam / spacer.sh

Last active April 22, 2016 20:31

to extract the sequence of interest

	## OPTION 1
	# convert the gzipped fastq file to a file with one sequence per line
	zcat input.fastq.gz | sed -n '2~4p' > single_line_sequences.txt
	# you are asking it to print the 2nd line, followed by every 4th line after that.
	perl -ne 'while ($_ =~ m/GTGTTCCCCGCGCCAGCGGGGATAAACC([ATCG]{32})/g) {print $1."\t"} {print "\n"}' single_line_sequences.txt
	# here, you are printing the 32 bases after the matching string using perl and using tab as delimiter.
	# a newline is printed for every input line, so lines without a match give an empty output line.
	# the input is the above file you created in the first step.
	# this will generate the output for the first part.

	## OPTION 2

aseetharam / extract_seq.sh

Created September 11, 2015 01:34

	#!/bin/bash
	num=1
	while read line; do
	start=$(echo $line \|cut -f 1 -d ",");
	end=$(echo $line \|cut -f 2 -d ",");
	strand=$(echo $line \|cut -f 3 -d ",");
	if [ "$strand" = "+" ]; then
	echo -e "253771435\t$((start - 3))\t${start}\t${num}\t0\t+";
	else
	echo -e "253771435\t$((${end} - 1))\t$((${end} + 2))\t${num}\t0\t-"

Arun Seetharam aseetharam

Bedtools Cheatsheet