General:
| Tools | Description |
|---|---|
| flank | Create new intervals from the flanks of existing intervals. |
| slop | Adjust the size of intervals. |
| shift | Adjust the position of intervals. |
| subtract | Remove intervals based on overlaps between two files. |
| """============================== | |
| Branded IPython Notebook Launcher | |
| ================================= | |
| Executing this module will create an overlay over IPython notebook's own static | |
| files and templates, override static files and templates, copy all | |
| example notebooks into a temporary folder, and launch the IPython notebook server. | |
| You can use this to offer an interactive tutorial for your library/framework/... |
| library(Heatplus) | |
| library(vegan) | |
| library(RColorBrewer) | |
| library("gplots") | |
| all.data <- read.csv("C:/Users/Arun Seetharam/OneDrive/PostDoc/Projects/20150303_Perera_metabolomics/bloodroot_data_v2d.csv", quote="") | |
| row.names(all.data) <- all.data$ID | |
| all.data <- all.data[, -1] | |
| data.prop <- all.data/rowSums(all.data) | |
| scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) | |
| heatmap(as.matrix(data.prop), Rowv = NA, Colv = NA, col = scaleyellowred) |
| #!/bin/bash | |
| num=1 | |
| while read line; do | |
| start=$(echo $line |cut -f 1 -d ","); | |
| end=$(echo $line |cut -f 2 -d ","); | |
| strand=$(echo $line |cut -f 3 -d ","); | |
| if [ "$strand" = "+" ]; then | |
| echo -e "253771435\t$((start - 3))\t${start}\t${num}\t0\t+"; | |
| else | |
| echo -e "253771435\t$((${end} - 1))\t$((${end} + 2))\t${num}\t0\t-" |
| ## OPTION 1 | |
| # convert the gzipped fastq file to a single line sequence files | |
| zcat input.fastq.gz | sed -n '2~4p' > single_line_sequences.txt | |
| # you are asking it to print the 2nd line, followed by every 4th line after that. | |
| perl -ne 'while ($_ =~ m/GTGTTCCCCGCGCCAGCGGGGATAAACC([ATCG]{32})/g) {print $1."\t"} {print "\n"}' single_line_sequences.txt | |
| # here, you are printing the 32 bases after the matching string using perl and using tab as delimiter. | |
| # the input is the above file you created in the first step. | |
| # this will generate the output for the first part. | |
| ## OPTION 2 |
| ## find the spacers | |
| grep -one "GTGTTCCCCGCGCCAGCGGGGATAAACC.\{32\}" example.list | \ | |
| sed 's/:GTGTTCCCCGCGCCAGCGGGGATAAACC/\t/g' | \ | |
| awk '{print ">"$1"\n"$2}' > example_spacers.fa | |
| # find the CRISPR sequences, extract the 32-base spacers adjacent to them, and print them as FASTA sequences | |
| # multiple spacers from the same sequence are printed with the same sequence id | |
| # to count: | |
| grep -c ">" example_spacers.fa | |
| # will give you total number of spacers | |
| grep ">" example_spacers.fa |sort |uniq |wc -l |
| # here is an example to write separate files based on the number of occurrences of spacers: | |
| perl -ne 'while ($_ =~ m/GTGTTCCCCGCGCCAGCGGGGATAAACC([ATCG]{32})/g) {push(@matches, $1)}if(@matches){ print "@matches\t";undef @matches; print "\n"}' example.list | awk 'NF==2' | |
| GGTAACTTGCCGGAGGGCAGCGACCAGTTTAA GATGCACAGCCTGTTGCCATTCCGCCTCCTGT | |
| GCAACTCGGTCGCCGCATACACTATTCTCAGA GGAAAGCCTCTTTCCTTTGTTTACGATATTGC | |
| GGTTTTGCGCCATTCGATGGTGTCCGGGATCT GCGGCCCACGCTGGTTTGCCCCAGCAGGCGAA | |
| GCGCTGATTTCTTAATGTGATCGGTAGCACGT AAAAAATTATATTGACGCGGCGAGTTATAATA | |
| GTGCTCCAGTGGCTTCTGTTTCTATCAGCTGT GGGTGAACACTATCCCATATCACCAGCTCACC | |
| GGAATATTCAGCGATTTGCCCGAGCTTGCGAG GCGTGCCGCCCCCAGCAACAATACGCTACTGA | |
| # see the last part (awk 'NF==2'), here you are specifying that the number of fields should be exactly 2, you can change this to 1 to- |
| canu \ | |
| -d /project/rw_genome/arun_test/20170928_canu_c \ | |
| -p 20170928_canu_c genomeSize=770m \ | |
| maxMemory=128g \ | |
| maxThreads=16 \ | |
| gridOptionsJobName=canu_as1 \ | |
| gridOptionsExecutive="--mem-per-cpu=5g --time=2:00:00" \ | |
| gridOptionsCORMHAP="--mem-per-cpu=5g" \ | |
| gridOptionsCORMHAP="--time=1:00:00" \ | |
| gridOptionsOBTMHAP="--mem-per-cpu=5g --time=1:00:00" \ |
| canu \ | |
| -d /project/rw_genome/arun_test/20170928_canu_b \ | |
| -p 20170928_canu_b \ | |
| genomeSize=770m \ | |
| maxMemory=1096g \ | |
| maxThreads=40 \ | |
| minReadLength=1000 \ | |
| corOutCoverage=35 \ | |
| gridOptionsJobName=canu_as-b \ | |
| corMhapFilterThreshold=0.0000000002 \ |
| #!/bin/bash | |
| BEDTOOLS=$(dirname $(which bedtools)) | |
| FEATURECOUNTS=$(which featureCounts) | |
| R=$(which R) | |
| IDIR=$(pwd) | |
| DIR=$(pwd)/GeneBed_output | |
| GTF=gencode.vM30.annotation.gtf |