Load rgbif
library("rgbif")
| #!/bin/bash | |
| gffs=`ls -1 | grep -E "*.gff" | sort` | |
| RNA_SUFFIX=".rnaseqs.gff" | |
| mkdir 16s | |
| echo "Cleaning..." | |
| rm *rnaseq* | |
| rm *16s* | |
| rm *.fai |
| source("http://bioconductor.org/biocLite.R") | |
| biocLite("genomes") | |
| library(genomes) | |
| library(ggplot2) | |
| valid <- c("released", "created", "submitted") | |
| data(proks) | |
| update(proks) |
SAM and BAM filtering one-liners
@author: David Fredman, [email protected] (sans poly-A tail)
@dependencies: http://sourceforge.net/projects/bamtools/ and http://samtools.sourceforge.net/
Please comment or extend with additional/faster/better solutions.
BWA mapping (using piping for minimal disk I/O)
| def Process.gsub pat, sub | |
| mem = File.open('/proc/self/mem', 'r+') | |
| maps = File.open('/proc/self/maps') | |
| maps.each do |map| | |
| from, to, perms, offset = map.scan(/(\h+)-(\h+) (\S+) (\h+)/)[0] | |
| if perms['rw'] | |
| from, to = [from, to].map { |addr| addr.hex + offset.hex } | |
| data = mem.tap { |m| m.seek from }.read(to - from) rescue next |
| # Voices available to `say` on OS X | |
| # "*" indicates new additions since 10.8 | |
| Agnes (en_US) | |
| Albert (en_US) | |
| Alex (en_US) | |
| *Alice (it_IT) | |
| *Alva (sv_SE) | |
| *Amelie (fr_CA) | |
| *Anna (de_DE) |
| ## Quick attempt to ggplot-i-fy the heatmap post seen here: https://biomickwatson.wordpress.com/2015/04/09/recreating-a-famous-visualisation/ | |
| measles_cases <- read.csv("../data/MEASLES_Cases_1909-2001_20150409092603.csv", skip=2, stringsAsFactors=FALSE) | |
| ## From blog post | |
| measles_cases[measles_cases=="-"] <- 0 | |
| ## not sure what is happening in the last col | |
| measles_cases <- measles_cases[,-62] | |
| for (i in 2:61) { |
| $ (find ./ -name "*final.bam" | while read F; do samtools view -c ${F} 22 ; done ) | sort -n | gnuplot -e "set terminal dumb 130 40 ; set title 'Reads chr22'; plot '-' with lines notitle;" | |
| Reads chr22 | |
| 1.6e+06 ++------------+--------------+-------------+--------------+-------------+-------------+--------------+------------++ | |
| + + + + + + + + + | |
| | * | |
| 1.5e+06 ++ *+ | |
| | * | | |
| | |
# Finding the number of sequence records by taxonomic group
Ricardo wants to know how to find the number of sequence records associated with sub-groups within a given taxon. This example grew a bit too big to make into a comment, so here it is in gist form.
So, let's find out how many DNA sequences are present in GenBank for each
| [include] | |
| # For user/credentials/token/etc | |
| path = ~/.gitconfig.local | |
| [core] | |
| editor = vim | |
| excludesfile = ~/.gitignore | |
| [color] | |
| branch = auto | |
| diff = auto | |
| status = auto |