Load rgbif
library("rgbif")
#!/bin/bash | |
gffs=`ls -1 | grep -E "*.gff" | sort` | |
RNA_SUFFIX=".rnaseqs.gff" | |
mkdir 16s | |
echo "Cleaning..." | |
rm *rnaseq* | |
rm *16s* | |
rm *.fai |
source("http://bioconductor.org/biocLite.R") | |
biocLite("genomes") | |
library(genomes) | |
library(ggplot2) | |
valid <- c("released", "created", "submitted") | |
data(proks) | |
update(proks) |
SAM and BAM filtering one-liners
@author: David Fredman, [email protected] (sans poly-A tail)
@dependencies: http://sourceforge.net/projects/bamtools/ and http://samtools.sourceforge.net/
Please comment or extend with additional/faster/better solutions.
BWA mapping (using piping for minimal disk I/O)
def Process.gsub pat, sub | |
mem = File.open('/proc/self/mem', 'r+') | |
maps = File.open('/proc/self/maps') | |
maps.each do |map| | |
from, to, perms, offset = map.scan(/(\h+)-(\h+) (\S+) (\h+)/)[0] | |
if perms['rw'] | |
from, to = [from, to].map { |addr| addr.hex + offset.hex } | |
data = mem.tap { |m| m.seek from }.read(to - from) rescue next |
# Voices available to `say` on OS X | |
# "*" indicates new additions since 10.8 | |
Agnes (en_US) | |
Albert (en_US) | |
Alex (en_US) | |
*Alice (it_IT) | |
*Alva (sv_SE) | |
*Amelie (fr_CA) | |
*Anna (de_DE) |
## Quick attempt to ggplot-i-fy the heatmap post seen here: https://biomickwatson.wordpress.com/2015/04/09/recreating-a-famous-visualisation/ | |
measles_cases <- read.csv("../data/MEASLES_Cases_1909-2001_20150409092603.csv", skip=2, stringsAsFactors=FALSE) | |
## From blog post | |
measles_cases[measles_cases=="-"] <- 0 | |
## not sure what is happening in the last col | |
measles_cases <- measles_cases[,-62] | |
for (i in 2:61) { |
$ (find ./ -name "*final.bam" | while read F; do samtools view -c ${F} 22 ; done ) | sort -n | gnuplot -e "set terminal dumb 130 40 ; set title 'Reads chr22'; plot '-' with lines notitle;" | |
Reads chr22 | |
1.6e+06 ++------------+--------------+-------------+--------------+-------------+-------------+--------------+------------++ | |
+ + + + + + + + + | |
| * | |
1.5e+06 ++ *+ | |
| * | | |
| |
# Finding the number of sequence records by taxonomic group
Ricardo wants to know how to find the number of sequence records associated with sub-groups within a given taxon. This example grew a bit too big to make into a comment, so here it is in gist form.
So, let's find out how many DNA sequences are present in GenBank for each
[include] | |
# For user/credentials/token/etc | |
path = ~/.gitconfig.local | |
[core] | |
editor = vim | |
excludesfile = ~/.gitignore | |
[color] | |
branch = auto | |
diff = auto | |
status = auto |