Josh Herr jrherr

to get similar results for GBIF search and download APIs

Load rgbif

library("rgbif")

SAM and BAM filtering one-liners

Please comment or extend with additional/faster/better solutions.

BWA mapping (using piping for minimal disk I/O)

# Finding the number of sequence records by taxonomic group

Ricardo wants to know how to find the number of sequence records associated with sub-groups within a given taxon. This example grew a bit too big to make into a comment, so here it is in gist form.

So, let's find out how many DNA sequences are present in GenBank for each

	source("http://bioconductor.org/biocLite.R")
	biocLite("genomes")

	library(genomes)
	library(ggplot2)

	valid <- c("released", "created", "submitted")

	data(proks)
	update(proks)

	def Process.gsub pat, sub
	mem = File.open('/proc/self/mem', 'r+')
	maps = File.open('/proc/self/maps')

	maps.each do \|map\|
	from, to, perms, offset = map.scan(/(\h+)-(\h+) (\S+) (\h+)/)[0]

	if perms['rw']
	from, to = [from, to].map { \|addr\| addr.hex + offset.hex }
	data = mem.tap { \|m\| m.seek from }.read(to - from) rescue next

	# Voices available to `say` on OS X
	# "*" indicates new additions since 10.8

	Agnes (en_US)
	Albert (en_US)
	Alex (en_US)
	*Alice (it_IT)
	*Alva (sv_SE)
	*Amelie (fr_CA)
	*Anna (de_DE)

	## Quick attempt to ggplot-i-fy the heatmap post seen here: https://biomickwatson.wordpress.com/2015/04/09/recreating-a-famous-visualisation/
	measles_cases <- read.csv("../data/MEASLES_Cases_1909-2001_20150409092603.csv", skip=2, stringsAsFactors=FALSE)

	## From blog post
	measles_cases[measles_cases=="-"] <- 0

	## not sure what is happening in the last col
	measles_cases <- measles_cases[,-62]

	for (i in 2:61) {

	$ (find ./ -name "*final.bam" \| while read F; do samtools view -c ${F} 22 ; done ) \| sort -n \| gnuplot -e "set terminal dumb 130 40 ; set title 'Reads chr22'; plot '-' with lines notitle;"

	Reads chr22

	1.6e+06 ++------------+--------------+-------------+--------------+-------------+-------------+--------------+------------++
	+ + + + + + + + +
	\| *
	1.5e+06 ++ *+
	\| * \|
	\|

	[include]
	# For user/credentials/token/etc
	path = ~/.gitconfig.local
	[core]
	editor = vim
	excludesfile = ~/.gitignore
	[color]
	branch = auto
	diff = auto
	status = auto