Benjamin Langmead BenLangmead

Associate Professor of Computer Science at Johns Hopkins University

BenLangmead / sra_growth.R

Last active May 7, 2025 19:01

SRA growth calc

	# Plot SRA growth: fetch the SRA statistics table from NCBI and draw
	# log10(bases) against date between the first and last thresholds below.
	#
	# curl's -f flag makes it exit non-zero on an HTTP error instead of saving
	# the server's error page, so a failed download stops here rather than
	# surfacing later as a confusing parse error in read.table().
	if (system('curl -f https://trace.ncbi.nlm.nih.gov/Traces/sra/sra_stat.cgi > /tmp/stats.csv') != 0) {
		stop('could not download SRA statistics with curl', call. = FALSE)
	}
	st <- read.table('/tmp/stats.csv', sep=',', header=TRUE)
	st$date <- as.Date(st$date, format='%m/%d/%Y')
	# Lowest row index at which st$bases reaches each threshold. Each threshold
	# is double the previous one, except the last (8.95e16, just under 9e16).
	i <- min(which(st$bases >= 0.5625e16))
	id1 <- i
	id2 <- min(which(st$bases >= 1.125e16))
	id3 <- min(which(st$bases >= 2.25e16))
	id4 <- min(which(st$bases >= 4.5e16))
	id5 <- min(which(st$bases >= 8.95e16))
	# min(which(...)) is Inf when no row reaches a threshold; fail clearly here
	# instead of letting the id1:id5 slice below error out.
	stopifnot(is.finite(c(id1, id2, id3, id4, id5)))
	plot(st$date[id1:id5], log10(st$bases[id1:id5]), type='l', xlab="Date", ylab="log10(Total SRA bases)")

BenLangmead / sradbv2.py

Created July 4, 2018 02:10

	#!/usr/bin/env python

	# Authors: Chris Wilks (original) and Ben Langmead (modifications)
	# Date: 7/3/2018
	# License: MIT

	"""sradbv2
	Usage:
	sradbv2 search <lucene-search> [options]
	sradbv2 query [<SRP>,<SRR>]... [options]

BenLangmead / gist:f40ff161c3be517b5b4d8867479fe4ee

Created June 2, 2018 21:01

gathering human rna-seq w/ sradbv2

	> query=paste(
	+ 'sample_taxon_id:9606',
	+ 'experiment_library_strategy:"rna seq"',
	+ 'experiment_library_source:transcriptomic',
	+ 'experiment_platform:illumina')
	> st <- 0
	> SZ <- 500
	> df <- sra_full_search(q=query, start=st, size=SZ); st <- st + SZ
	https://api-omicidx.cancerdatasci.org/sra/1.0/search/full?q=sample_taxon_id%3A9606%20experiment_library_strategy%3A%22rna%20seq%22%20experiment_library_source%3Atranscriptomic%20experiment_platform%3Aillumina&start=0&size=500
	> print(paste('ncol for batch', st, '=', ncol(df)))

BenLangmead / gist:5d6340fd00f67d307efffa5330aab148

Created June 2, 2018 17:25

SRAdbV2 attempt

	R version 3.5.0 (2018-04-23) -- "Joy in Playing"
	Copyright (C) 2018 The R Foundation for Statistical Computing
	Platform: x86_64-apple-darwin15.6.0 (64-bit)

	R is free software and comes with ABSOLUTELY NO WARRANTY.
	You are welcome to redistribute it under certain conditions.
	Type 'license()' or 'licence()' for distribution details.

	Natural language support but running in an English locale

BenLangmead / by_species.R

Created November 22, 2017 19:48

SRAdb RNA-seq samples by species

	library('SRAdb')

	# Path to the SRAmetadb SQLite file. If you already have SRAmetadb, point
	# this path at your copy (or setwd appropriately) to save yourself a large
	# download.
	sqlfile <- file.path('.', 'SRAmetadb.sqlite')
	# Check the configured path itself, so that changing `sqlfile` above is
	# honoured; only download when that file is missing. Plain `<-` is used
	# because this assignment happens in the same scope as the one above.
	if (!file.exists(sqlfile)) sqlfile <- getSRAdbFile()
	sra_con <- dbConnect(SQLite(), sqlfile)
	# Shorthand: run the SQL string `x` on the SRAdb connection and return the
	# result of dbGetQuery(). Note that this name masks base R's q() (quit).
	q <- function(x) { dbGetQuery(sra_con, x) }

	species_to_tax_id = list(

BenLangmead / benchmarks.R

Last active May 13, 2017 00:47

	#!/usr/bin/env Rscript

	# source('http://bioconductor.org/biocLite.R')
	# biocLite('recount')
	# biocLite('GenomicRanges')
	# biocLite('LieberInstitute/recount.bwtool')

	library('GenomicRanges')
	library('recount')
	library('recount.bwtool')

BenLangmead / CG_Radix

Created May 8, 2014 22:12

	{
	"metadata": {
	"name": "",
	"signature": "sha256:0e059d6141a29e725dcfd5b05c7fa0fe7e20abcd0698fa34124a7c847f09627d"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [

BenLangmead / FASTQ.json

Created January 11, 2014 20:31

BenLangmead / FASTA.json

Last active January 2, 2016 12:59

BenLangmead / CG_Naive.json

Created January 4, 2014 18:42