Skip to content

Instantly share code, notes, and snippets.

--- lib/qiime/pycogent_backports/uclust.py 2011-06-07 21:34:23.000000000 +0100
+++ ../16s/Qiime-1.2.1/build/lib/qiime/pycogent_backports/uclust.py 2011-02-22 21:00:06.000000000 +0000
@@ -107,10 +107,6 @@
'--optimal':FlagParameter('--',Name='optimal'),
'--stable_sort':FlagParameter('--',Name='stable_sort'),
-
- '--gapopen':ValuedParameter('--',Name='gapopen',Delimiter=' '),
-
- '--gapext':ValuedParameter('--',Name='gapext',Delimiter=' '),
@nickloman
nickloman / MLST_blast.py
Created October 10, 2012 09:13
MLST_blast.py
# a little script to run BLASTN (BLAST+) and generate an MLST profile from a database defined in 'bugs'
# The alleles and profile files should be located in the directory BLAST_DIRECTORY/<bugname>/
# requires Biopython
# use like:
# process('Acinetobacter_baumannii', open('contigs.fa'))
from Bio import SeqIO
from Bio.Blast import NCBIXML
from StringIO import StringIO
import sys
@nickloman
nickloman / rsync_ecoli.sh
Created October 29, 2012 15:14
Retrieve all Escherichia sequences from Genbank with rsync
# Mirror only the Escherichia *.fna files from the NCBI bacterial genomes tree
# into the current directory. The '*' is quoted so the shell hands it to rsync
# literally instead of treating it as a glob.
rsync -av rsync://ftp.ncbi.nlm.nih.gov/genomes/Bacteria --include "*/" --include "Bacteria/Escherichia*/*.fna" --exclude='*' .
# Bonus script: concatenate the chromosome and plasmid .fna files of each
# directory into a single <directory>.fasta next to that directory. Output is
# appended (>>), so make sure the .fasta files do not already exist.
# One find over the whole tree lists every file exactly once; running a
# separate find per directory lists files in nested directories repeatedly and
# appends them more than once. -mindepth 2 keeps the original behaviour of
# ignoring .fna files that sit directly in the current directory.
find . -mindepth 2 -name "*.fna" | while IFS= read -r i ; do cat "$i" >> "$(dirname "$i").fasta" ; done
@nickloman
nickloman / extractseq.py
Created November 20, 2012 15:16
Extract the first X bases of the first record in a FASTA file
# Usage: extractseq.py <input.fasta> <length>
# Writes the first <length> positions of the first FASTA record to stdout.
from Bio import SeqIO
import sys

# The context manager closes the input file once the first record is read.
with open(sys.argv[1]) as handle:
    # The next() builtin works on Python 2.6+ and Python 3; the .next()
    # method exists only on Python 2 iterators.
    rec = next(SeqIO.parse(handle, "fasta"))

# Slice the record to the requested length and emit it as FASTA.
SeqIO.write(rec[0:int(sys.argv[2])], sys.stdout, "fasta")
@nickloman
nickloman / extractpairs.py
Created November 20, 2012 15:18
Extract random pairs from a FASTA file and output fake FASTQ
from Bio import SeqIO
import sys
import random

# Only the first record of the input FASTA is used. The context manager
# closes the file once that record is read, and the next() builtin works on
# Python 2.6+ and Python 3 (the .next() method is Python 2 only).
with open(sys.argv[1]) as handle:
    rec = next(SeqIO.parse(handle, "fasta"))

# Remaining positional arguments, in order: argv[2]..argv[5].
insert_size = int(sys.argv[2])
rec_length = len(rec)
read_length = int(sys.argv[3])
number_seqs = int(sys.argv[4])
prefix = sys.argv[5]
@nickloman
nickloman / rarify_from_vcf.py
Created November 27, 2012 20:13
rarify_from_vcf.py -- subsample SNP discovery rates for subsets of isolates
## rarify SNP discovery:
## n isolates from 1 .. total isolates
## x bootstraps (pick randomly without replacement)
## output:
## NumberIsolates BootStrapNumber Variants NoCalls
@nickloman
nickloman / pullblast.py
Last active December 22, 2015 20:09
pullblast.py - retrieve BLAST HSP database hits in FASTA format, returning results in same strand orientation as query
#Usage:
#blastall -p blastn -d db -i query -m 8 | python pullblast.py db.fasta
import sys
from Bio import SeqIO
records = SeqIO.to_dict(SeqIO.parse(open(sys.argv[1]), "fasta"))
for ln in sys.stdin:
cols = ln.rstrip().split("\t")
@nickloman
nickloman / gist:8031817
Last active December 31, 2015 19:09
make_blobolog_file.py
import sys
from collections import defaultdict
from itertools import izip_longest
def grouper(n, iterable, fillvalue=None):
    """Collect items from *iterable* into tuples of length *n*.

    When the number of items is not a multiple of *n*, the final tuple is
    padded with *fillvalue*. Returns the iterator produced by
    ``izip_longest``.
    """
    # n references to the SAME iterator: building one output tuple pulls n
    # consecutive items from the underlying iterable.
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)
contigs = defaultdict(dict)
@nickloman
nickloman / hdf5.rb
Last active August 29, 2015 14:02
hdf5.rb
require 'formula'
class Hdf5 < Formula
homepage 'http://www.hdfgroup.org/HDF5'
url 'http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8.12/src/hdf5-1.8.12.tar.bz2'
sha1 '8414ca0e6ff7d08e423955960d641ec5f309a55f'
version '1.8.12'
# TODO - warn that these options conflict
option :universal
@nickloman
nickloman / fast5tofasta.py
Created June 9, 2014 16:01
fast5tofasta.py
import h5py
from Bio import SeqIO
from StringIO import StringIO
import sys
keys = {'template' : '/Analyses/Basecall_2D_000/BaseCalled_template/Fastq',
'complement' : '/Analyses/Basecall_2D_000/BaseCalled_complement/Fastq',
'twodirections' : '/Analyses/Basecall_2D_000/BaseCalled_2D/Fastq'}
for fn in sys.argv[1:]: