Skip to content

Instantly share code, notes, and snippets.

@nickloman
nickloman / rarify_from_vcf.py
Created November 27, 2012 20:13
rarify_from_vcf.py-- Subsample SNP discovery rates for subset of isolates
## rarify SNP discovery:
## n isolates from 1 .. total isolates
## x bootstraps (pick randomly without replacement)
## output:
## NumberIsolates BootStrapNumber Variants NoCalls
@nickloman
nickloman / extractpairs.py
Created November 20, 2012 15:18
Extract random pairs from a FASTA file and output fake FASTQ
from Bio import SeqIO
import sys
import random
# Read just the first record of the FASTA file named by argv[1].  The
# builtin next() replaces the Python-2-only .next() method, and the "with"
# block closes the file once that record has been parsed.
with open(sys.argv[1]) as fasta_handle:
    rec = next(SeqIO.parse(fasta_handle, "fasta"))
# Remaining positional arguments.  How they are used lies beyond this
# excerpt, so their meaning is only suggested by the variable names.
insert_size = int(sys.argv[2])
rec_length = len(rec)
read_length = int(sys.argv[3])
number_seqs = int(sys.argv[4])
prefix = sys.argv[5]
@nickloman
nickloman / extractseq.py
Created November 20, 2012 15:16
Extract the first X positions of the first record in a FASTA file
from Bio import SeqIO
import sys
# Load only the first record of the FASTA file named by argv[1]; next() works
# on Python 2.6+ and 3, and the "with" block closes the input file.
with open(sys.argv[1]) as fasta_handle:
    rec = next(SeqIO.parse(fasta_handle, "fasta"))
# Write the first int(argv[2]) positions of that record to stdout as FASTA.
SeqIO.write(rec[0:int(sys.argv[2])], sys.stdout, "fasta")
@nickloman
nickloman / rsync_ecoli.sh
Created October 29, 2012 15:14
Retrieve all Escherichia sequences from Genbank with rsync
# Retrieve the Escherichia *.fna files from the NCBI Bacteria genomes tree into the current directory.
# Every filter pattern is quoted so the local shell can never glob-expand it before rsync sees it.
rsync -av rsync://ftp.ncbi.nlm.nih.gov/genomes/Bacteria --include "*/" --include "Bacteria/Escherichia*/*.fna" --exclude="*" .
# bonus script - concatenate chromosomes and plasmids into a single <directory>.fasta file per directory.
# Output is appended (>>), so make sure the .fasta files don't already exist.
# One find visits each .fna exactly once; running a separate find per directory listed a file again for
# every ancestor directory, which appended it to the output more than once.
find . -mindepth 2 -name "*.fna" | while IFS= read -r i ; do cat "$i" >> "$(dirname "$i")".fasta ; done
@nickloman
nickloman / MLST_blast.py
Created October 10, 2012 09:13
MLST_blast.py
# a little script to run BLASTN (BLAST+) and generate an MLST profile from a database defined in 'bugs'
# The alleles and profile files should be located in the directory BLAST_DIRECTORY/<bugname>/
# requires Biopython
# use like:
# process('Acinetobacter_baumannii', open('contigs.fa'))
from Bio import SeqIO
from Bio.Blast import NCBIXML
from StringIO import StringIO
import sys
--- lib/qiime/pycogent_backports/uclust.py 2011-06-07 21:34:23.000000000 +0100
+++ ../16s/Qiime-1.2.1/build/lib/qiime/pycogent_backports/uclust.py 2011-02-22 21:00:06.000000000 +0000
@@ -107,10 +107,6 @@
'--optimal':FlagParameter('--',Name='optimal'),
'--stable_sort':FlagParameter('--',Name='stable_sort'),
-
- '--gapopen':ValuedParameter('--',Name='gapopen',Delimiter=' '),
-
- '--gapext':ValuedParameter('--',Name='gapext',Delimiter=' '),
--- lib/qiime/assign_taxonomy.py 2011-09-11 20:53:06.000000000 +0100
+++ ../16s/Qiime-1.2.1/build/lib/qiime/assign_taxonomy.py 2011-02-22 21:00:06.000000000 +0000
@@ -89,7 +89,7 @@
""" Initialize the object
"""
_params = {
- 'Min percent identity': 90,
+ 'Min percent identity': 0.90,
'Max E value': 1e-30,
'Application': 'blastn/megablast'
@nickloman
nickloman / genbank_to_tbl.py
Created May 11, 2012 16:10
genbank_to_tbl.py
# requires biopython
# run like:
# genbank_to_tbl.py "my organism name" "my strain ID" "ncbi project id" < my_sequence.gbk
# writes seq.fsa, seq.tbl as output
import sys
from copy import copy
from Bio import SeqIO
def find_gene_entry(features, locus_tag):
@nickloman
nickloman / dodir_seqnoisem.py
Created November 18, 2011 14:26
dodir_seqnoisem.py
import os
import sys
import glob
# Second command-line argument parsed as an integer.  NOTE(review): the name
# suggests a truncation length, but its use is not visible in this excerpt.
TRUNCATE_LENGTH = int(sys.argv[2])
def system(cmd):
    """Echo a shell command to stderr, run it, and return its exit status.

    Args:
        cmd: Command line handed unchanged to ``os.system``, so it is
            interpreted by the shell.

    Returns:
        Whatever ``os.system`` returns for the command.  The status used to
        be discarded; returning it lets callers detect a failed command,
        while callers that ignore the result behave as before.
    """
    # Same output as the Python 2 statement ``print >>sys.stderr, cmd``, but
    # also valid on Python 3.
    sys.stderr.write("%s\n" % (cmd,))
    # Flush so the echoed command appears before any output of the child.
    sys.stderr.flush()
    return os.system(cmd)
@nickloman
nickloman / gist:1376578
Created November 18, 2011 14:25
dodir_pyrom.py
import os
import sys
import glob
def system(cmd):
    """Log a shell command on stderr, execute it, and return its status.

    Args:
        cmd: Command string passed as-is to ``os.system`` (run through the
            shell).

    Returns:
        The result of ``os.system(cmd)``.  It was previously dropped;
        returning it is backward-compatible and lets callers notice
        failures.
    """
    # Writes the same text as ``print >>sys.stderr, cmd`` did under
    # Python 2, in a form that Python 3 accepts as well.
    sys.stderr.write("%s\n" % (cmd,))
    # Flush so the logged command precedes anything the command prints.
    sys.stderr.flush()
    return os.system(cmd)
# dodir_pyro.py directory