This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## rarefy SNP discovery:
## n isolates from 1 .. total isolates
## x bootstraps (pick randomly without replacement)
## output:
## NumberIsolates BootStrapNumber Variants NoCalls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import SeqIO | |
import sys | |
import random | |
# Load only the first record of the FASTA file named by the first argument.
# The builtin next() works on Python 2.6+ and Python 3, whereas the iterator
# method .next() exists only on Python 2; the with-block closes the handle
# that the original left open.
with open(sys.argv[1]) as fasta_handle:
    rec = next(SeqIO.parse(fasta_handle, "fasta"))

# Remaining positional arguments, in command-line order.
insert_size = int(sys.argv[2])
rec_length = len(rec)
read_length = int(sys.argv[3])
number_seqs = int(sys.argv[4])
prefix = sys.argv[5]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import SeqIO | |
import sys | |
# Read the first record of the FASTA file given as argv[1] and write its
# first int(argv[2]) positions to stdout in FASTA format.
# next() replaces the Python 2-only .next() method, and the with-block
# closes the input handle once the record has been read.
with open(sys.argv[1]) as fasta_handle:
    rec = next(SeqIO.parse(fasta_handle, "fasta"))
SeqIO.write(rec[0:int(sys.argv[2])], sys.stdout, "fasta")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The exclude pattern is quoted so the shell never expands it (an unquoted glob
# is dropped or rejected under nullglob/failglob).
rsync -av rsync://ftp.ncbi.nlm.nih.gov/genomes/Bacteria --include "*/" --include "Bacteria/Escherichia*/*.fna" --exclude='*' .
# bonus script - concatenate chromosomes and plasmids into single fasta file, make sure the files don't already exist
# (output is appended with >>, so a leftover <directory>.fasta would keep growing).
# One find over the files visits each .fna exactly once; running a separate find
# per directory re-scanned nested directories and appended their files twice.
# -mindepth 2 keeps the original scope: only files that live inside a subdirectory.
find . -mindepth 2 -type f -name "*.fna" | while IFS= read -r i ; do cat "$i" >> "$(dirname "$i").fasta" ; done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# a little script to run BLASTN (BLAST+) and generate an MLST profile from a database defined in 'bugs' | |
# The alleles and profile files should be located in the directory BLAST_DIRECTORY/<bugname>/ | |
# requires Biopython | |
# use like: | |
# process('Acinetobacter_baumannii', open('contigs.fa')) | |
from Bio import SeqIO | |
from Bio.Blast import NCBIXML | |
from StringIO import StringIO | |
import sys |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- lib/qiime/pycogent_backports/uclust.py 2011-06-07 21:34:23.000000000 +0100 | |
+++ ../16s/Qiime-1.2.1/build/lib/qiime/pycogent_backports/uclust.py 2011-02-22 21:00:06.000000000 +0000 | |
@@ -107,10 +107,6 @@ | |
'--optimal':FlagParameter('--',Name='optimal'), | |
'--stable_sort':FlagParameter('--',Name='stable_sort'), | |
- | |
- '--gapopen':ValuedParameter('--',Name='gapopen',Delimiter=' '), | |
- | |
- '--gapext':ValuedParameter('--',Name='gapext',Delimiter=' '), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- lib/qiime/assign_taxonomy.py 2011-09-11 20:53:06.000000000 +0100 | |
+++ ../16s/Qiime-1.2.1/build/lib/qiime/assign_taxonomy.py 2011-02-22 21:00:06.000000000 +0000 | |
@@ -89,7 +89,7 @@ | |
""" Initialize the object | |
""" | |
_params = { | |
- 'Min percent identity': 90, | |
+ 'Min percent identity': 0.90, | |
'Max E value': 1e-30, | |
'Application': 'blastn/megablast' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires biopython | |
# run like: | |
# genbank_to_tbl.py "my organism name" "my strain ID" "ncbi project id" < my_sequence.gbk | |
# writes seq.fsa, seq.tbl as output | |
import sys | |
from copy import copy | |
from Bio import SeqIO | |
def find_gene_entry(features, locus_tag): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import glob | |
TRUNCATE_LENGTH = int(sys.argv[2]) | |
def system(cmd):
    """Echo a shell command to stderr, run it, and return its exit status.

    cmd is handed unchanged to os.system, so it is interpreted by the shell.
    The value returned is whatever os.system reports (0 on success on POSIX
    systems); earlier callers that ignored the result are unaffected.
    """
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # Python 2-only "print >>stream" statement form.
    sys.stderr.write("%s\n" % (cmd,))
    return os.system(cmd)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import glob | |
def system(cmd):
    """Echo a shell command to stderr, run it, and return its exit status.

    cmd is handed unchanged to os.system, so it is interpreted by the shell.
    The value returned is whatever os.system reports (0 on success on POSIX
    systems); earlier callers that ignored the result are unaffected.
    """
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # Python 2-only "print >>stream" statement form.
    sys.stderr.write("%s\n" % (cmd,))
    return os.system(cmd)
# dodir_pyro.py directory |