genomewalker · February 13, 2018 21:14
diff --git a/README.md b/README.md
diff --git a/gb2ptt.pl b/gb2ptt.pl
 #!/usr/bin/env perl 
 #===============================================================================
 #
 #         FILE:  gb2ptt.pl
 #
 #        USAGE:  ./gb2ptt.pl  
 #
 #  DESCRIPTION:  Script to convert a genbank flat file to ptt
 #
 #      OPTIONS:  ---
 # REQUIREMENTS:  ---
 #         BUGS:  ---
 #        NOTES:  ---
 #       AUTHOR:  Dr. Scott A. Givan (sag), [email protected]
 #      COMPANY:  University of Missouri, USA
 #      VERSION:  1.0
 #      CREATED:  05/04/15 15:23:32
 #     REVISION:  ---
 #===============================================================================

 use 5.010;      # Require at least Perl version 5.10
 use autodie;
 use Getopt::Long; # use GetOptions function to for CL args
 use warnings;
 use strict;
 use Bio::SeqIO;

 my ($debug,$verbose,$help,$infile);

 my $result = GetOptions(
    "debug"     =>  \$debug,
    "verbose"   =>  \$verbose,
    "help"      =>  \$help,
    "infile:s"  =>  \$infile,
 );

 if ($help) {
    help();
    exit(0);
 }

 sub help {

 say <<HELP;

 --debug
 --verbose
 --help
 --infile

 HELP

 }

 $infile = 'test' unless ($infile);

 my $seqio = Bio::SeqIO->new(
                                -file   =>  $infile,
                                -format =>  'genbank',
                            );

 open(my $PTT, ">", $infile . ".ptt");
 open(my $RNT, ">", $infile . ".rnt");

 my %tags = ();
 while (my $seq = $seqio->next_seq()) {
    my $header1 = $seq->desc() || 'unknown' . " - 1.." . $seq->length();

    if ($debug) {
        say "\$seq isa '" . ref($seq) . "'" if ($debug);
        say $seq->desc() . " - 1.." . $seq->length();
        say "CDS: " . scalar($seq->get_SeqFeatures('CDS'));
        exit();
    }

    my @CDS = $seq->get_SeqFeatures('CDS');
    my @RNA = $seq->get_SeqFeatures('tRNA');
    push(@RNA,$seq->get_SeqFeatures('rRNA'));

    my $featcnt = 0;
    
    say $PTT $header1;
    say $PTT scalar(@CDS) . " proteins";
    say $RNT $header1;
    say $RNT scalar(@RNA) . " RNAs";
    say $PTT "Location\tStrand\tLength\tPID\tGene\tSynonym\tCode\tCOG\tProduct";
    say $RNT "Location\tStrand\tLength\tPID\tGene\tSynonym\tCode\tCOG\tProduct";

    for my $feature (@CDS) {

        next if $feature->has_tag('pseudo');

        my $tag = $feature->primary_tag();
        ++$tags{$tag};


        last if ($debug && ++$featcnt > 10);

        my $start = $feature->start();
        my $stop = $feature->end();
        my $strand = $feature->strand();
        my $length = $feature->length();
        my @pid = $feature->has_tag('protein_id') ? $feature->get_tag_values('protein_id') : $feature->get_tag_values('locus_tag');
        my @gene = $feature->has_tag('gene') ? $feature->get_tag_values('gene') : '-';
        my @synonym = $feature->get_tag_values('locus_tag');
        my $code = '-';
        my $cog = '-';
        my @description = $feature->get_tag_values('product');

        if ($strand > 0) {
            $strand = '+';
        } elsif ($strand < 0) {
            $strand = '-';
        }

 #            my @misc = $feature->primary_tag('misc_feature');
 #
 #            for my $misc_feature (@misc) {
 #                say "\$misc_feature isa '" . ref($misc_feature) . "'";
 #                if ($misc_feature->has_tag('note')) {
 #                    my @notes = $misc_feature->get_tag_values('note');
 #                    for my $note (@notes) {
 #                        if ($note =~ /(COG\d\d\d\d)/) {
 #                            $cog = $1;
 #                        }
 #                    }
 #                }
 #            }

        say $PTT $start . ".." . "$stop\t$strand\t$length\t$pid[0]\t$gene[0]\t$synonym[0]\t$code\t$cog\t$description[0]";
    }

    #foreach my $feature (sort { $a->start() <=> $b->start() } (@tRNA, @rRNA)) {
    for my $feature (sort { $a->start() <=> $b->start() } @RNA) {

            my $start = $feature->start();
            my $stop = $feature->end();
            my $strand = '+';
            my $length = $feature->length();
            my $pid = $feature->has_tag('db_xref') ? ($feature->get_tag_values('db_xref'))[0] : ($feature->get_tag_values('locus_tag'))[0];
            my @gene = $feature->has_tag('gene') ? $feature->get_tag_values('gene') : '-';
            my @synonym = $feature->get_tag_values('locus_tag');
            my $code = '-';
            my $cog = '-';
            my @description = $feature->get_tag_values('product');

            say $RNT $start . ".." . "$stop\t$strand\t$length\t$pid\t$gene[0]\t$synonym[0]\t$code\t$cog\t$description[0]";
        }
 }
diff --git a/geneplot.r b/geneplot.r
 library(ggplot2)
 library(genoPlotR)

 setwd("~/Downloads/gb_files_geno/")
 labelS <- 10L
 temp <-list.files(pattern="*genoplot.tsv")


 gbknames<- lapply(as.list(temp),function(x){strsplit(x = gsub(pattern = "[.]",replacement = " ",x = x),split = c(" "))[[1]][1]})

 df <- lapply(temp, read_tsv, col_names = FALSE)
 df <- lapply(df,function(x){colnames(x)<-c("name", "start",  "end" ,"strand"  ,"col" ,"lty" ,"lwd" ,"pch" ,"cex", "gene_type");x})
 df <- lapply(df,function(x){dna_seg(x)})
 names(df) <- gbknames



 annot<-lapply(df,function(x){annot<-annotation(x1=x$start+10,
                                               x2=x$end-10,
                                               text=x$name,
                                               rot=replicate(nrow(x),35));
 annot$text<-paste(substr(x$name,start = 0,stop = labelS),"...")
 annot})



 uniqnames<-unique(do.call(rbind.data.frame, df)["name"])
 uniqnames<-sort(uniqnames[,1])
 color <- grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
 colors<-sample(color, length(uniqnames))
 df2color<-data.frame(as.matrix(uniqnames),as.matrix(colors))
 df2color<-t(df2color)
 colnames(df2color)<-df2color[1,]
 df2color<- df2color[-1,]
 df<-lapply(df,function(x){x["col"]<-"black";x})
 df<-lapply(df,function(x){x["fill"]<-df2color[x$name];x})
 uniqnames<-gsub(pattern = "_",x = as.matrix(uniqnames),replacement = " ")
 uniqnames<-gsub(pattern = "[.]",x = as.matrix(uniqnames),replacement = ",")

 plot(c(0,1000), c(0,1000), type="n", axes=FALSE, xlab="", ylab="")
 legend("center", legend = c(as.matrix(uniqnames)),ncol = 1,xpd = NA, cex = 0.8,
       bty="n",fill=c(as.matrix(colors)),border = c("white"),title = "Genes")
 plot_gene_map(dna_segs = df, dna_seg_label_cex = 0.4, fixed_gap_length = TRUE, annotations = annot, annotation_cex = 0.4)
diff --git a/slicer.py b/slicer.py
 #!/usr/bin/python

 # This script is designed to take a genbank file and 'slice out'/'subset'
 # regions (genes/operons etc.) and produce a separate file. This can be
 # done explicitly by telling the script which base sites to use, or can
 # 'decide' for itself by blasting a fasta of the sequence you're inter-
 # ed in against the Genbank you want to slice a record out of.

 # Note, the script (obviously) does not preseve the index number of the
 # bases from the original

 # Based upon the tutorial at:
 # http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc44

 # This script depends on BLAST and having the makeblastdb functionality
 # available if BLAST_MODE is active. It also depends on Biopython.

 # Set up and handle arguments:

 import warnings
 import sys
 import subprocess
 import os
 import time
 from Bio import SeqIO

 __author__ = "Joe R. J. Healey"
 __version__ = "1.1.0"
 __title__ = "Genbank_slicer"
 __license__ = "GPLv3"
 __author_email__ = "[email protected]"

 # Import SearchIO and suppress experimental warning
 from Bio import BiopythonExperimentalWarning
 with warnings.catch_warnings():
 	warnings.simplefilter('ignore', BiopythonExperimentalWarning)
 	from Bio import SearchIO


 def convert(basename, genbank):
 	'''Convert the provided genbank to a fasta to BLAST.'''

 	refFasta = "{}.fasta.tmp".format(basename)
 	SeqIO.convert(genbank, 'genbank', refFasta, 'fasta')

 	return refFasta

 def runBlast(basename, refFasta, fasta, verbose):
 	'''Synthesise BLAST commands and make system calls'''

 	resultHandle = "{}.blastout.tmp".format(basename)
 	blastdb_cmd = 'makeblastdb -in {0} -dbtype nucl -title temp_blastdb'.format(refFasta)
 	blastn_cmd = 'blastn -query {0} -strand both -task blastn -db {1} -perc_identity 100 -outfmt 6 -out {2} -max_target_seqs 1'.format(fasta, refFasta, resultHandle)

 	print("Constructing BLAST Database: " + '\n' + blastdb_cmd)
 	print("BLASTing: " + '\n' + blastn_cmd)
 	DB_process = subprocess.Popen(blastdb_cmd,
 				      shell=True,
 				      stdin=subprocess.PIPE,
 				      stdout=subprocess.PIPE,
 				      stderr=subprocess.PIPE)
 	DB_process.wait()
 	BLAST_process = subprocess.Popen(blastn_cmd,
 					 shell=True,
 					 stdin=subprocess.PIPE,
 					 stdout=subprocess.PIPE,
 					 stderr=subprocess.PIPE)
 	BLAST_process.wait()

 	return resultHandle

 def getIndices(resultHandle):
 	'''If not provided directly by the user, this function retrieves the best BLAST hit's indices.'''

 	blast_result = SearchIO.read(resultHandle, 'blast-tab')

 	print(blast_result[0][0])
 	start = blast_result[0][0].hit_start
 	end = blast_result[0][0].hit_end

 	return start, end

 def slice(start, end, genbank, FPoffset, TPoffset):
 	'''Subset the provided genbank to return the sub record.'''

 	seqObj = SeqIO.read(genbank, 'genbank')
 	subRecord = seqObj[start-FPoffset:end+TPoffset]

 	return subRecord


 def main():
 ###################################################################################################
 # Parse command line arguments
 	import argparse
 	try:
   		parser = argparse.ArgumentParser(
   			description='This script slices entries such as genes or operons out of a genbank, subsetting them as their own file.')
 		parser.add_argument(
 			'-g',
 			'--genbank',
 			action='store',
 			required=True,
 			help='The genbank file you wish to subset.')
 		parser.add_argument(
 			'-o',
 			'--outfile',
 			action='store',
 			help='If specifed, the script will write a file, otherwise redirect STDOUT for pipelines.')
 		parser.add_argument(
 			'-s',
 			'--start',
 			type=int,
 			help='Integer base index to slice from.')
 		parser.add_argument(
 			'-e',
 			'--end',
 			type=int,
 			default=0,
 			help='Integer base index to slice to.')
 		parser.add_argument(
 			'-F',
 			'--FPoffset',
 			action='store',
 			type=int,
 			default=0,
 			help='If you want to capture regions around the target site, specify the 5\' offset.')
 		parser.add_argument(
 			'-T',
 			'--TPoffset',
 			action='store',
 			type=int,
 			default=0,
 			help='If you want to capture regions around the target site, specify the 3\' offset.')
 		parser.add_argument(
 			'-b',
 			'--blastmode',
 			action='store_true',
 			help='If flag is set, provide an input fasta (-f | --fasta), and BLAST will be called to find your sequence indices in the original genbank.')
 		parser.add_argument(
 			'-f',
 			'--fasta',
 			action='store',
 			help='The operon fasta to pull annotations from the provided genbank.')
 		parser.add_argument(
 			'-t',
 			'--tidy',
 			action='store_true',
 			help='Tell the script whether or not to remove the temporary files it generated during processing. Off by default. WARNING: removes files based on the "tmp" string.')
 		parser.add_argument(
 			'-m',
 			'--meta',
 			action='store',
 			default=None,
 			help='Specify a string to use as the Genbank meta-data fields if the parent one doesn\'t contain anything. Else inherits from parent sequence.')
 		parser.add_argument(
 			'-v',
 			'--verbose',
 			action='store_true',
 			help='Verbose behaviour. Shows progress of BLAST etc. if appropriate.')


 		args = parser.parse_args()

 	except NameError:
 		print "An exception occured with argument parsing. Check your provided options."

 	genbank   =  args.genbank
 	fasta     =  args.fasta
 	split     =  os.path.splitext(args.genbank)
 	basename  =  os.path.basename(split[0])
 	start     =  args.start
 	end       =  args.end
 	FPoffset  =  args.FPoffset
 	TPoffset  =  args.TPoffset
 	blastMode =  args.blastmode
 	outfile   =  args.outfile
 	fasta     =  args.fasta
 	tidy      =  args.tidy
 	meta	  =  args.meta
 	verbose   =  args.verbose

 # Main code:
 	if blastMode is not False and fasta is not None:
 		refFasta = convert(basename,genbank)
 		resultHandle = runBlast(basename, refFasta, fasta, verbose)
 		start, end = getIndices(resultHandle)
 	else:
 		if fasta is None:
 			print("No fasta was provided so BLAST mode cannot be used.")

 	if start is None or end is None:
        	print('No slice indices have been specified or retrieved from blastout')
                sys.exit(1)


 	subRecord = slice(start, end, genbank, FPoffset, TPoffset)
 	# Populate the metadata of the genbank
 	if meta is not None:
 		subRecord.id = meta
 		subRecord.locus = meta + 'subregion'
 		subRecord.accession = meta
 		subRecord.name = meta
 		subRecord.annotations["date"] = time.strftime("%d-%b-%Y").upper()
 		# Other field options include:
 		#  subRecord.annotations["source"]
 		#			["taxonomy"]
 		#			["keywords"]
 		#			["references"]
 		#			["accessions"]
 		#			["data_file_division"] e.g. BCT, PLN, UNK
 		#			["organism"]
 		#			["topology"]

 	if outfile is not None:
 		SeqIO.write(subRecord, outfile, "genbank")
 	else:
 		print(subRecord.format('genbank'))

 	if tidy is True:
 		if verbose is True:
 			rm="rm -v ./*tmp*"
 		else:
 			rm="rm ./*tmp*"

 		subprocess.Popen(rm,shell=True)

 if __name__ == "__main__":
 	main()
	#!/usr/bin/env perl
	#===============================================================================
	#
	# FILE: gb2ptt.pl
	#
	# USAGE: ./gb2ptt.pl
	#
	# DESCRIPTION: Script to convert a genbank flat file to ptt
	#
	# OPTIONS: ---
	# REQUIREMENTS: ---
	# BUGS: ---
	# NOTES: ---
	# AUTHOR: Dr. Scott A. Givan (sag), [email protected]
	# COMPANY: University of Missouri, USA
	# VERSION: 1.0
	# CREATED: 05/04/15 15:23:32
	# REVISION: ---
	#===============================================================================

	use 5.010; # Require at least Perl version 5.10
	use autodie;
	use Getopt::Long; # use GetOptions function to for CL args
	use warnings;
	use strict;
	use Bio::SeqIO;

	my ($debug,$verbose,$help,$infile);

	my $result = GetOptions(
	"debug" => \$debug,
	"verbose" => \$verbose,
	"help" => \$help,
	"infile:s" => \$infile,
	);

	if ($help) {
	help();
	exit(0);
	}

	sub help {

	say <<HELP;

	--debug
	--verbose
	--help
	--infile

	HELP

	}

	$infile = 'test' unless ($infile);

	my $seqio = Bio::SeqIO->new(
	-file => $infile,
	-format => 'genbank',
	);

	open(my $PTT, ">", $infile . ".ptt");
	open(my $RNT, ">", $infile . ".rnt");

	my %tags = ();
	while (my $seq = $seqio->next_seq()) {
	my $header1 = $seq->desc() \|\| 'unknown' . " - 1.." . $seq->length();

	if ($debug) {
	say "\$seq isa '" . ref($seq) . "'" if ($debug);
	say $seq->desc() . " - 1.." . $seq->length();
	say "CDS: " . scalar($seq->get_SeqFeatures('CDS'));
	exit();
	}

	my @CDS = $seq->get_SeqFeatures('CDS');
	my @RNA = $seq->get_SeqFeatures('tRNA');
	push(@RNA,$seq->get_SeqFeatures('rRNA'));

	my $featcnt = 0;

	say $PTT $header1;
	say $PTT scalar(@CDS) . " proteins";
	say $RNT $header1;
	say $RNT scalar(@RNA) . " RNAs";
	say $PTT "Location\tStrand\tLength\tPID\tGene\tSynonym\tCode\tCOG\tProduct";
	say $RNT "Location\tStrand\tLength\tPID\tGene\tSynonym\tCode\tCOG\tProduct";

	for my $feature (@CDS) {

	next if $feature->has_tag('pseudo');

	my $tag = $feature->primary_tag();
	++$tags{$tag};


	last if ($debug && ++$featcnt > 10);

	my $start = $feature->start();
	my $stop = $feature->end();
	my $strand = $feature->strand();
	my $length = $feature->length();
	my @pid = $feature->has_tag('protein_id') ? $feature->get_tag_values('protein_id') : $feature->get_tag_values('locus_tag');
	my @gene = $feature->has_tag('gene') ? $feature->get_tag_values('gene') : '-';
	my @synonym = $feature->get_tag_values('locus_tag');
	my $code = '-';
	my $cog = '-';
	my @description = $feature->get_tag_values('product');

	if ($strand > 0) {
	$strand = '+';
	} elsif ($strand < 0) {
	$strand = '-';
	}

	# my @misc = $feature->primary_tag('misc_feature');
	#
	# for my $misc_feature (@misc) {
	# say "\$misc_feature isa '" . ref($misc_feature) . "'";
	# if ($misc_feature->has_tag('note')) {
	# my @notes = $misc_feature->get_tag_values('note');
	# for my $note (@notes) {
	# if ($note =~ /(COG\d\d\d\d)/) {
	# $cog = $1;
	# }
	# }
	# }
	# }

	say $PTT $start . ".." . "$stop\t$strand\t$length\t$pid[0]\t$gene[0]\t$synonym[0]\t$code\t$cog\t$description[0]";
	}

	#foreach my $feature (sort { $a->start() <=> $b->start() } (@tRNA, @rRNA)) {
	for my $feature (sort { $a->start() <=> $b->start() } @RNA) {

	my $start = $feature->start();
	my $stop = $feature->end();
	my $strand = '+';
	my $length = $feature->length();
	my $pid = $feature->has_tag('db_xref') ? ($feature->get_tag_values('db_xref'))[0] : ($feature->get_tag_values('locus_tag'))[0];
	my @gene = $feature->has_tag('gene') ? $feature->get_tag_values('gene') : '-';
	my @synonym = $feature->get_tag_values('locus_tag');
	my $code = '-';
	my $cog = '-';
	my @description = $feature->get_tag_values('product');

	say $RNT $start . ".." . "$stop\t$strand\t$length\t$pid\t$gene[0]\t$synonym[0]\t$code\t$cog\t$description[0]";
	}
	}
	library(ggplot2)
	library(genoPlotR)

	setwd("~/Downloads/gb_files_geno/")
	labelS <- 10L
	temp <-list.files(pattern="*genoplot.tsv")


	gbknames<- lapply(as.list(temp),function(x){strsplit(x = gsub(pattern = "[.]",replacement = " ",x = x),split = c(" "))[[1]][1]})

	df <- lapply(temp, read_tsv, col_names = FALSE)
	df <- lapply(df,function(x){colnames(x)<-c("name", "start", "end" ,"strand" ,"col" ,"lty" ,"lwd" ,"pch" ,"cex", "gene_type");x})
	df <- lapply(df,function(x){dna_seg(x)})
	names(df) <- gbknames



	annot<-lapply(df,function(x){annot<-annotation(x1=x$start+10,
	x2=x$end-10,
	text=x$name,
	rot=replicate(nrow(x),35));
	annot$text<-paste(substr(x$name,start = 0,stop = labelS),"...")
	annot})



	uniqnames<-unique(do.call(rbind.data.frame, df)["name"])
	uniqnames<-sort(uniqnames[,1])
	color <- grDevices::colors()[grep('gr(a\|e)y', grDevices::colors(), invert = T)]
	colors<-sample(color, length(uniqnames))
	df2color<-data.frame(as.matrix(uniqnames),as.matrix(colors))
	df2color<-t(df2color)
	colnames(df2color)<-df2color[1,]
	df2color<- df2color[-1,]
	df<-lapply(df,function(x){x["col"]<-"black";x})
	df<-lapply(df,function(x){x["fill"]<-df2color[x$name];x})
	uniqnames<-gsub(pattern = "_",x = as.matrix(uniqnames),replacement = " ")
	uniqnames<-gsub(pattern = "[.]",x = as.matrix(uniqnames),replacement = ",")

	plot(c(0,1000), c(0,1000), type="n", axes=FALSE, xlab="", ylab="")
	legend("center", legend = c(as.matrix(uniqnames)),ncol = 1,xpd = NA, cex = 0.8,
	bty="n",fill=c(as.matrix(colors)),border = c("white"),title = "Genes")
	plot_gene_map(dna_segs = df, dna_seg_label_cex = 0.4, fixed_gap_length = TRUE, annotations = annot, annotation_cex = 0.4)
	#!/usr/bin/python

	# This script is designed to take a genbank file and 'slice out'/'subset'
	# regions (genes/operons etc.) and produce a separate file. This can be
	# done explicitly by telling the script which base sites to use, or can
	# 'decide' for itself by blasting a fasta of the sequence you're inter-
	# ed in against the Genbank you want to slice a record out of.

	# Note, the script (obviously) does not preseve the index number of the
	# bases from the original

	# Based upon the tutorial at:
	# http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc44

	# This script depends on BLAST and having the makeblastdb functionality
	# available if BLAST_MODE is active. It also depends on Biopython.

	# Set up and handle arguments:

	import warnings
	import sys
	import subprocess
	import os
	import time
	from Bio import SeqIO

	__author__ = "Joe R. J. Healey"
	__version__ = "1.1.0"
	__title__ = "Genbank_slicer"
	__license__ = "GPLv3"
	__author_email__ = "[email protected]"

	# Import SearchIO and suppress experimental warning
	from Bio import BiopythonExperimentalWarning
	with warnings.catch_warnings():
	warnings.simplefilter('ignore', BiopythonExperimentalWarning)
	from Bio import SearchIO


	def convert(basename, genbank):
	'''Convert the provided genbank to a fasta to BLAST.'''

	refFasta = "{}.fasta.tmp".format(basename)
	SeqIO.convert(genbank, 'genbank', refFasta, 'fasta')

	return refFasta

	def runBlast(basename, refFasta, fasta, verbose):
	'''Synthesise BLAST commands and make system calls'''

	resultHandle = "{}.blastout.tmp".format(basename)
	blastdb_cmd = 'makeblastdb -in {0} -dbtype nucl -title temp_blastdb'.format(refFasta)
	blastn_cmd = 'blastn -query {0} -strand both -task blastn -db {1} -perc_identity 100 -outfmt 6 -out {2} -max_target_seqs 1'.format(fasta, refFasta, resultHandle)

	print("Constructing BLAST Database: " + '\n' + blastdb_cmd)
	print("BLASTing: " + '\n' + blastn_cmd)
	DB_process = subprocess.Popen(blastdb_cmd,
	shell=True,
	stdin=subprocess.PIPE,
	stdout=subprocess.PIPE,
	stderr=subprocess.PIPE)
	DB_process.wait()
	BLAST_process = subprocess.Popen(blastn_cmd,
	shell=True,
	stdin=subprocess.PIPE,
	stdout=subprocess.PIPE,
	stderr=subprocess.PIPE)
	BLAST_process.wait()

	return resultHandle

	def getIndices(resultHandle):
	'''If not provided directly by the user, this function retrieves the best BLAST hit's indices.'''

	blast_result = SearchIO.read(resultHandle, 'blast-tab')

	print(blast_result[0][0])
	start = blast_result[0][0].hit_start
	end = blast_result[0][0].hit_end

	return start, end

	def slice(start, end, genbank, FPoffset, TPoffset):
	'''Subset the provided genbank to return the sub record.'''

	seqObj = SeqIO.read(genbank, 'genbank')
	subRecord = seqObj[start-FPoffset:end+TPoffset]

	return subRecord


	def main():
	###################################################################################################
	# Parse command line arguments
	import argparse
	try:
	parser = argparse.ArgumentParser(
	description='This script slices entries such as genes or operons out of a genbank, subsetting them as their own file.')
	parser.add_argument(
	'-g',
	'--genbank',
	action='store',
	required=True,
	help='The genbank file you wish to subset.')
	parser.add_argument(
	'-o',
	'--outfile',
	action='store',
	help='If specifed, the script will write a file, otherwise redirect STDOUT for pipelines.')
	parser.add_argument(
	'-s',
	'--start',
	type=int,
	help='Integer base index to slice from.')
	parser.add_argument(
	'-e',
	'--end',
	type=int,
	default=0,
	help='Integer base index to slice to.')
	parser.add_argument(
	'-F',
	'--FPoffset',
	action='store',
	type=int,
	default=0,
	help='If you want to capture regions around the target site, specify the 5\' offset.')
	parser.add_argument(
	'-T',
	'--TPoffset',
	action='store',
	type=int,
	default=0,
	help='If you want to capture regions around the target site, specify the 3\' offset.')
	parser.add_argument(
	'-b',
	'--blastmode',
	action='store_true',
	help='If flag is set, provide an input fasta (-f \| --fasta), and BLAST will be called to find your sequence indices in the original genbank.')
	parser.add_argument(
	'-f',
	'--fasta',
	action='store',
	help='The operon fasta to pull annotations from the provided genbank.')
	parser.add_argument(
	'-t',
	'--tidy',
	action='store_true',
	help='Tell the script whether or not to remove the temporary files it generated during processing. Off by default. WARNING: removes files based on the "tmp" string.')
	parser.add_argument(
	'-m',
	'--meta',
	action='store',
	default=None,
	help='Specify a string to use as the Genbank meta-data fields if the parent one doesn\'t contain anything. Else inherits from parent sequence.')
	parser.add_argument(
	'-v',
	'--verbose',
	action='store_true',
	help='Verbose behaviour. Shows progress of BLAST etc. if appropriate.')


	args = parser.parse_args()

	except NameError:
	print "An exception occured with argument parsing. Check your provided options."

	genbank = args.genbank
	fasta = args.fasta
	split = os.path.splitext(args.genbank)
	basename = os.path.basename(split[0])
	start = args.start
	end = args.end
	FPoffset = args.FPoffset
	TPoffset = args.TPoffset
	blastMode = args.blastmode
	outfile = args.outfile
	fasta = args.fasta
	tidy = args.tidy
	meta = args.meta
	verbose = args.verbose

	# Main code:
	if blastMode is not False and fasta is not None:
	refFasta = convert(basename,genbank)
	resultHandle = runBlast(basename, refFasta, fasta, verbose)
	start, end = getIndices(resultHandle)
	else:
	if fasta is None:
	print("No fasta was provided so BLAST mode cannot be used.")

	if start is None or end is None:
	print('No slice indices have been specified or retrieved from blastout')
	sys.exit(1)


	subRecord = slice(start, end, genbank, FPoffset, TPoffset)
	# Populate the metadata of the genbank
	if meta is not None:
	subRecord.id = meta
	subRecord.locus = meta + 'subregion'
	subRecord.accession = meta
	subRecord.name = meta
	subRecord.annotations["date"] = time.strftime("%d-%b-%Y").upper()
	# Other field options include:
	# subRecord.annotations["source"]
	# ["taxonomy"]
	# ["keywords"]
	# ["references"]
	# ["accessions"]
	# ["data_file_division"] e.g. BCT, PLN, UNK
	# ["organism"]
	# ["topology"]

	if outfile is not None:
	SeqIO.write(subRecord, outfile, "genbank")
	else:
	print(subRecord.format('genbank'))

	if tidy is True:
	if verbose is True:
	rm="rm -v ./tmp"
	else:
	rm="rm ./tmp"

	subprocess.Popen(rm,shell=True)

	if __name__ == "__main__":
	main()