fomightez · August 29, 2015 14:20
diff --git a/README.md b/README.md
diff --git a/file_hierarchy_processorv0.1.py b/file_hierarchy_processorv0.1.py
 #! /usr/bin/env python

 # file_hierarchy_processorv0.1 by Wayne Decatur
 # ver 0.1
 #
 #
 #
 #*******************************************************************************
 # USES Python 2.7
 # PURPOSE: Takes a demo file hierarchy and traverses it, processing the FASTA-
 # formatted sequences of protein coding regions to get coordinates to mine
 # the corresponding genome FASTA sequence
 #
 # Dependencies:
 # Beyond biopython, only typical modules like os, sys, and fnmatch.
 #
 # Adjust the 'USER ADJUSTABLE VALUES' to match the settings you need.
 #
 # v.0.1. Started
 #
 #
 #
 # TO RUN:
 # For example, enter on the command line terminal, the line
 #-----------------------------------
 # python file_hierarchy_processorv0.1.py
 #-----------------------------------
 # or run in your favorite IDE, such as IDLE or Canopy.
 #
 #
 #*******************************************************************************


 ##################################
 #  USER ADJUSTABLE VALUES        #

 ##################################
 #

 #
 #*******************************************************************************
 #*******************************************************************************






 #*******************************************************************************
 #*******************************************************************************
 ###DO NOT EDIT BELOW HERE - ENTER VALUES ABOVE###























 ###---------------------------HELPER FUNCTIONS---------------------------------###


 def Write2File_orPrint_Lines_Of_Output_List(a_list,OutputFile):
     '''
     Send each item of a list, one per line, to a file or to stdout.

     a_list     -- iterable of items; each is formatted with '%s' and
                   followed by a newline.
     OutputFile -- name of the file to (over)write. Pass None or an empty
                   string to send the lines to stdout instead, which is
                   handy while drafting code.

     Returns None.

     Earlier drafts of this helper always printed to stdout and ignored
     OutputFile (the file redirection was commented out), even though the
     calling script announces that the file has been created. The file is
     now really written whenever a name is supplied.
     Originally adapted from
     http://stackoverflow.com/questions/4675728/redirect-stdout-to-a-file-in-python
     '''
     # Local import so the helper does not rely on the module-level
     # 'import sys' that appears further down in the script.
     import sys

     # Wrapping each item in a 1-tuple keeps '%s' formatting safe even if an
     # item is itself a tuple.
     lines_of_text = ["%s\n" % (the_line,) for the_line in a_list]

     if OutputFile:
         # 'with' guarantees the handle is closed even if a write fails.
         with open(OutputFile, 'w') as output_handle:
             output_handle.writelines(lines_of_text)
     else:
         sys.stdout.writelines(lines_of_text)



 ###--------------------------END OF HELPER FUNCTIONS---------------------------###





 ###-----------------Actual Main function of script---------------------------###

 import os
 import sys
 import fnmatch
 from Bio import SeqIO

 # Accumulates every mined genome fragment, in the order encountered.
 mined_text_list = []

 anchor_dir = '.'
 # Walk the tree rooted at the 'anchor' directory. For each directory visited,
 # os.walk gives the path of the current directory (built on anchor_dir), a
 # list of the sub-directories in it, and a list of the files in it.
 for current_dir_path_and_name, subdir_list, file_list in os.walk(anchor_dir):
     protein_file_names = fnmatch.filter(file_list, '*.ffn')
     # Identify the genome sequence among the file_list; only one file is
     # expected to match '*.ffa', so the first match is used.
     genome_matches = fnmatch.filter(file_list, '*.ffa')
     if not genome_matches:
         # Directories without a genome file (for example the top directory)
         # are skipped outright. Previously the IndexError was swallowed, which
         # left genome_file either undefined or still pointing at the genome
         # of the previously visited directory.
         if protein_file_names:
             sys.stderr.write(
                 "Skipping " + current_dir_path_and_name +
                 ": no '*.ffa' genome file found.\n")
         continue
     genome_file = genome_matches[0]
     # Read the genome record(s) once per directory instead of re-parsing the
     # genome file for every protein record.
     genome_sequences = [
         str(record.seq) for record in SeqIO.parse(
             os.path.join(current_dir_path_and_name, genome_file), "fasta")]

     # Now go through the files that end in '.ffn', mine information out of
     # the description line, and use it to determine what simple information
     # to capture from the genome file for this simple demonstration.
     for file_name in protein_file_names:
         mined_genome_info = ""
         # SeqIO.parse opens the file itself, so it needs the full path; see
         # Martijn's answer at
         # http://stackoverflow.com/questions/13571134/python-how-to-recursively-go-through-all-subdirectories-and-read-files
         for seq_record in SeqIO.parse(os.path.join(current_dir_path_and_name, file_name), "fasta"):
             # The id ends in ':<start>-<end>'; split that part off.
             print(seq_record.id)
             info_list = seq_record.id.split(":")
             coordinates_list = info_list[-1].split("-")
             print(coordinates_list)
             # Purely for the demo, only the FIRST digit of the start and the
             # first TWO digits of the end are used as slice bounds
             # (presumably because the demo genomes are much shorter than
             # the real coordinates -- confirm before reusing on real data).
             seq_to_mine_start = int(coordinates_list[0][0])
             print(seq_to_mine_start)
             seq_to_mine_end = int(coordinates_list[1][0:2])
             print(seq_to_mine_end)
             # Use those coordinates to mine info from each genome record.
             for genome_sequence in genome_sequences:
                 mined_genome_info = genome_sequence[seq_to_mine_start:seq_to_mine_end + 1]
                 mined_text_list.append(mined_genome_info)





 ###--------GENERATE OUTPUT AND GIVE USER FEEDBACK---------------###

 # Name handed to the output helper as the intended destination.
 output_file = 'mined_information.txt'
 Write2File_orPrint_Lines_Of_Output_List(
     a_list=mined_text_list, OutputFile=output_file)

 # Feedback goes to stderr so it never mixes with lines sent to stdout.
 # NOTE(review): this message is emitted unconditionally; whether a file is
 # really produced depends on what the helper above does with OutputFile.
 sys.stderr.write(
     "".join(("The file ", output_file, " has been created.\n")))





 #*******************************************************************************
 #*******************************************************************************
diff --git a/file_hierarchy_simulation_generator.py b/file_hierarchy_simulation_generator.py
 #! /usr/bin/env python

 # file_hierarchy_simulation_generator.py by Wayne Decatur
 # ver 0.1
 #
 #
 #
 #*******************************************************************************
 # USES Python 2.7
 # PURPOSE: generate fake file hierarchy for testing iterative processing of
 # folders containing genome and protein sequence information.
 #
 # Dependencies:
 # Only typical modules like os.
 #
 # Adjust the 'USER ADJUSTABLE VALUES' to match the settings you need.
 #
 # v.0.1. Started
 #
 #
 #
 # TO RUN:
 # For example, enter on the command line terminal, the line
 #-----------------------------------
 # python file_hierarchy_simulation_generator.py
 #-----------------------------------
 # or run in your favorite IDE, such as IDLE or Canopy.
 #
 #
 #*******************************************************************************


 ##################################
 #  USER ADJUSTABLE VALUES        #

 ##################################
 #

 # Names of the demo directories. The generator loop further down calls
 # os.mkdir on each name as given, so they are created relative to the current
 # working directory. The position (idx) of a name selects which entries of the
 # two tuples below are written into that directory.
 directories_to_make = (
    'a_marina',
    'e_coli',
    'b_subtilis',
    'v_parahaemolyticus',
    'c_perfringens',
    'l_acidophilus')

 # FASTA text for the protein-coding files: two entries per directory. The
 # generator loop writes entry 2*idx to prot_1.ffn and entry 2*idx+1 to
 # prot_2.ffn, so this tuple must hold twice as many items as
 # directories_to_make. The '<start>-<end>' after the ':' in each header is
 # what the processor script splits out as coordinates. The same four records
 # are simply repeated three times to fill the six demo directories.
 sequences_to_use = (
    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC',
    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC',
    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC'
    )

 # FASTA text for the genome files: one entry per directory, written to
 # genome.ffa by the generator loop using the same idx as directories_to_make.
 # Each entry is a header line followed by a short, multi-line excerpt.
 genomes_to_use = (
    '>gi|158303474|gb|CP000828.1| Acaryochloris marina MBIC11017, complete genome\nAATAAATACTTACAGGTATTCCACCTGAAACTCTTTCTATGAATGACTTTCAAGTCTATATCCTATATTT\nATCCTCAATAAAATATGCACAATAGATCTCTACTGAGAAAACTTTATATTTTAGAAGCAATTCATCTCCC\nTTTTAAAATAC',
    '>gi|26111730|gb|AE014075.1| Escherichia coli CFT073, complete genome\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC\nTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA\nTATAGGCATAGCG',
    '>gi|255767013|ref|NC_000964.3| Bacillus subtilis subsp. subtilis str. 168 chromosome, complete genome\nATCTTTTTCGGCTTTTTTTAGTATCCACAGAGGTTATCGACAACATTTTCACATTACCAACCCCTGTGGA\nCAAGGTTTTTTCAACAGGTTGTCCGCTTTGTGGATAAGATTGTGACAACCATTGCAAGCTCTCGTTTATT\nTTGGTATTAT',
    '>gi|686270189|ref|NZ_JNTW01000020.1| Vibrio parahaemolyticus strain CFSAN001611 CFSAN001611_contig0019, whole genome shotgun sequence\nGCAATTGCTTGGTCTTTTTGTTTCGATTAAGGGCCTAAACAGCTATAAAACCGCTTTTTCTTTATTTTTT\nAGCAGCTTATCCATTTCATCTCGATTCGCGATGAAGGTTGCCATCTCCTTCTTAGGAACAGAGCTCGCCA\nTCGGAATATTTG',
    '>gi|47118322|dbj|BA000016.3| Clostridium perfringens str. 13 DNA, complete genome\nTCTAAATAAGTTTTACACAAAATAAGTTATCAACAGCTGTTATTTTTGTGGATAACTTATTGAATCCAAC\nTATACCTTTATGTTATCATATTAATGCATTGTGAATAACTTTATCTAATATAACAACTTATCCACACTTG\nTGAATAATCCTGTTGAT',
    '>gi|238694164|ref|NZ_GG669567.1| Lactobacillus acidophilus ATCC 4796 SCAFFOLD2, whole genome shotgun sequence\nCTATTGTTGAATTAAAATCGATTTGTTGGAATTCCTTGATTAGTTCAATTATAGATGGTGAAACATTTCC\nTTTTGATTTAGTTGCAATGAAGAAATCAATATAAATTAAACTTTTGTCTATTGGTAATAAATTTATAGGA\nGACTGTTTTATTCG'
    )

 #
 #*******************************************************************************
 #*******************************************************************************




















 #*******************************************************************************
 #*******************************************************************************
 ###DO NOT EDIT BELOW HERE - ENTER VALUES ABOVE###



 ###-----------------Actual Main function of script---------------------------###
 # The last example at http://pymotw.com/2/ospath/ shows how to make a simple
 # directory hierarchy with files. This is based on that.
 import os

 # Build one directory per entry of directories_to_make and fill it with two
 # protein-coding FASTA files plus one genome FASTA file.
 for idx, directory in enumerate(directories_to_make):
     # Only create the directory when it is missing, so the script can be
     # re-run (os.mkdir raises if the directory already exists). Existing demo
     # files are simply overwritten below.
     if not os.path.isdir(directory):
         os.mkdir(directory)
     # Directory number idx receives sequence entries 2*idx and 2*idx+1 and
     # genome entry idx.
     files_to_write = (
         ("prot_1.ffn", sequences_to_use[(2*idx)+0]),
         ("prot_2.ffn", sequences_to_use[(2*idx)+1]),
         ("genome.ffa", genomes_to_use[idx]),
     )
     for demo_file_name, fasta_text in files_to_write:
         # 'with' closes the handle even if the write fails; os.path.join
         # keeps the path portable instead of hard-coding "/".
         with open(os.path.join(directory, demo_file_name), "w") as file_write_handle:
             file_write_handle.write(fasta_text)


 #*******************************************************************************
 #*******************************************************************************
	#! /usr/bin/env python

	# file_hierarchy_processorv0.1 by Wayne Decatur
	# ver 0.1
	#
	#
	#
	#*******************************************************************************
	# USES Python 2.7
	# PURPOSE: Takes a demo file hierarchy and traverses it, processing the FASTA-
	# formatted sequences of protein coding regions to get coordinates to mine
	# the corresponding genome FASTA sequence
	#
	# Dependencies:
	# Beyond biopython, only typical modules like os, sys, and fnmatch.
	#
	# Adjust the 'USER ADJUSTABLE VALUES' to match the settings you need.
	#
	# v.0.1. Started
	#
	#
	#
	# TO RUN:
	# For example, enter on the command line terminal, the line
	#-----------------------------------
	# python file_hierarchy_processorv0.1.py
	#-----------------------------------
	# or run in your favorite IDE, such as IDLE or Canopy.
	#
	#
	#*******************************************************************************


	##################################
	# USER ADJUSTABLE VALUES #

	##################################
	#

	#
	#*******************************************************************************
	#*******************************************************************************






	#*******************************************************************************
	#*******************************************************************************
	###DO NOT EDIT BELOW HERE - ENTER VALUES ABOVE###























	###---------------------------HELPER FUNCTIONS---------------------------------###


	def Write2File_orPrint_Lines_Of_Output_List(a_list,OutputFile):
	    '''
	    Send each item of a list, one per line, to a file or to stdout.

	    a_list     -- iterable of items; each is formatted with '%s' and
	                  followed by a newline.
	    OutputFile -- name of the file to (over)write. Pass None or an empty
	                  string to send the lines to stdout instead, which is
	                  handy while drafting code.

	    Returns None.

	    This copy of the function had lost its indentation and, like earlier
	    drafts, ignored OutputFile (the file redirection was commented out)
	    even though the calling script announces that the file has been
	    created. The block structure is restored and the file is now really
	    written whenever a name is supplied.
	    Originally adapted from
	    http://stackoverflow.com/questions/4675728/redirect-stdout-to-a-file-in-python
	    '''
	    # Local import so the helper does not rely on the module-level
	    # 'import sys' that appears further down in the script.
	    import sys

	    # Wrapping each item in a 1-tuple keeps '%s' formatting safe even if
	    # an item is itself a tuple.
	    lines_of_text = ["%s\n" % (the_line,) for the_line in a_list]

	    if OutputFile:
	        # 'with' guarantees the handle is closed even if a write fails.
	        with open(OutputFile, 'w') as output_handle:
	            output_handle.writelines(lines_of_text)
	    else:
	        sys.stdout.writelines(lines_of_text)



	###--------------------------END OF HELPER FUNCTIONS---------------------------###





	###-----------------Actual Main function of script---------------------------###

	import os
	import sys
	import fnmatch
	from Bio import SeqIO

	# Accumulates every mined genome fragment, in the order encountered.
	mined_text_list = []

	anchor_dir = '.'
	# Walk the tree rooted at the 'anchor' directory. For each directory visited,
	# os.walk gives the path of the current directory (built on anchor_dir), a
	# list of the sub-directories in it, and a list of the files in it.
	for current_dir_path_and_name, subdir_list, file_list in os.walk(anchor_dir):
	    protein_file_names = fnmatch.filter(file_list, '*.ffn')
	    # Identify the genome sequence among the file_list; only one file is
	    # expected to match '*.ffa', so the first match is used.
	    genome_matches = fnmatch.filter(file_list, '*.ffa')
	    if not genome_matches:
	        # Directories without a genome file (for example the top directory)
	        # are skipped outright. Swallowing the IndexError instead would
	        # leave genome_file either undefined or still pointing at the
	        # genome of the previously visited directory.
	        if protein_file_names:
	            sys.stderr.write(
	                "Skipping " + current_dir_path_and_name +
	                ": no '*.ffa' genome file found.\n")
	        continue
	    genome_file = genome_matches[0]
	    # Read the genome record(s) once per directory instead of re-parsing
	    # the genome file for every protein record.
	    genome_sequences = [
	        str(record.seq) for record in SeqIO.parse(
	            os.path.join(current_dir_path_and_name, genome_file), "fasta")]

	    # Now go through the files that end in '.ffn', mine information out of
	    # the description line, and use it to determine what simple information
	    # to capture from the genome file for this simple demonstration.
	    for file_name in protein_file_names:
	        mined_genome_info = ""
	        # SeqIO.parse opens the file itself, so it needs the full path; see
	        # Martijn's answer at
	        # http://stackoverflow.com/questions/13571134/python-how-to-recursively-go-through-all-subdirectories-and-read-files
	        for seq_record in SeqIO.parse(os.path.join(current_dir_path_and_name, file_name), "fasta"):
	            # The id ends in ':<start>-<end>'; split that part off.
	            print(seq_record.id)
	            info_list = seq_record.id.split(":")
	            coordinates_list = info_list[-1].split("-")
	            print(coordinates_list)
	            # Purely for the demo, only the FIRST digit of the start and
	            # the first TWO digits of the end are used as slice bounds
	            # (presumably because the demo genomes are much shorter than
	            # the real coordinates -- confirm before reusing on real data).
	            seq_to_mine_start = int(coordinates_list[0][0])
	            print(seq_to_mine_start)
	            seq_to_mine_end = int(coordinates_list[1][0:2])
	            print(seq_to_mine_end)
	            # Use those coordinates to mine info from each genome record.
	            for genome_sequence in genome_sequences:
	                mined_genome_info = genome_sequence[seq_to_mine_start:seq_to_mine_end + 1]
	                mined_text_list.append(mined_genome_info)





	###--------GENERATE OUTPUT AND GIVE USER FEEDBACK---------------###

	# Name handed to the output helper as the intended destination.
	output_file = 'mined_information.txt'
	Write2File_orPrint_Lines_Of_Output_List(
	    a_list=mined_text_list, OutputFile=output_file)

	# Feedback goes to stderr so it never mixes with lines sent to stdout.
	# NOTE(review): this message is emitted unconditionally; whether a file is
	# really produced depends on what the helper above does with OutputFile.
	sys.stderr.write(
	    "".join(("The file ", output_file, " has been created.\n")))





	#*******************************************************************************
	#*******************************************************************************
	#! /usr/bin/env python

	# file_hierarchy_simulation_generator.py by Wayne Decatur
	# ver 0.1
	#
	#
	#
	#*******************************************************************************
	# USES Python 2.7
	# PURPOSE: generate fake file hierarchy for testing iterative processing of
	# folders containing genome and protein sequence information.
	#
	# Dependencies:
	# Only typical modules like os.
	#
	# Adjust the 'USER ADJUSTABLE VALUES' to match the settings you need.
	#
	# v.0.1. Started
	#
	#
	#
	# TO RUN:
	# For example, enter on the command line terminal, the line
	#-----------------------------------
	# python file_hierarchy_simulation_generator.py
	#-----------------------------------
	# or run in your favorite IDE, such as IDLE or Canopy.
	#
	#
	#*******************************************************************************


	##################################
	# USER ADJUSTABLE VALUES #

	##################################
	#

	# Names of the demo directories. The generator loop further down calls
	# os.mkdir on each name as given, so they are created relative to the
	# current working directory. The position (idx) of a name selects which
	# entries of the two tuples below are written into that directory.
	directories_to_make = (
	    'a_marina',
	    'e_coli',
	    'b_subtilis',
	    'v_parahaemolyticus',
	    'c_perfringens',
	    'l_acidophilus')

	# FASTA text for the protein-coding files: two entries per directory (entry
	# 2*idx goes to prot_1.ffn and entry 2*idx+1 to prot_2.ffn), so this tuple
	# must hold twice as many items as directories_to_make. The '<start>-<end>'
	# after the ':' in each header is what the processor script splits out as
	# coordinates.
	# The pipes in the headers are plain '|' characters: the backslash-escaped
	# form ('\|') was a markdown artifact that put literal backslashes into the
	# FASTA ids and is an invalid string escape.
	sequences_to_use = (
	    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
	    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC',
	    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
	    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC',
	    '> gi|158303474|gb|CP000828.1|:3233-4009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGTGCAATTGCATGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|158303475|gb|CP000829.1|:5233-6009 Acaryochloris marina MBIC11017, complete genome\nATGCTAGGGGCAATTGCCTGCTAGGTGCAATTGCATGCTAGGTGCAATTGC',
	    '> gi|758863732|gb|AJO84690.1|:2973916-2975355 nitric oxidase, Escherichia coli, complete genome\nATGAATGTATTAGACTCCAAGCTGGTGTCGCTACTTCGTCAAGA',
	    '> gi|758863727|gb|AJO84685.1|:322233-322933 sorbitol-6-phosphate 2-dehydrogenase, Escherichia coli, complete genome\nATGTCGGACAAGGCTTTGCGCGCTGGTGAGGATGGC'
	    )

	# FASTA text for the genome files: one entry per directory, written to
	# genome.ffa using the same idx as directories_to_make. Each entry is a
	# header line followed by a short, multi-line excerpt.
	genomes_to_use = (
	    '>gi|158303474|gb|CP000828.1| Acaryochloris marina MBIC11017, complete genome\nAATAAATACTTACAGGTATTCCACCTGAAACTCTTTCTATGAATGACTTTCAAGTCTATATCCTATATTT\nATCCTCAATAAAATATGCACAATAGATCTCTACTGAGAAAACTTTATATTTTAGAAGCAATTCATCTCCC\nTTTTAAAATAC',
	    '>gi|26111730|gb|AE014075.1| Escherichia coli CFT073, complete genome\nAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC\nTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA\nTATAGGCATAGCG',
	    '>gi|255767013|ref|NC_000964.3| Bacillus subtilis subsp. subtilis str. 168 chromosome, complete genome\nATCTTTTTCGGCTTTTTTTAGTATCCACAGAGGTTATCGACAACATTTTCACATTACCAACCCCTGTGGA\nCAAGGTTTTTTCAACAGGTTGTCCGCTTTGTGGATAAGATTGTGACAACCATTGCAAGCTCTCGTTTATT\nTTGGTATTAT',
	    '>gi|686270189|ref|NZ_JNTW01000020.1| Vibrio parahaemolyticus strain CFSAN001611 CFSAN001611_contig0019, whole genome shotgun sequence\nGCAATTGCTTGGTCTTTTTGTTTCGATTAAGGGCCTAAACAGCTATAAAACCGCTTTTTCTTTATTTTTT\nAGCAGCTTATCCATTTCATCTCGATTCGCGATGAAGGTTGCCATCTCCTTCTTAGGAACAGAGCTCGCCA\nTCGGAATATTTG',
	    '>gi|47118322|dbj|BA000016.3| Clostridium perfringens str. 13 DNA, complete genome\nTCTAAATAAGTTTTACACAAAATAAGTTATCAACAGCTGTTATTTTTGTGGATAACTTATTGAATCCAAC\nTATACCTTTATGTTATCATATTAATGCATTGTGAATAACTTTATCTAATATAACAACTTATCCACACTTG\nTGAATAATCCTGTTGAT',
	    '>gi|238694164|ref|NZ_GG669567.1| Lactobacillus acidophilus ATCC 4796 SCAFFOLD2, whole genome shotgun sequence\nCTATTGTTGAATTAAAATCGATTTGTTGGAATTCCTTGATTAGTTCAATTATAGATGGTGAAACATTTCC\nTTTTGATTTAGTTGCAATGAAGAAATCAATATAAATTAAACTTTTGTCTATTGGTAATAAATTTATAGGA\nGACTGTTTTATTCG'
	    )

	#
	#*******************************************************************************
	#*******************************************************************************




















	#*******************************************************************************
	#*******************************************************************************
	###DO NOT EDIT BELOW HERE - ENTER VALUES ABOVE###



	###-----------------Actual Main function of script---------------------------###
	# The last example at http://pymotw.com/2/ospath/ shows how to make a simple
	# directory hierarchy with files. This is based on that.
	import os

	# Build one directory per entry of directories_to_make and fill it with two
	# protein-coding FASTA files plus one genome FASTA file. (The loop body had
	# lost its indentation in this copy; the block structure is restored here.)
	for idx, directory in enumerate(directories_to_make):
	    # Only create the directory when it is missing, so the script can be
	    # re-run (os.mkdir raises if the directory already exists). Existing
	    # demo files are simply overwritten below.
	    if not os.path.isdir(directory):
	        os.mkdir(directory)
	    # Directory number idx receives sequence entries 2*idx and 2*idx+1 and
	    # genome entry idx.
	    files_to_write = (
	        ("prot_1.ffn", sequences_to_use[(2*idx)+0]),
	        ("prot_2.ffn", sequences_to_use[(2*idx)+1]),
	        ("genome.ffa", genomes_to_use[idx]),
	    )
	    for demo_file_name, fasta_text in files_to_write:
	        # 'with' closes the handle even if the write fails; os.path.join
	        # keeps the path portable instead of hard-coding "/".
	        with open(os.path.join(directory, demo_file_name), "w") as file_write_handle:
	            file_write_handle.write(fasta_text)


	#*******************************************************************************
	#*******************************************************************************