|
#!/usr/bin/env python |
|
# File created on 12 Jun 2013 |
|
from __future__ import division |
|
|
|
__author__ = "Greg Caporaso" |
|
__copyright__ = "Copyright 2013, The BiPy project" |
|
__credits__ = ["Greg Caporaso", "Jason Sahl"] |
|
__license__ = "GPL" |
|
__version__ = "0.0.0-dev" |
|
__maintainer__ = "Greg Caporaso" |
|
__email__ = "[email protected]" |
|
__status__ = "Development" |
|
|
|
from os.path import join |
|
from qcli import parse_command_line_parameters, make_option |
|
|
|
script_info = {} |
|
script_info['brief_description'] = "" |
|
script_info['script_description'] = "" |
|
script_info['script_usage'] = [("","","")] |
|
script_info['output_description']= "" |
|
script_info['required_options'] = [ |
|
make_option('-i','--input_fp',type="existing_filepath",help='the input filepath'), |
|
make_option('-o','--output_dir',type="new_dirpath",help='the output directory'), |
|
] |
|
script_info['optional_options'] = [] |
|
script_info['version'] = __version__ |
|
|
|
def parse_snp_matrix(input_f): |
|
""" |
|
""" |
|
sample_ids = [] |
|
locus_ids = [] |
|
references = [] |
|
matrix = [] |
|
for line in input_f: |
|
line = line.strip() |
|
if len(line) == 0 or line.startswith('#'): |
|
continue |
|
elif len(sample_ids) == 0: |
|
# we're on the first line |
|
fields = line.split('\t') |
|
md_start = fields.index('#SNPcall') |
|
sample_ids = fields[2:md_start] |
|
else: |
|
fields = line.split('\t') |
|
locus_ids.append(fields[0]) |
|
reference = fields[1] |
|
references.append(reference) |
|
vector = [] |
|
for base in fields[2:md_start]: |
|
if base == reference: |
|
vector.append(0) |
|
else: |
|
vector.append(1) |
|
matrix.append(vector) |
|
|
|
return sample_ids, locus_ids, references, matrix |
|
|
|
def main(): |
|
option_parser, opts, args =\ |
|
parse_command_line_parameters(**script_info) |
|
sample_ids, locus_ids, references, matrix = parse_snp_matrix(open(opts.input_fp,'U')) |
|
# write mapping file |
|
map_f = open(join(opts.output_dir,'sample_map.txt'),'w') |
|
map_f.write('#SampleID\tDescription\n') |
|
for sample_id in sample_ids: |
|
map_f.write('%s\t%s\n' % (sample_id,sample_id)) |
|
map_f.close() |
|
# write legacy-formatted OTU table |
|
table_f = open(join(opts.output_dir,'table.txt'),'w') |
|
table_f.write('#ObservationId\t%s\n' % '\t'.join(sample_ids)) |
|
for i, vector in enumerate(matrix): |
|
table_f.write('%d\t%s\n' % (i, '\t'.join(map(str,vector)))) |
|
table_f.close() |
|
# write observation mapping file |
|
obs_map_f = open(join(opts.output_dir,'obs_map.txt'),'w') |
|
obs_map_f.write('#ObservationID\tDescription\n') |
|
for i, locus_id in enumerate(locus_ids): |
|
obs_map_f.write('%d\t%s\n' % (i, locus_id)) |
|
obs_map_f.close() |
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
main() |