brantfaircloth · June 13, 2014 23:31 · elenamd · Aug 11, 2015 · RanjeetaM · Aug 12, 2015
diff --git a/gistfile1.txt b/gistfile1.txt
 # This is the SampleId column of your SampleSheet.csv
 [samples]
 sample1-nameA-blah-blah
 sample2-nameB-blah-blah
 sample3-nameC-blah-blah
diff --git a/merge_next_seq.py b/merge_next_seq.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 """
 (c) 2014 Brant Faircloth || http://faircloth-lab.org/
 All rights reserved.

 This code is distributed under a 3-clause BSD license. Please see
 LICENSE.txt for more information.

 Created on 13 June 2014 15:59 PDT (-0700)
 """

 """
 The next-seq outputs data as if it came from 4 lanes.  This also
 means that if you have multiplexed, now you have 4 lanes of data
 to merge.  This is annoying across many samples.

 This code will merge those reads.  Logging and some error-checking
 are turned off here, as the code requires external libraries (it is
 part of PHYLUCE).  It should run as-is, with just the python stdlib.

 See 2nd file in gist for config file example.

 """


 import os
 import glob
 import shutil
 import argparse
 import ConfigParser

 #from phyluce.helpers import is_file, is_dir, FullPaths, CreateDir
 #from phyluce.log import setup_logging

 #import pdb

 def get_args(argv=None):
    """Parse the command-line options of the merge script.

    Args:
        argv: optional list of argument strings to parse.  When it is None
            (the default) argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``input``, ``config``,
        ``output``, ``section`` (default "samples") and ``se`` (default
        False).
    """
    parser = argparse.ArgumentParser(
        description="""Given a config file and inputs, merge multiple gz files into one"""
    )
    # NOTE: the PHYLUCE helpers (is_file, is_dir, FullPaths, CreateDir) and
    # the --verbosity / --log-path options are deliberately left out so the
    # script runs with nothing but the standard library.
    parser.add_argument(
        "--input",
        required=True,
        help="""The path to a directory containing the reads to merge."""
    )
    parser.add_argument(
        "--config",
        required=True,
        help="""The path to the config file to use for merging."""
    )
    parser.add_argument(
        "--output",
        required=True,
        help="""The path to a directory in which to store the output."""
    )
    parser.add_argument(
        "--section",
        type=str,
        default="samples",
        help="""The section holding the merge info."""
    )
    parser.add_argument(
        "--se",
        action="store_true",
        default=False,
        help="""Run is single-end.""",
    )
    return parser.parse_args(argv)


 def _merge_read_files(input_dir, output_dir, sample_name, read):
    """Concatenate all lane files of one read direction for one sample.

    Args:
        input_dir: directory that holds the per-lane ``.fastq.gz`` files.
        output_dir: existing directory that receives the merged file.
        sample_name: sample prefix of the file names.
        read: read-direction tag used in the file names ("R1" or "R2").

    Returns:
        The path of the merged file that was written.

    Raises:
        IOError: if no lane file in ``input_dir`` matches the sample/read.
    """
    merged_lane = "_L999_"
    pattern = "{}_S*_L*_{}_*.fastq.gz".format(sample_name, read)
    # A merged file left behind by an earlier run (when input and output
    # directory are the same) also matches the glob; it is not a lane and
    # copying it into itself would never finish, so leave it out.
    lane_files = sorted(
        path for path in glob.glob(os.path.join(input_dir, pattern))
        if merged_lane not in os.path.basename(path)
    )
    if not lane_files:
        raise IOError(
            "No files matching {} found in {}".format(pattern, input_dir)
        )
    # The merged file is named after the first lane file, with its lane
    # token rewritten to the pseudo-lane L999.
    merged_name = os.path.basename(lane_files[0]).replace("_L001_", merged_lane)
    merged_path = os.path.join(output_dir, merged_name)
    with open(merged_path, 'wb') as outfile:
        for lane_file in lane_files:
            # The inputs are gzip data: read them as bytes and close each
            # handle as soon as it has been copied.
            with open(lane_file, 'rb') as infile:
                shutil.copyfileobj(infile, outfile)
    return merged_path


 def main():
    """Merge the per-lane read files of every sample named in the config.

    Reads the CLI options via ``get_args()``, takes the sample names from
    the requested config section and writes one merged R1 file (and one
    merged R2 file unless ``--se`` is given) per sample into the output
    directory, creating that directory when it does not exist yet.

    Raises:
        IOError: if the config file cannot be read or a sample has no
            matching read files.
    """
    args = get_args()
    # Sample names are bare keys without values (no ":" in the config file),
    # hence allow_no_value.
    config = ConfigParser.RawConfigParser(allow_no_value=True)
    # Keep sample names case-sensitive; the default transform lower-cases.
    config.optionxform = str
    # read() skips unreadable files silently and returns what it parsed.
    if not config.read(args.config):
        raise IOError("Cannot read config file {}".format(args.config))
    if not os.path.isdir(args.output):
        os.makedirs(args.output)
    reads = ("R1",) if args.se else ("R1", "R2")
    for sample_name, _ in config.items(args.section):
        for read in reads:
            _merge_read_files(args.input, args.output, sample_name, read)


 # Run the merge only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	# This is the SampleId column of your SampleSheet.csv
	[samples]
	sample1-nameA-blah-blah
	sample2-nameB-blah-blah
	sample3-nameC-blah-blah
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	"""
	(c) 2014 Brant Faircloth || http://faircloth-lab.org/
	All rights reserved.

	This code is distributed under a 3-clause BSD license. Please see
	LICENSE.txt for more information.

	Created on 13 June 2014 15:59 PDT (-0700)
	"""

	"""
	The next-seq outputs data as if it came from 4 lanes. This also
	means that if you have multiplexed, now you have 4 lanes of data
	to merge. This is annoying across many samples.

	This code will merge those reads. Logging and some error-checking
	are turned off here, as the code requires external libraries (it is
	part of PHYLUCE). It should run as-is, with just the python stdlib.

	See 2nd file in gist for config file example.

	"""


	import os
	import glob
	import shutil
	import argparse
	import ConfigParser

	#from phyluce.helpers import is_file, is_dir, FullPaths, CreateDir
	#from phyluce.log import setup_logging

	#import pdb

	def get_args(argv=None):
	    """Parse the command-line options of the merge script.

	    Args:
	        argv: optional list of argument strings to parse.  When it is None
	            (the default) argparse reads the arguments from ``sys.argv``.

	    Returns:
	        An ``argparse.Namespace`` with the attributes ``input``, ``config``,
	        ``output``, ``section`` (default "samples") and ``se`` (default
	        False).
	    """
	    parser = argparse.ArgumentParser(
	        description="""Given a config file and inputs, merge multiple gz files into one"""
	    )
	    # NOTE: the PHYLUCE helpers (is_file, is_dir, FullPaths, CreateDir) and
	    # the --verbosity / --log-path options are deliberately left out so the
	    # script runs with nothing but the standard library.
	    parser.add_argument(
	        "--input",
	        required=True,
	        help="""The path to a directory containing the reads to merge."""
	    )
	    parser.add_argument(
	        "--config",
	        required=True,
	        help="""The path to the config file to use for merging."""
	    )
	    parser.add_argument(
	        "--output",
	        required=True,
	        help="""The path to a directory in which to store the output."""
	    )
	    parser.add_argument(
	        "--section",
	        type=str,
	        default="samples",
	        help="""The section holding the merge info."""
	    )
	    parser.add_argument(
	        "--se",
	        action="store_true",
	        default=False,
	        help="""Run is single-end.""",
	    )
	    return parser.parse_args(argv)


	def _merge_read_files(input_dir, output_dir, sample_name, read):
	    """Concatenate all lane files of one read direction for one sample.

	    Args:
	        input_dir: directory that holds the per-lane ``.fastq.gz`` files.
	        output_dir: existing directory that receives the merged file.
	        sample_name: sample prefix of the file names.
	        read: read-direction tag used in the file names ("R1" or "R2").

	    Returns:
	        The path of the merged file that was written.

	    Raises:
	        IOError: if no lane file in ``input_dir`` matches the sample/read.
	    """
	    merged_lane = "_L999_"
	    # The wildcards after _S and _L stand for the sample number and the
	    # lane number; without them the pattern matches no lane file.
	    pattern = "{}_S*_L*_{}_*.fastq.gz".format(sample_name, read)
	    # A merged file left behind by an earlier run (when input and output
	    # directory are the same) also matches the glob; it is not a lane and
	    # copying it into itself would never finish, so leave it out.
	    lane_files = sorted(
	        path for path in glob.glob(os.path.join(input_dir, pattern))
	        if merged_lane not in os.path.basename(path)
	    )
	    if not lane_files:
	        raise IOError(
	            "No files matching {} found in {}".format(pattern, input_dir)
	        )
	    # The merged file is named after the first lane file, with its lane
	    # token rewritten to the pseudo-lane L999.
	    merged_name = os.path.basename(lane_files[0]).replace("_L001_", merged_lane)
	    merged_path = os.path.join(output_dir, merged_name)
	    with open(merged_path, 'wb') as outfile:
	        for lane_file in lane_files:
	            # The inputs are gzip data: read them as bytes and close each
	            # handle as soon as it has been copied.
	            with open(lane_file, 'rb') as infile:
	                shutil.copyfileobj(infile, outfile)
	    return merged_path


	def main():
	    """Merge the per-lane read files of every sample named in the config.

	    Reads the CLI options via ``get_args()``, takes the sample names from
	    the requested config section and writes one merged R1 file (and one
	    merged R2 file unless ``--se`` is given) per sample into the output
	    directory, creating that directory when it does not exist yet.

	    Raises:
	        IOError: if the config file cannot be read or a sample has no
	            matching read files.
	    """
	    args = get_args()
	    # Sample names are bare keys without values (no ":" in the config file),
	    # hence allow_no_value.
	    config = ConfigParser.RawConfigParser(allow_no_value=True)
	    # Keep sample names case-sensitive; the default transform lower-cases.
	    config.optionxform = str
	    # read() skips unreadable files silently and returns what it parsed.
	    if not config.read(args.config):
	        raise IOError("Cannot read config file {}".format(args.config))
	    if not os.path.isdir(args.output):
	        os.makedirs(args.output)
	    reads = ("R1",) if args.se else ("R1", "R2")
	    for sample_name, _ in config.items(args.section):
	        for read in reads:
	            _merge_read_files(args.input, args.output, sample_name, read)


	# Run the merge only when this file is executed as a script.
	if __name__ == '__main__':
	    main()