fomightez · October 30, 2022 07:11
diff --git a/argparse_snippets.py b/argparse_snippets.py
 # from `get_seq_following_seq_from_FASTA.py`
 if __name__ == "__main__" and '__file__' in globals():
    # This branch is executed only when run from the command line.
    # Code with just `if __name__ == "__main__":` alone will be run if pasted
    # into a notebook. The addition of ` and '__file__' in globals()` is based
    # on https://stackoverflow.com/a/22923872/8508004
    # See also https://stackoverflow.com/a/22424821/8508004 for an option to
    # provide arguments when prototyping a full script in the notebook.
    ###-----------------for parsing command line arguments-------------------###
    import argparse
    import sys  # `sys.argv` and `sys.exit` are used by the no-argument check

    parser = argparse.ArgumentParser(prog=
        'get_seq_following_seq_from_FASTA.py',
        description="get_seq_following_seq_from_FASTA.py \
        takes a sequence pattern string, a sequence file (FASTA-format), and a \
        record id and extracts a sequence of specified size following the \
        sequence pattern. Importantly, the regular expression search term \
        syntax is acceptable in the provided sequence pattern, although \
        anything dealing with case will be ignored. (The FASTA-formatted \
        sequence file is assumed by default to be \
        a multi-FASTA, i.e., multiple sequences in the provided file, \
        although it definitely doesn't have to be. In case it is only a \
        single sequence, the record id becomes moot, see below.) A sequence \
        string of the specified length will be returned. Redirect the output \
        to a file if that is what is needed. \
        **** Script by Wayne Decatur   \
        (fomightez @ github) ***")

    # Four required positionals, in the order they must be given.
    parser.add_argument("sequence_file", help="Name of sequence file to \
        use as input. Must be FASTA format. Can be a \
        multi-FASTA file, i.e., multiple sequences in FASTA format in one \
        file.", metavar="SEQUENCE_FILE")
    parser.add_argument("record_id", help="Specific identifier of sequence \
        entry in sequence file to mine. If the provided sequence file only \
        contains one sequence, that sequence will be mined and what is provided \
        for this parameter will be ignored. In other words, if the sequence \
        file is not a multi-FASTA file, you don't need to determine the \
        identifier and can instead just enter `blahblah` or any other \
        nonsensical string in this spot.", metavar="RECORD_ID")
    parser.add_argument("pattern", help="Sequence or sequence pattern to use \
        to locate site after which to get the sequence. Regular expressions \
        are accepted here; however any information about case will be ignored \
        as the provided sequence pattern and sequence will both be converted \
        to lower case to check for a match.", metavar="PATTERN")
    parser.add_argument("amount_to_get", type=int, help="Number (integer) of \
        residues \
        to retrieve following the match to the sequence. The length of this \
        sequence is to be given in common terms, where the first item is \
        referenced as `1`, and so a provided argument of \
        `1` would result in a single residue following the match being \
        returned.",
        metavar="NUMBER_TO_GET")
    parser.add_argument('-ld', '--leave_dashes', help="Add this flag when \
        calling the script in \
        order to be able to use gaps (represented as dashes) in the pattern \
        required to match. I.E., for matching with an aligned FASTA file. \
        (***ATYPICAL.***)", action="store_true")


    # Show the full help (rather than argparse's terse usage error) when the
    # script is called with no arguments at all; the positionals are required.
    if len(sys.argv) == 1:    #from http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    sequence_file = args.sequence_file
    record_id = args.record_id
    seq_to_find = args.pattern
    amount_to_get = args.amount_to_get
    # Dash filtering is on unless `--leave_dashes` was given.
    filter_dashes = not args.leave_dashes

        


        
 # from plot_expression_across_chromosomes.py
 # Command-line interface for plot_expression_across_chromosomes.py.
 # `argparse`, `sys` and `y_cutoff` (interpolated into the `--no_limits` help
 # text) are not defined in this excerpt and must already be in scope.
 parser = argparse.ArgumentParser(prog='plot_expression_across_chromosomes.py',
    description="plot_expression_across_chromosomes.py  plots a ratio of \
    expression values across chromosomes or scaffolds of a genome to highlight \
    regions of deviation. Besides the options listed here, there are several \
    `USER ADJUSTABLE VALUES` inside the script that can be edited for easy \
    customization. A similar plot is called a Manhattan plot and this \
    implementation borrows the plotting approach and some of the features from \
    Brent Pedersen's awesome `manhattan-plot.py` script.       \
    **** Script by Wayne Decatur   \
    (fomightez @ github) ***")

 # Both positionals use `argparse.FileType('r')`, so `args.annotation` and
 # `args.data` are file objects opened for reading, not file names.
 parser.add_argument("annotation", help="Name of file containing the genome \
    annotation. REQUIRED. This is needed to determine the order of individual \
    data points along the chromosome and how to display the data across \
    chromosomes or scaffolds.",
    type=argparse.FileType('r'), metavar="ANNOTATION_FILE")
 parser.add_argument("data", help="Name of file containing the summarized data \
    to plot, such as mean TPM or RPKM, etc. in tab-delimited form. REQUIRED. \
    See my script `plot_expression_across_chromosomes_from_raw.py` if you want to \
    supply the individual `raw` data files with the level metric for each \
    sample and/or replicate.",
    type=argparse.FileType('r'), metavar="DATA_FILE")
 parser.add_argument('-cols', '--columns', action='store', type=str,
    default= '1,2,3', help="columns for gene, wild-type (baseline state) \
    expression value, experimental condition expression value, in that order. \
    This flag is used to specify the data in the summary file to be plotted. \
    Separate the column identifiers by commas, without spaces. \
    Default is `1,2,3`, where `1` indicates the first column, i.e., how \
    you'd refer to the columns in natural language (no zero-indexing). ")
    # based on
    # https://stackoverflow.com/questions/15753701/argparse-option-for-passing-a-list-as-option
 parser.add_argument("-l", "--lines",help=
    "add this flag to plot the expression level ratio value as lines \
    extending from the x-axis rather than points in space. (The resulting \
    aesthetic may resemble a city skyline for which the `manhattan plot` is \
    named.)",
    action="store_true")
 parser.add_argument('-chr', '--chrs', action='store', type=str,
    help="use this flag to limit plotting of the data to particular \
    chromosomes or scaffolds you specify immediately following this flag. \
    Separate the chromosome or scaffold identifiers by commas, without spaces. \
    Example use in a command is `--chrs I,IV,XVI`. \
    Default when this optional flag is not called is to plot that data for all \
    chromosomes or scaffolds. ") # based on
    # https://stackoverflow.com/questions/15753701/argparse-option-for-passing-a-list-as-option
 parser.add_argument("-nl", "--no_log",help=
    "add this flag to keep the expression level ratio to be plotted in the \
    common base 10 instead of converting to log2.",
    action="store_true")
 parser.add_argument("-nlim", "--no_limits",help=
    "add this flag to not impose a limit of above and below {} in plot window \
    when converting to log2. The cutoff can also be adjusted under \
    `user-adjustable settings` in the script. Issuing this flag has no effect \
     if all values are within +/- the cutoff interval or `--no_log` is used."
    .format(y_cutoff),
    action="store_true")
 parser.add_argument("-s", "--smooth",help=
    "add this flag to display a smoothing curve fit to the data points \
    (LOWESS) on a per chromosome basis. This option can enhance visualization \
    of deviations characteristic of aneuploidy and copy number variation across \
    the genome, both within and between chromosomes. Additionally, a \
    simplistically-based assessment will be made for aneuploidy at the \
    chromosome or scaffold level and a notice will be made as the program is \
    running if aneuploidy at the chromosome or scaffold level seems indicated \
    by this simple metric. Further examination is warranted regardless of \
    the result of this automated assessment.",
    action="store_true")
 parser.add_argument('-ed', '--exp_desig', action='store', type=str,
    default= 'experimental', help="Allows changing the text used in y-axis \
    label to reference experimental sample. Following `--exp_desig` type what \
    you'd like to read there instead of `experimental`.")
 # Raw string: `\m` is not a valid escape sequence, so the non-raw literal
 # only worked by accident (and triggers a warning on newer Pythons). The
 # resulting value is unchanged.
 parser.add_argument('-bd', '--base_desig', action='store', type=str,
    default= r'wild\mathrm{-}type', help="Allows changing the text used in y-axis \
    label to reference wild-type or baseline sample. Following `--base_desig` \
    type what you'd like to read there instead of `wild-type`.")
 parser.add_argument("-svg", "--save_vg",help=
    "add this flag to save as vector graphics \
    (**RECOMMENDED FOR PUBLICATION***) instead of default png. Not default or \
    saved alongside default because file size can get large due to the large \
    number of points.",
    action="store_true")
 parser.add_argument("-ndh", "--no_data_header",help=
    "add this flag if there is no data header or no first line of column names \
    in the data file. Otherwise, it is assumed there is and any item read as \
    the first gene identifier from the first line won't be highlighted as \
    missing from annotation.\
    IMPORTANTLY, this only affects feedback provided as script is run. If the \
    first line resembles data, i.e., numbers in specified columns, it will be \
    automagically parsed as if data. Remove the header or column labels line \
    from your summary data file on the off-chance this causes issues in your \
    resulting plot.",
    action="store_true")
 # Integer default to match `type=int` (previously the string '0').
 parser.add_argument('-ac', '--advance_color', action='store', type=int,
    default=0, help="**FOR ADVANCED USE.*** Allows for advancing the color \
    selection iterator the specified number of times. The idea is it allows \
    the ability to control the color of the chromosome when specifying \
    a chromosome or scaffolds to plot so you could make the color match the \
    one used when all chromosomes plotted if needed. Supply the number to \
    advance after the flag on the command line. For example, `-ac 4`.")

 # Show the full help (rather than argparse's terse usage error) when the
 # script is called with no arguments; at least the input files are needed.
 if len(sys.argv) == 1:    #from http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu
    parser.print_help()
    sys.exit(1)
 args = parser.parse_args()
 # NOTE(review): `annotaton_file` is misspelled; the name is kept because code
 # outside this excerpt may refer to it.
 annotaton_file = args.annotation
 data_file = args.data
 # Column numbers stay 1-based here, exactly as the user typed them.
 data_columns_to_grab = [int(item) for item in args.columns.split(',')]
 no_log = args.no_log
 no_data_header = args.no_data_header
 lines = args.lines
 # `str.split(',')` already yields a one-item list when there is no comma, so
 # a single chromosome needs no special case; it has to be a list for passing
 # to Pandas `isin()`. When the flag is absent the value stays `None`.
 limit_to_chrs = args.chrs.split(',') if args.chrs else args.chrs
 advance_color_increments = args.advance_color
 display_smooth = args.smooth
 no_limits = args.no_limits
 exp_designation = args.exp_desig
 baseline_designation = args.base_desig
 save_vg = args.save_vg




 # Output-naming and column-selection options. `parser`, `suffix_for_saving`
 # and `df_save_as_name` are not defined in this excerpt and must already be
 # in scope.
 parser.add_argument('-os', '--output_suffix', action='store', type=str,
    default= suffix_for_saving, help="OPTIONAL: Set a suffix for including in file \
    name of output. \
    If none provided, '{}' will be used.".format(suffix_for_saving))
 # Integer default to match `type=int` (previously the string '4').
 parser.add_argument('-col', '--column', action='store', type=int,
    default=4, help="column for expression metric in data file. Assumes a \
    value of four to match fourth tab-separated column in Salmon-quantified \
    RNA-Seq data.")
 parser.add_argument('-dfo', '--df_output', action='store', type=str,
    default= df_save_as_name, help="OPTIONAL: Set file name for saving pickled \
    dataframe. If none provided, '{}' will be used. To force no dataframe to \
    be saved, enter `-dfo no_pickling` without quotes as output file \
    (ATYPICAL).".format(df_save_as_name))
 def valid_dibase(arg_string):
    """Validate a two-letter base pair given on the command line.

    Intended as an argparse `type=` callable (it is used for the `two_bases`
    positional argument). The check is case-insensitive: the argument is
    upper-cased before being compared against `nt_set`, which is defined
    outside this excerpt (presumably the upper-case nucleotide letters --
    confirm against its definition).

    Returns the argument unchanged (not upper-cased) when it is exactly two
    characters long and every character is in `nt_set`.

    Raises `argparse.ArgumentTypeError` with an explanatory message otherwise.
    """
    if len(arg_string) == 2 and set(arg_string.upper()).issubset(nt_set):
        return arg_string
    msg = ("Not a valid set of letters representing two nucleotides: "
        "'{}'. Try something like `GC` or `AT`.".format(arg_string))
    # Raise with the explanatory message; previously the bare argument was
    # passed, so the user never saw the hint built above.
    raise argparse.ArgumentTypeError(msg)
 parser.add_argument("two_bases", type = valid_dibase, help="Two \
    letters representing one set of bases to check for imbalance vs. the \
    other two remaining out of the possible four from `GATC`. For example, \
    providing `GC` will result in checking for imbalance of nucleotides \
    `G` and `C` in close prox
 # Analysis-window options. `parser`, `chunk_size` and `overlap_specified`
 # are not defined in this excerpt and must already be in scope; the latter
 # two supply the defaults shown in the help text.
 parser.add_argument('-bl', '--block_size', action='store', type=int,
    default= chunk_size, help="OPTIONAL: Use the `--block_size` flag followed \
    by an integer to provide a value to use as the span size (window of \
    basepairs) to analyze instead of the default of '{}'.".format(chunk_size))
 parser.add_argument('-ov', '--overlap_size', action='store', type=int,
    default= overlap_specified, help="OPTIONAL: Use the `--overlap_size` \
    flag followed by an integer to specify the amount of overlap to use \
    between the \
    analysis windows instead of the default of '{}'.".format(overlap_specified))


 # Optional positionals: `nargs='?'` lets either be omitted, in which case the
 # corresponding `*_default` name (defined outside this excerpt) is used.
 parser.add_argument("input", nargs='?', help="**OPTIONAL** Name of the file \
    generated by Salmon \
    when run with your transcriptome of interest. Usually, this is \
    '"+input_file_name_default+"' &\
    if no input file name is provided then this will be used by \
    default.", default=input_file_name_default, metavar="INPUT_FILE")
 parser.add_argument("output", nargs='?', help="**OPTIONAL** Name of file to \
    save results. If BOTH input \
    and output file are not provided, '"+output_file_name_default+"', will \
    be used.", default=output_file_name_default, metavar="OUTPUT_FILE")
 # See
 # https://stackoverflow.com/questions/4480075/argparse-optional-positional-arguments 
 # and 
 # https://docs.python.org/2/library/argparse.html#nargs for use of `nargs='?'` 
 # to make input and output file names optional. Note that the square brackets
 # shown in the usage out signify optional according to 
 # https://stackoverflow.com/questions/4480075/argparse-optional-positional-arguments#comment40460395_4480202
 # , but because placed under positional I added clarifying text to help 
 # description.
 # IF MODIFYING THIS SCRIPT FOR USE ELSEWHERE AND DON'T NEED/WANT THE INPUT AND
 # OUTPUT FILES TO BE OPTIONAL, remove `nargs` (& default?) BUT KEEP THE APPROACH
 # OF NOT USING `argparse.FileType` AND INSTEAD USING `with open`, AS THAT IS
 # CONSIDERED MORE PYTHONIC.
	# from `get_seq_following_seq_from_FASTA.py`
	if __name__ == "__main__" and '__file__' in globals():
	    # This branch is executed only when run from the command line.
	    # Code with just `if __name__ == "__main__":` alone will be run if pasted
	    # into a notebook. The addition of ` and '__file__' in globals()` is based
	    # on https://stackoverflow.com/a/22923872/8508004
	    # See also https://stackoverflow.com/a/22424821/8508004 for an option to
	    # provide arguments when prototyping a full script in the notebook.
	    ###-----------------for parsing command line arguments-------------------###
	    import argparse
	    import sys  # `sys.argv` and `sys.exit` are used by the no-argument check

	    parser = argparse.ArgumentParser(prog=
	        'get_seq_following_seq_from_FASTA.py',
	        description="get_seq_following_seq_from_FASTA.py \
	takes a sequence pattern string, a sequence file (FASTA-format), and a \
	record id and extracts a sequence of specified size following the \
	sequence pattern. Importantly, the regular expression search term \
	syntax is acceptable in the provided sequence pattern, although \
	anything dealing with case will be ignored. (The FASTA-formatted \
	sequence file is assumed by default to be \
	a multi-FASTA, i.e., multiple sequences in the provided file, \
	although it definitely doesn't have to be. In case it is only a \
	single sequence, the record id becomes moot, see below.) A sequence \
	string of the specified length will be returned. Redirect the output \
	to a file if that is what is needed. \
	**** Script by Wayne Decatur \
	(fomightez @ github) ***")

	    # Four required positionals, in the order they must be given.
	    parser.add_argument("sequence_file", help="Name of sequence file to \
	use as input. Must be FASTA format. Can be a \
	multi-FASTA file, i.e., multiple sequences in FASTA format in one \
	file.", metavar="SEQUENCE_FILE")
	    parser.add_argument("record_id", help="Specific identifier of sequence \
	entry in sequence file to mine. If the provided sequence file only \
	contains one sequence, that sequence will be mined and what is provided \
	for this parameter will be ignored. In other words, if the sequence \
	file is not a multi-FASTA file, you don't need to determine the \
	identifier and can instead just enter `blahblah` or any other \
	nonsensical string in this spot.", metavar="RECORD_ID")
	    parser.add_argument("pattern", help="Sequence or sequence pattern to use \
	to locate site after which to get the sequence. Regular expressions \
	are accepted here; however any information about case will be ignored \
	as the provided sequence pattern and sequence will both be converted \
	to lower case to check for a match.", metavar="PATTERN")
	    parser.add_argument("amount_to_get", type=int, help="Number (integer) of \
	residues \
	to retrieve following the match to the sequence. The length of this \
	sequence is to be given in common terms, where the first item is \
	referenced as `1`, and so a provided argument of \
	`1` would result in a single residue following the match being \
	returned.",
	        metavar="NUMBER_TO_GET")
	    parser.add_argument('-ld', '--leave_dashes', help="Add this flag when \
	calling the script in \
	order to be able to use gaps (represented as dashes) in the pattern \
	required to match. I.E., for matching with an aligned FASTA file. \
	(***ATYPICAL.***)", action="store_true")


	    # Show the full help (rather than argparse's terse usage error) when the
	    # script is called with no arguments at all; the positionals are required.
	    if len(sys.argv) == 1: #from http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu
	        parser.print_help()
	        sys.exit(1)
	    args = parser.parse_args()
	    sequence_file = args.sequence_file
	    record_id = args.record_id
	    seq_to_find = args.pattern
	    amount_to_get = args.amount_to_get
	    # Dash filtering is on unless `--leave_dashes` was given.
	    filter_dashes = not args.leave_dashes





	# from plot_expression_across_chromosomes.py
	# Command-line interface for plot_expression_across_chromosomes.py.
	# `argparse`, `sys` and `y_cutoff` (interpolated into the `--no_limits` help
	# text) are not defined in this excerpt and must already be in scope.
	parser = argparse.ArgumentParser(prog='plot_expression_across_chromosomes.py',
	    description="plot_expression_across_chromosomes.py plots a ratio of \
	expression values across chromosomes or scaffolds of a genome to highlight \
	regions of deviation. Besides the options listed here, there are several \
	`USER ADJUSTABLE VALUES` inside the script that can be edited for easy \
	customization. A similar plot is called a Manhattan plot and this \
	implementation borrows the plotting approach and some of the features from \
	Brent Pedersen's awesome `manhattan-plot.py` script. \
	**** Script by Wayne Decatur \
	(fomightez @ github) ***")

	# Both positionals use `argparse.FileType('r')`, so `args.annotation` and
	# `args.data` are file objects opened for reading, not file names.
	parser.add_argument("annotation", help="Name of file containing the genome \
	annotation. REQUIRED. This is needed to determine the order of individual \
	data points along the chromosome and how to display the data across \
	chromosomes or scaffolds.",
	    type=argparse.FileType('r'), metavar="ANNOTATION_FILE")
	parser.add_argument("data", help="Name of file containing the summarized data \
	to plot, such as mean TPM or RPKM, etc. in tab-delimited form. REQUIRED. \
	See my script `plot_expression_across_chromosomes_from_raw.py` if you want to \
	supply the individual `raw` data files with the level metric for each \
	sample and/or replicate.",
	    type=argparse.FileType('r'), metavar="DATA_FILE")
	parser.add_argument('-cols', '--columns', action='store', type=str,
	    default= '1,2,3', help="columns for gene, wild-type (baseline state) \
	expression value, experimental condition expression value, in that order. \
	This flag is used to specify the data in the summary file to be plotted. \
	Separate the column identifiers by commas, without spaces. \
	Default is `1,2,3`, where `1` indicates the first column, i.e., how \
	you'd refer to the columns in natural language (no zero-indexing). ")
	# based on
	# https://stackoverflow.com/questions/15753701/argparse-option-for-passing-a-list-as-option
	parser.add_argument("-l", "--lines",help=
	    "add this flag to plot the expression level ratio value as lines \
	extending from the x-axis rather than points in space. (The resulting \
	aesthetic may resemble a city skyline for which the `manhattan plot` is \
	named.)",
	    action="store_true")
	parser.add_argument('-chr', '--chrs', action='store', type=str,
	    help="use this flag to limit plotting of the data to particular \
	chromosomes or scaffolds you specify immediately following this flag. \
	Separate the chromosome or scaffold identifiers by commas, without spaces. \
	Example use in a command is `--chrs I,IV,XVI`. \
	Default when this optional flag is not called is to plot that data for all \
	chromosomes or scaffolds. ") # based on
	# https://stackoverflow.com/questions/15753701/argparse-option-for-passing-a-list-as-option
	parser.add_argument("-nl", "--no_log",help=
	    "add this flag to keep the expression level ratio to be plotted in the \
	common base 10 instead of converting to log2.",
	    action="store_true")
	parser.add_argument("-nlim", "--no_limits",help=
	    "add this flag to not impose a limit of above and below {} in plot window \
	when converting to log2. The cutoff can also be adjusted under \
	`user-adjustable settings` in the script. Issuing this flag has no effect \
	if all values are within +/- the cutoff interval or `--no_log` is used."
	    .format(y_cutoff),
	    action="store_true")
	parser.add_argument("-s", "--smooth",help=
	    "add this flag to display a smoothing curve fit to the data points \
	(LOWESS) on a per chromosome basis. This option can enhance visualization \
	of deviations characteristic of aneuploidy and copy number variation across \
	the genome, both within and between chromosomes. Additionally, a \
	simplistically-based assessment will be made for aneuploidy at the \
	chromosome or scaffold level and a notice will be made as the program is \
	running if aneuploidy at the chromosome or scaffold level seems indicated \
	by this simple metric. Further examination is warranted regardless of \
	the result of this automated assessment.",
	    action="store_true")
	parser.add_argument('-ed', '--exp_desig', action='store', type=str,
	    default= 'experimental', help="Allows changing the text used in y-axis \
	label to reference experimental sample. Following `--exp_desig` type what \
	you'd like to read there instead of `experimental`.")
	# Raw string: `\m` is not a valid escape sequence, so the non-raw literal
	# only worked by accident (and triggers a warning on newer Pythons). The
	# resulting value is unchanged.
	parser.add_argument('-bd', '--base_desig', action='store', type=str,
	    default= r'wild\mathrm{-}type', help="Allows changing the text used in y-axis \
	label to reference wild-type or baseline sample. Following `--base_desig` \
	type what you'd like to read there instead of `wild-type`.")
	parser.add_argument("-svg", "--save_vg",help=
	    "add this flag to save as vector graphics \
	(**RECOMMENDED FOR PUBLICATION***) instead of default png. Not default or \
	saved alongside default because file size can get large due to the large \
	number of points.",
	    action="store_true")
	parser.add_argument("-ndh", "--no_data_header",help=
	    "add this flag if there is no data header or no first line of column names \
	in the data file. Otherwise, it is assumed there is and any item read as \
	the first gene identifier from the first line won't be highlighted as \
	missing from annotation.\
	IMPORTANTLY, this only affects feedback provided as script is run. If the \
	first line resembles data, i.e., numbers in specified columns, it will be \
	automagically parsed as if data. Remove the header or column labels line \
	from your summary data file on the off-chance this causes issues in your \
	resulting plot.",
	    action="store_true")
	# Integer default to match `type=int` (previously the string '0').
	parser.add_argument('-ac', '--advance_color', action='store', type=int,
	    default=0, help="**FOR ADVANCED USE.*** Allows for advancing the color \
	selection iterator the specified number of times. The idea is it allows \
	the ability to control the color of the chromosome when specifying \
	a chromosome or scaffolds to plot so you could make the color match the \
	one used when all chromosomes plotted if needed. Supply the number to \
	advance after the flag on the command line. For example, `-ac 4`.")

	# Show the full help (rather than argparse's terse usage error) when the
	# script is called with no arguments; at least the input files are needed.
	if len(sys.argv) == 1: #from http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu
	    parser.print_help()
	    sys.exit(1)
	args = parser.parse_args()
	# NOTE(review): `annotaton_file` is misspelled; the name is kept because code
	# outside this excerpt may refer to it.
	annotaton_file = args.annotation
	data_file = args.data
	# Column numbers stay 1-based here, exactly as the user typed them.
	data_columns_to_grab = [int(item) for item in args.columns.split(',')]
	no_log = args.no_log
	no_data_header = args.no_data_header
	lines = args.lines
	# `str.split(',')` already yields a one-item list when there is no comma, so
	# a single chromosome needs no special case; it has to be a list for passing
	# to Pandas `isin()`. When the flag is absent the value stays `None`.
	limit_to_chrs = args.chrs.split(',') if args.chrs else args.chrs
	advance_color_increments = args.advance_color
	display_smooth = args.smooth
	no_limits = args.no_limits
	exp_designation = args.exp_desig
	baseline_designation = args.base_desig
	save_vg = args.save_vg




	# Output-naming and column-selection options. `parser`, `suffix_for_saving`
	# and `df_save_as_name` are not defined in this excerpt and must already be
	# in scope.
	parser.add_argument('-os', '--output_suffix', action='store', type=str,
	    default= suffix_for_saving, help="OPTIONAL: Set a suffix for including in file \
	name of output. \
	If none provided, '{}' will be used.".format(suffix_for_saving))
	# Integer default to match `type=int` (previously the string '4').
	parser.add_argument('-col', '--column', action='store', type=int,
	    default=4, help="column for expression metric in data file. Assumes a \
	value of four to match fourth tab-separated column in Salmon-quantified \
	RNA-Seq data.")
	parser.add_argument('-dfo', '--df_output', action='store', type=str,
	    default= df_save_as_name, help="OPTIONAL: Set file name for saving pickled \
	dataframe. If none provided, '{}' will be used. To force no dataframe to \
	be saved, enter `-dfo no_pickling` without quotes as output file \
	(ATYPICAL).".format(df_save_as_name))
	def valid_dibase(arg_string):
	    """Validate a two-letter base pair given on the command line.

	    Intended as an argparse `type=` callable (it is used for the `two_bases`
	    positional argument). The check is case-insensitive: the argument is
	    upper-cased before being compared against `nt_set`, which is defined
	    outside this excerpt (presumably the upper-case nucleotide letters --
	    confirm against its definition).

	    Returns the argument unchanged (not upper-cased) when it is exactly two
	    characters long and every character is in `nt_set`.

	    Raises `argparse.ArgumentTypeError` with an explanatory message otherwise.
	    """
	    if len(arg_string) == 2 and set(arg_string.upper()).issubset(nt_set):
	        return arg_string
	    msg = ("Not a valid set of letters representing two nucleotides: "
	        "'{}'. Try something like `GC` or `AT`.".format(arg_string))
	    # Raise with the explanatory message; previously the bare argument was
	    # passed, so the user never saw the hint built above.
	    raise argparse.ArgumentTypeError(msg)
	parser.add_argument("two_bases", type = valid_dibase, help="Two \
	letters representing one set of bases to check for imbalance vs. the \
	other two remaining out of the possible four from `GATC`. For example, \
	providing `GC` will result in checking for imbalance of nucleotides \
	`G` and `C` in close prox
	# Analysis-window options. `parser`, `chunk_size` and `overlap_specified`
	# are not defined in this excerpt and must already be in scope; the latter
	# two supply the defaults shown in the help text.
	parser.add_argument('-bl', '--block_size', action='store', type=int,
	    default= chunk_size, help="OPTIONAL: Use the `--block_size` flag followed \
	by an integer to provide a value to use as the span size (window of \
	basepairs) to analyze instead of the default of '{}'.".format(chunk_size))
	parser.add_argument('-ov', '--overlap_size', action='store', type=int,
	    default= overlap_specified, help="OPTIONAL: Use the `--overlap_size` \
	flag followed by an integer to specify the amount of overlap to use \
	between the \
	analysis windows instead of the default of '{}'.".format(overlap_specified))


	# Optional positionals: `nargs='?'` lets either be omitted, in which case the
	# corresponding `*_default` name (defined outside this excerpt) is used.
	parser.add_argument("input", nargs='?', help="**OPTIONAL** Name of the file \
	generated by Salmon \
	when run with your transcriptome of interest. Usually, this is \
	'"+input_file_name_default+"' &\
	if no input file name is provided then this will be used by \
	default.", default=input_file_name_default, metavar="INPUT_FILE")
	parser.add_argument("output", nargs='?', help="**OPTIONAL** Name of file to \
	save results. If BOTH input \
	and output file are not provided, '"+output_file_name_default+"', will \
	be used.", default=output_file_name_default, metavar="OUTPUT_FILE")
	# See
	# https://stackoverflow.com/questions/4480075/argparse-optional-positional-arguments
	# and
	# https://docs.python.org/2/library/argparse.html#nargs for use of `nargs='?'`
	# to make input and output file names optional. Note that the square brackets
	# shown in the usage out signify optional according to
	# https://stackoverflow.com/questions/4480075/argparse-optional-positional-arguments#comment40460395_4480202
	# , but because placed under positional I added clarifying text to help
	# description.
	# IF MODIFYING THIS SCRIPT FOR USE ELSEWHERE AND DON'T NEED/WANT THE INPUT AND
	# OUTPUT FILES TO BE OPTIONAL, remove `nargs` (& default?) BUT KEEP THE APPROACH
	# OF NOT USING `argparse.FileType` AND INSTEAD USING `with open`, AS THAT IS
	# CONSIDERED MORE PYTHONIC.