vsbuffalo · March 19, 2020 16:26
diff --git a/Snakefile b/Snakefile
 import numpy as np
 np.random.seed(1)

 # Root directory that every simulation output path is prefixed with.
 DATADIR = "sim_results/"
 # Path to the SLiM executable (kept with its trailing space).
 SLIM = "/home/vsb/src/SLiM_build/slim "

 ## Parameters
 # replicate indices 0..49
 nreps = range(0, 50)

 # ------- Shared Parameters -------
 Ns = [1000, 100]
 nmus = [1e-8]

 # ------- BGS Simulations -------
 # BGS parameters
 Us = [0.25, 0.5, 0.75, 1, 1.25, 1.5]
 bgs_rbps = [1e-8]
 selcoefs = [0.1, 0.05, 0.01, 0]

 # File suffixes produced by each BGS run.
 bgs_ouputs = ["stats.tsv", "neutfreqs.tsv"]

 # Output filename template; each {name} is a Snakemake wildcard.
 bgs_pattern = "bgs/bgs_{N}N_{rbp}rbp_{s}s_{nmu}nmu_{U}U_{nrep}_{sim_output}"

 # Every combination of the lists above, one path per output suffix.
 bgs_results = expand(
     DATADIR + bgs_pattern,
     s=selcoefs,
     nmu=nmus,
     U=Us,
     rbp=bgs_rbps,
     N=Ns,
     nrep=nreps,
     sim_output=bgs_ouputs,
 )

 # Dummy aggregate rule so the BGS sims can be requested on their own
 # (avoids the "target may not contain wildcards" error).
 rule bgs_all:
  input: bgs_results

 # BGS job for one wildcard combination. The shell command does not run
 # SLiM: it echoes the SLiM command line, `tee` writes that text to the
 # first output and the redirect writes the same text to the second.
 rule bgs:
  input:
  output:
    # output[0]: the stats file; output[1]: the neutral-frequency file
    DATADIR + bgs_pattern.replace("{sim_output}", "stats.tsv"),
    DATADIR + bgs_pattern.replace("{sim_output}", "neutfreqs.tsv")
  shell:
    """
    mkdir -p {DATADIR}/bgs/

    # the output files are automatically generated from the SLiM script
    echo "{SLIM} -d N={wildcards.N} \
     -d rbp={wildcards.rbp} -d nrep={wildcards.nrep} \
     -d s={wildcards.s} -d nmu={wildcards.nmu} -d U={wildcards.U} \
     -d run_generations=150 " | (tee {output[0]}) > {output[1]}
    """

 # Remove every entry named bgs_* directly inside sim_results/bgs/
 # (-maxdepth 1 keeps find from descending further).
 rule bgs_clean:
  shell:
    "find sim_results/bgs/ -maxdepth 1 -name 'bgs_*'  | xargs rm -rf "




 # ------- GSS Burnin Simulations -------
 # GSS parameters
 # rbp
 gss_rbps = [1e-8, 5e-9]
 # effect size
 alphas = [0.01]
 # trait mutation rate
 tmus = [1e-8, 1e-9, 1e-10]
 # neutral mutation rate (re-assigns the shared value defined earlier)
 nmus = [1e-8]

 # File suffix produced by each burn-in run.
 gss_burnin_outputs = ["fullsims.bin"]

 # Output filename template; each {name} is a Snakemake wildcard.
 gss_burnin_pattern = (
     "gss_burnin/gss_burnin_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_"
     + "{tmu}tmu_{nrep}_{sim_output}"
 )

 # Every combination of the lists above, one path per output suffix.
 gss_burnin_results = expand(
     DATADIR + gss_burnin_pattern,
     alpha=alphas,
     nmu=nmus,
     tmu=tmus,
     rbp=gss_rbps,
     N=Ns,
     nrep=nreps,
     sim_output=gss_burnin_outputs,
 )

 # Dummy aggregate rule: requesting it asks for every GSS burn-in target.
 rule gss_burnin_all:
  input: gss_burnin_results

 # GSS burn-in job for one wildcard combination. The shell command does not
 # run SLiM: it echoes the SLiM command line into the single declared
 # output file.
 #
 # Fixes relative to the previous version:
 #  * `mkdir -p DATADIR/...` lacked braces and created a literal "DATADIR"
 #    directory instead of the configured one.
 #  * the command referenced {output[1]}, but this rule declares only one
 #    output, so the index did not exist.
 rule gss_burnin:
  input:
  output:
    DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin"),
  shell:
    """
    mkdir -p {DATADIR}/gss_burnin/

    # the output files are automatically generated from the SLiM script
    echo "{SLIM} -d N={wildcards.N} -d rbp={wildcards.rbp} \
     -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
     -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} " > {output[0]}
    """

 # Remove every entry named gss_burnin_* directly inside
 # sim_results/gss_burnin/ (-maxdepth 1 keeps find from descending further).
 rule gss_burnin_clean:
  shell:
    "find sim_results/gss_burnin/ -maxdepth 1 -name 'gss_burnin_*'  | xargs rm -rf "


 # ------- Neutral Burnin Simulations -------
 # We use the same parameters as the GSS burnin

 # File suffix produced by each neutral burn-in run.
 neut_burnin_outputs = ["fullsims.bin"]

 # Output filename template; each {name} is a Snakemake wildcard.
 neut_burnin_pattern = (
     "neutral/neut_burnin_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_"
     + "{tmu}tmu_{nrep}_{sim_output}"
 )

 # Every combination of the GSS burn-in parameter lists.
 neut_burnin_results = expand(
     DATADIR + neut_burnin_pattern,
     alpha=alphas,
     nmu=nmus,
     tmu=tmus,
     rbp=gss_rbps,
     N=Ns,
     nrep=nreps,
     sim_output=neut_burnin_outputs,
 )

 # Dummy aggregate rule: requesting it asks for every neutral burn-in target.
 rule neut_burnin_all:
  input: neut_burnin_results

 # Neutral burn-in job for one wildcard combination. The shell command does
 # not run SLiM: it echoes the SLiM command line into the single declared
 # output file.
 #
 # Fixes relative to the previous version:
 #  * a stray `}` stood where the pipe belonged; a lone closing brace is
 #    invalid in the format string and would also break the shell command.
 #  * the command referenced {output[1]}, but this rule declares only one
 #    output, so the index did not exist.
 rule neut_burnin:
  input:
  output:
    DATADIR + neut_burnin_pattern.replace("{sim_output}", "fullsims.bin"),
  shell:
    """
    mkdir -p {DATADIR}/neutral/

    # the output files are automatically generated from the SLiM script
    echo "{SLIM} -d N={wildcards.N} -d rbp={wildcards.rbp} \
    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep}  " > {output[0]}
    """

 # Remove every entry named neut_burnin_* directly inside the neutral
 # burn-in output directory. The previous version searched ../data/sims,
 # which is not where the neut_burnin rule writes (DATADIR + "neutral/"),
 # so it never cleaned this workflow's files.
 rule neut_burnin_clean:
  shell:
    "find {DATADIR}/neutral/ -maxdepth 1 -name 'neut_burnin_*'  | xargs rm -rf "



 # ------- Sampled Line Simulations, Optimum Shift -------
 # Optimum shift parameters
 # The following parameters are borrowed from the burnin:
 # alpha, gss_rbps, tmus, nmus, Ns
 # They *need* to be borrowed, since these files rely on those files.
 shift_moving = [0.001, 0.01]
 shift_sudden = [0.1, 0.5, 1]
 shifttype = ['converge', 'single', 'diverge']
 shifttime = [5]
 sampleN = [50, 100, 200, 1000]

 # File suffixes produced by each optimum-shift run.
 optshift_outputs = ["stats.tsv", "subpop1_neutfreqs.tsv", "subpop2_neutfreqs.tsv"]

 # Output filename template; each {name} is a Snakemake wildcard.
 optshift_pattern = (
     "split_gss/split_gss_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_"
     "{tmu}tmu_{shift}shift_{shifttime}shifttime_{moving}moving_"
     "{shifttype}shifttype_{sampleN}sampleN_{nrep}_{sim_output}"
 )


 def _optshift_targets(moving_flag, shifts):
     # Expand the optimum-shift pattern for one value of the `moving`
     # wildcard and the list of shift sizes that goes with it.
     return expand(DATADIR + optshift_pattern,
                   alpha=alphas, nmu=nmus,
                   tmu=tmus, rbp=gss_rbps,
                   N=Ns, nrep=nreps,
                   moving=[moving_flag],
                   sampleN=sampleN,
                   shifttype=shifttype,
                   shifttime=shifttime,
                   shift=shifts,
                   sim_output=optshift_outputs)


 # moving='T' is paired with shift_moving, moving='F' with shift_sudden
 optshift_results_moving = _optshift_targets('T', shift_moving)
 optshift_results_sudden = _optshift_targets('F', shift_sudden)

 optshift_results = optshift_results_moving + optshift_results_sudden

 #print("** " + "\n** ".join(optshift_results)) 

 # Dummy aggregate rule: requesting it asks for every optimum-shift target.
 rule optshift_all:
  input: optshift_results

 # Optimum-shift job for one wildcard combination. The shell command does
 # not run SLiM: it echoes the SLiM command line and copies that text into
 # all three outputs through a `tee | tee >` chain.
 # NOTE(review): the burn-in file is passed via `params`, not `input`, so
 # this rule declares no dependency on the gss_burnin rule -- confirm that
 # is intended.
 rule optshift:
  input:
  output:
    # one path per entry of optshift_outputs, in the same order
    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[0]),
    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[1]),
    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[2]),
  params:
    # build up the corresponding burnin file from the parameters
    burnin_pop = DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin")
  shell:
    """
    mkdir -p {DATADIR}/split_gss/

    # the output files are automatically generated from the SLiM script
    echo "{SLIM} -d \"burninpop='{params.burnin_pop}'\" \
    -d N={wildcards.N} -d rbp={wildcards.rbp} \
    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} \
    -d shift={wildcards.shift} -d moving={wildcards.moving} \
    -d sampleN={wildcards.sampleN} -d shifttime={wildcards.shifttime} \
    -d \"shifttype='{wildcards.shifttype}'\" " | tee {output[0]} | tee {output[1]} > {output[2]}
    """

 # Remove every entry named split_gss_* directly inside
 # sim_results/split_gss/ (-maxdepth 1 keeps find from descending further).
 rule optshift_clean:
  shell:
    "find sim_results/split_gss/ -maxdepth 1 -name 'split_gss_*'  | xargs rm -rf "


 # ------- Sampled Line Simulations, Truncation Selection -------
 # Truncation selection parameters
 # we borrow the following parameters from the burnin:
 # alpha, gss_rbps, tmus, nmus, Ns
 # parameters borrowed from optimum shift:
 # sampleN, shifttype, shifttime
 # these parameters *need* to be borrowed, since these files rely on those files.

 # tail probabilities
 tail = [0.01, 0.1, 0.25, 0.5]

 # File suffixes produced by each truncation-selection run.
 trunc_outputs = ["stats.tsv", "subpop1_neutfreqs.tsv", "subpop2_neutfreqs.tsv"]

 # Output filename template; each {name} is a Snakemake wildcard.
 trunc_pattern = ("split_trunc/split_trunc_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_" +
                  "{tmu}tmu_{tail}tail_{shifttime}shifttime_" +
                  "{shifttype}shifttype_{sampleN}sampleN_{nrep}_{sim_output}")


 # Expand over trunc_outputs, the list the trunc rule builds its outputs
 # from. This previously used optshift_outputs, which only worked because
 # the two lists currently hold the same values.
 trunc_results = expand(DATADIR + trunc_pattern,
                        alpha=alphas, nmu=nmus,
                        tmu=tmus, rbp=gss_rbps,
                        N=Ns, nrep=nreps,
                        shifttime=shifttime,
                        shifttype=shifttype,
                        sampleN=sampleN,
                        tail=tail,
                        sim_output=trunc_outputs)

 # Dummy aggregate rule: requesting it asks for every truncation target.
 rule trunc_all:
  input: trunc_results

 # Truncation-selection job for one wildcard combination. The shell command
 # does not run SLiM: it echoes the SLiM command line and copies that text
 # into all three outputs through a `tee | tee >` chain.
 #
 # Fix relative to the previous version: the echo string was never closed
 # before the pipe (odd number of double quotes), so the shell saw an
 # unterminated string. The closing quote now matches the optshift rule.
 # NOTE(review): the burn-in file is passed via `params`, not `input`, so
 # this rule declares no dependency on the gss_burnin rule -- confirm that
 # is intended.
 rule trunc:
  input:
  output:
    # one path per entry of trunc_outputs, in the same order
    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[0]),
    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[1]),
    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[2]),
  params:
    # build up the corresponding burnin file from the parameters
    burnin_pop = DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin")
  shell:
    """
    mkdir -p {DATADIR}/split_trunc/

    # the output files are automatically generated from the SLiM script
    echo "{SLIM} -d \"burninpop='{params.burnin_pop}'\" \
    -d N={wildcards.N} -d rbp={wildcards.rbp} \
    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu}  \
    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep}  \
    -d tail={wildcards.tail} -d sampleN={wildcards.sampleN}  \
    -d shifttime={wildcards.shifttime} \
    -d \"shifttype='{wildcards.shifttype}'\" " | tee {output[0]} | tee {output[1]} > {output[2]}
    """

 # Remove every entry named split_trunc_* directly inside the truncation
 # output directory. The previous version ran find on the root-level path
 # /split_trunc/ and fed the result to `rm -rf`; the trunc rule writes to
 # DATADIR + "split_trunc/", so that is the directory to search.
 rule trunc_clean:
  shell:
    "find {DATADIR}/split_trunc/ -maxdepth 1 -name 'split_trunc_*'  | xargs rm -rf "



 # ------- All Simulations -------
 # BGS, optimum-shift and truncation targets; the two burn-in target lists
 # are not part of this aggregate.
 all_results = [*bgs_results, *optshift_results, *trunc_results]
 #print(all_results)

 rule all:
  input: all_results
	import numpy as np
	np.random.seed(1)

	# Root directory for all simulation outputs, and the SLiM executable.
	# (Rule indentation restored and shell pipes un-escaped in this section;
	# the flattened form was not valid Snakemake and `\|` is not a pipe.)
	DATADIR = "sim_results/"
	SLIM = "/home/vsb/src/SLiM_build/slim "

	## Parameters
	nreps = range(50)

	# ------- Shared Parameters -------
	Ns = [1000, 100]
	nmus = [1e-8]

	# ------- BGS Simulations -------
	# BGS parameters
	Us = [0.25, 0.5, 0.75, 1, 1.25, 1.5]
	bgs_rbps = [1e-8]
	selcoefs = [1e-1, 0.5e-1, 1e-2, 0]


	# File suffixes produced by each BGS run.
	bgs_ouputs = ["stats.tsv", "neutfreqs.tsv"]

	# Output filename template; each {name} is a Snakemake wildcard.
	bgs_pattern = ("bgs/bgs_{N}N_{rbp}rbp_{s}s_{nmu}nmu_{U}U_{nrep}_{sim_output}")

	bgs_results = expand(DATADIR + bgs_pattern,
	                     s=selcoefs, nmu=nmus, U=Us, rbp=bgs_rbps,
	                     N=Ns, nrep=nreps,
	                     sim_output=bgs_ouputs)

	# Dummy aggregate rule so the BGS sims can be requested on their own
	# (avoids the "target may not contain wildcards" error).
	rule bgs_all:
	  input:
	    bgs_results

	# Echoes (does not run) the SLiM command line; tee writes it to the
	# first output and the redirect writes it to the second.
	rule bgs:
	  input:
	  output:
	    DATADIR + bgs_pattern.replace("{sim_output}", "stats.tsv"),
	    DATADIR + bgs_pattern.replace("{sim_output}", "neutfreqs.tsv")
	  shell:
	    """
	    mkdir -p {DATADIR}/bgs/

	    # the output files are automatically generated from the SLiM script
	    echo "{SLIM} -d N={wildcards.N} \
	    -d rbp={wildcards.rbp} -d nrep={wildcards.nrep} \
	    -d s={wildcards.s} -d nmu={wildcards.nmu} -d U={wildcards.U} \
	    -d run_generations=150 " | (tee {output[0]}) > {output[1]}
	    """

	# Remove every entry named bgs_* directly inside sim_results/bgs/.
	rule bgs_clean:
	  shell:
	    "find sim_results/bgs/ -maxdepth 1 -name 'bgs_*' | xargs rm -rf "




	# ------- GSS Burnin Simulations -------
	# GSS parameters
	# (Rule indentation restored and shell pipes un-escaped in this section;
	# the flattened form was not valid Snakemake and `\|` is not a pipe.)
	gss_rbps = [1e-8, 0.5e-8] # rbp
	alphas = [0.01] # effect size
	tmus = [1e-8, 1e-9, 1e-10] # trait mutation rate
	nmus = [1e-8] # neutral mutation rate


	# File suffix produced by each burn-in run.
	gss_burnin_outputs = ["fullsims.bin"]

	# Output filename template; each {name} is a Snakemake wildcard.
	gss_burnin_pattern = ("gss_burnin/gss_burnin_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_"
	                      "{tmu}tmu_{nrep}_{sim_output}")

	gss_burnin_results = expand(DATADIR + gss_burnin_pattern,
	                            alpha=alphas, nmu=nmus,
	                            tmu=tmus, rbp=gss_rbps,
	                            N=Ns, nrep=nreps,
	                            sim_output=gss_burnin_outputs)

	# Dummy aggregate rule: requesting it asks for every GSS burn-in target.
	rule gss_burnin_all:
	  input:
	    gss_burnin_results

	# Echoes (does not run) the SLiM command line into the single output.
	# Fixed: `mkdir -p DATADIR/...` lacked braces (created a literal
	# "DATADIR" directory), and the command used {output[1]} although only
	# one output is declared.
	rule gss_burnin:
	  input:
	  output:
	    DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin"),
	  shell:
	    """
	    mkdir -p {DATADIR}/gss_burnin/

	    # the output files are automatically generated from the SLiM script
	    echo "{SLIM} -d N={wildcards.N} -d rbp={wildcards.rbp} \
	    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
	    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} " > {output[0]}
	    """

	# Remove every entry named gss_burnin_* directly inside
	# sim_results/gss_burnin/.
	rule gss_burnin_clean:
	  shell:
	    "find sim_results/gss_burnin/ -maxdepth 1 -name 'gss_burnin_*' | xargs rm -rf "


	# ------- Neutral Burnin Simulations -------
	# We use the same parameters as the GSS burnin
	# (Rule indentation restored and shell pipes un-escaped in this section;
	# the flattened form was not valid Snakemake and `\|` is not a pipe.)

	# File suffix produced by each neutral burn-in run.
	neut_burnin_outputs = ["fullsims.bin"]

	# Output filename template; each {name} is a Snakemake wildcard.
	neut_burnin_pattern = ("neutral/neut_burnin_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_"
	                       "{tmu}tmu_{nrep}_{sim_output}")

	neut_burnin_results = expand(DATADIR + neut_burnin_pattern,
	                             alpha=alphas, nmu=nmus,
	                             tmu=tmus, rbp=gss_rbps,
	                             N=Ns, nrep=nreps,
	                             sim_output=neut_burnin_outputs)

	# Dummy aggregate rule: requesting it asks for every neutral burn-in target.
	rule neut_burnin_all:
	  input:
	    neut_burnin_results

	# Echoes (does not run) the SLiM command line into the single output.
	# Fixed: a stray `}` stood where the pipe belonged, and the command used
	# {output[1]} although only one output is declared.
	rule neut_burnin:
	  input:
	  output:
	    DATADIR + neut_burnin_pattern.replace("{sim_output}", "fullsims.bin"),
	  shell:
	    """
	    mkdir -p {DATADIR}/neutral/

	    # the output files are automatically generated from the SLiM script
	    echo "{SLIM} -d N={wildcards.N} -d rbp={wildcards.rbp} \
	    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
	    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} " > {output[0]}
	    """

	# Remove every entry named neut_burnin_* from the directory the
	# neut_burnin rule writes to (previously searched ../data/sims).
	rule neut_burnin_clean:
	  shell:
	    "find {DATADIR}/neutral/ -maxdepth 1 -name 'neut_burnin_*' | xargs rm -rf "



	# ------- Sampled Line Simulations, Optimum Shift -------
	# Optimum shift parameters
	# we borrow the following parameters from the burnin:
	# alpha, gss_rbps, tmus, nmus, Ns
	# these parameters need to be borrowed, since these files rely on those files.
	# (Rule indentation restored and shell pipes un-escaped in this section;
	# the flattened form was not valid Snakemake and `\|` is not a pipe.)
	shift_moving = [0.001, 0.01]
	shift_sudden = [0.1, 0.5, 1]
	shifttype = ['converge', 'single', 'diverge']
	shifttime = [5]
	sampleN = [50, 100, 200, 1000]

	# File suffixes produced by each optimum-shift run.
	optshift_outputs = ["stats.tsv", "subpop1_neutfreqs.tsv", "subpop2_neutfreqs.tsv"]

	# Output filename template; each {name} is a Snakemake wildcard.
	optshift_pattern = ("split_gss/split_gss_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_" +
	                    "{tmu}tmu_{shift}shift_{shifttime}shifttime_{moving}moving_" +
	                    "{shifttype}shifttype_{sampleN}sampleN_{nrep}_{sim_output}")


	# moving='T' is paired with shift_moving
	optshift_results_moving = expand(DATADIR + optshift_pattern,
	                                 alpha=alphas, nmu=nmus,
	                                 tmu=tmus, rbp=gss_rbps,
	                                 N=Ns, nrep=nreps,
	                                 moving=['T'],
	                                 sampleN=sampleN,
	                                 shifttype=shifttype,
	                                 shifttime=shifttime,
	                                 shift=shift_moving,
	                                 sim_output=optshift_outputs)

	# moving='F' is paired with shift_sudden
	optshift_results_sudden = expand(DATADIR + optshift_pattern,
	                                 alpha=alphas, nmu=nmus,
	                                 tmu=tmus, rbp=gss_rbps,
	                                 N=Ns, nrep=nreps,
	                                 moving=['F'],
	                                 sampleN=sampleN,
	                                 shifttype=shifttype,
	                                 shifttime=shifttime,
	                                 shift=shift_sudden,
	                                 sim_output=optshift_outputs)

	optshift_results = (optshift_results_moving + optshift_results_sudden)

	#print(" " + "\n ".join(optshift_results))

	# Dummy aggregate rule: requesting it asks for every optimum-shift target.
	rule optshift_all:
	  input:
	    optshift_results

	# Echoes (does not run) the SLiM command line and copies that text into
	# all three outputs through a `tee | tee >` chain.
	# NOTE(review): the burn-in file is passed via `params`, not `input`, so
	# no dependency on the gss_burnin rule is declared -- confirm intended.
	rule optshift:
	  input:
	  output:
	    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[0]),
	    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[1]),
	    DATADIR + optshift_pattern.replace("{sim_output}", optshift_outputs[2]),
	  params:
	    # build up the corresponding burnin file from the parameters
	    burnin_pop = DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin")
	  shell:
	    """
	    mkdir -p {DATADIR}/split_gss/

	    # the output files are automatically generated from the SLiM script
	    echo "{SLIM} -d \"burninpop='{params.burnin_pop}'\" \
	    -d N={wildcards.N} -d rbp={wildcards.rbp} \
	    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
	    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} \
	    -d shift={wildcards.shift} -d moving={wildcards.moving} \
	    -d sampleN={wildcards.sampleN} -d shifttime={wildcards.shifttime} \
	    -d \"shifttype='{wildcards.shifttype}'\" " | tee {output[0]} | tee {output[1]} > {output[2]}
	    """

	# Remove every entry named split_gss_* directly inside
	# sim_results/split_gss/.
	rule optshift_clean:
	  shell:
	    "find sim_results/split_gss/ -maxdepth 1 -name 'split_gss_*' | xargs rm -rf "


	# ------- Sampled Line Simulations, Truncation Selection -------
	# Truncation selection parameters
	# we borrow the following parameters from the burnin:
	# alpha, gss_rbps, tmus, nmus, Ns
	# parameters borrowed from optimum shift:
	# sampleN, shifttype, shifttime
	# these parameters need to be borrowed, since these files rely on those files.
	# (Rule indentation restored and shell pipes un-escaped in this section;
	# the flattened form was not valid Snakemake and `\|` is not a pipe.)

	# tail probabilities
	tail = [0.01, 0.1, 0.25, 0.5]

	# File suffixes produced by each truncation-selection run.
	trunc_outputs = ["stats.tsv", "subpop1_neutfreqs.tsv", "subpop2_neutfreqs.tsv"]

	# Output filename template; each {name} is a Snakemake wildcard.
	trunc_pattern = ("split_trunc/split_trunc_{N}N_{rbp}rbp_{alpha}alpha_{nmu}nmu_" +
	                 "{tmu}tmu_{tail}tail_{shifttime}shifttime_" +
	                 "{shifttype}shifttype_{sampleN}sampleN_{nrep}_{sim_output}")


	# Expand over trunc_outputs, the list the trunc rule builds its outputs
	# from (previously optshift_outputs, which merely holds the same values).
	trunc_results = expand(DATADIR + trunc_pattern,
	                       alpha=alphas, nmu=nmus,
	                       tmu=tmus, rbp=gss_rbps,
	                       N=Ns, nrep=nreps,
	                       shifttime=shifttime,
	                       shifttype=shifttype,
	                       sampleN=sampleN,
	                       tail=tail,
	                       sim_output=trunc_outputs)

	# Dummy aggregate rule: requesting it asks for every truncation target.
	rule trunc_all:
	  input:
	    trunc_results

	# Echoes (does not run) the SLiM command line and copies that text into
	# all three outputs through a `tee | tee >` chain.
	# Fixed: the echo string was never closed before the pipe.
	# NOTE(review): the burn-in file is passed via `params`, not `input`, so
	# no dependency on the gss_burnin rule is declared -- confirm intended.
	rule trunc:
	  input:
	  output:
	    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[0]),
	    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[1]),
	    DATADIR + trunc_pattern.replace("{sim_output}", trunc_outputs[2]),
	  params:
	    # build up the corresponding burnin file from the parameters
	    burnin_pop = DATADIR + gss_burnin_pattern.replace("{sim_output}", "fullsims.bin")
	  shell:
	    """
	    mkdir -p {DATADIR}/split_trunc/

	    # the output files are automatically generated from the SLiM script
	    echo "{SLIM} -d \"burninpop='{params.burnin_pop}'\" \
	    -d N={wildcards.N} -d rbp={wildcards.rbp} \
	    -d tmu={wildcards.tmu} -d nmu={wildcards.nmu} \
	    -d alpha={wildcards.alpha} -d nrep={wildcards.nrep} \
	    -d tail={wildcards.tail} -d sampleN={wildcards.sampleN} \
	    -d shifttime={wildcards.shifttime} \
	    -d \"shifttype='{wildcards.shifttype}'\" " | tee {output[0]} | tee {output[1]} > {output[2]}
	    """

	# Remove every entry named split_trunc_* from the directory the trunc
	# rule writes to (previously ran find on the root-level /split_trunc/).
	rule trunc_clean:
	  shell:
	    "find {DATADIR}/split_trunc/ -maxdepth 1 -name 'split_trunc_*' | xargs rm -rf "



	# ------- All Simulations -------
	# BGS, optimum-shift and truncation targets; the two burn-in target
	# lists are not part of this aggregate.
	# (Rule indentation restored; the flattened form was not valid Snakemake.)
	all_results = bgs_results + optshift_results + trunc_results
	#print(all_results)

	rule all:
	  input:
	    all_results