Last active
January 26, 2023 19:29
-
-
Save gregcaporaso/2c915878195614ade4e32895a61ace01 to your computer and use it in GitHub Desktop.
example provenance replay scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
###############################################################################
# Auto-generated by provenance_lib v.0.2.0 at 09:49:20 AM on 24 Jan, 2023
# This document is a representation of the scholarly work of the creator of the
# QIIME 2 Results provided as input to provenance_lib, and may be protected by
# intellectual property law. Please respect all copyright restrictions and
# licenses governing the use, modification, and redistribution of this work.
# For User Support, post to the Community Plugin Support channel of the QIIME 2
# Forum: https://forum.qiime2.org
# Documentation/issues: https://github.com/qiime2/provenance_lib
# UUIDs of all target QIIME 2 Results are shown at the end of the file
# Instructions for use:
# 1. Open this script in a text editor or IDE. Support for BASH
#    syntax highlighting can be helpful.
# 2. Search or scan visually for '<' or '>' characters to find places where
#    user input (e.g. a filepath or column name) is required. These must be
#    replaced with your own values. E.g. <column name> -> 'patient_id'.
#    Failure to remove '<' or '>' may result in `No such File ...` errors
# 3. Search for 'FIXME' comments in the script, and respond as directed.
# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
#    may result in 'Missing Option' errors
# 5. Adjust the arguments to the commands below to suit your data and metadata.
#    If your data is not identical to that in the replayed analysis,
#    changes may be required. (e.g. sample ids or rarefaction depth)
# 6. Optional: replace any filenames in this script that begin with 'XX' with
#    unique file names to ensure they are preserved. QIIME 2 saves all outputs
#    from all actions in this script to disk regardless of whether those
#    outputs were in the original collection of replayed results. The filenames
#    of "un-replayed" artifacts are prefixed with 'XX' so they may be easily
#    located. These names are not guaranteed to be unique, so 'XX_table.qza'
#    may be overwritten by another 'XX_table.qza' later in the script.
# 7. Activate your replay conda environment, and confirm you have installed all
#    plugins used by the script.
# 8. Run this script with `bash <path to this script>`, or copy-paste commands
#    into the terminal for a more interactive analysis.
# 9. Optional: to delete all results not required to produce the figures and
#    data used to generate this script, navigate to the directory in which you
#    ran the script and `rm XX*.qz*`
###############################################################################
# Shell options for the whole script:
#   -e  exit immediately if a command fails
#   -x  print each command (to stderr) before it runs, so you can track progress
set -e -x
# Import the taxonomy data as a FeatureData[Taxonomy] artifact.
# Replace the <your data here> placeholder with the path to your own data:
# left as-is, bash parses '<' and '>' as redirections and the command fails
# with a `No such file` error.
qiime tools import \
  --type 'FeatureData[Taxonomy]' \
  --input-path <your data here> \
  --output-path feature-data-taxonomy-0.qza
# Import the sequence data as a FeatureData[Sequence] artifact.
# Replace the <your data here> placeholder with the path to your own data:
# left as-is, bash parses '<' and '>' as redirections and the command fails
# with a `No such file` error.
qiime tools import \
  --type 'FeatureData[Sequence]' \
  --input-path <your data here> \
  --output-path feature-data-sequence-0.qza
# Import the per-sample reads as a SampleData[SequencesWithQuality] artifact.
# Replace the <your data here> placeholder with the path to your own data:
# left as-is, bash parses '<' and '>' as redirections and the command fails
# with a `No such file` error.
qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path <your data here> \
  --output-path sample-data-sequences-with-quality-0.qza
# Fit a naive Bayes classifier from the imported sequences (used as reference
# reads) and the imported taxonomy (used as reference taxonomy). The result,
# classifier-0.qza, is consumed later by `classify-sklearn`.
# The --p-classify--* and --p-feat-ext--* values are those recorded in the
# replayed provenance; adjust only if your reference data requires it.
qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads feature-data-sequence-0.qza \
  --i-reference-taxonomy feature-data-taxonomy-0.qza \
  --p-classify--alpha 0.001 \
  --p-classify--chunk-size 20000 \
  --p-classify--class-prior null \
  --p-no-classify--fit-prior \
  --p-no-feat-ext--alternate-sign \
  --p-feat-ext--analyzer char_wb \
  --p-no-feat-ext--binary \
  --p-feat-ext--decode-error strict \
  --p-feat-ext--encoding utf-8 \
  --p-feat-ext--input content \
  --p-feat-ext--lowercase \
  --p-feat-ext--n-features 8192 \
  --p-feat-ext--ngram-range '[7, 7]' \
  --p-feat-ext--norm l2 \
  --p-feat-ext--preprocessor null \
  --p-feat-ext--stop-words null \
  --p-feat-ext--strip-accents null \
  --p-feat-ext--token-pattern '(?u)\b\w\w+\b' \
  --p-feat-ext--tokenizer null \
  --p-no-verbose \
  --o-classifier classifier-0.qza
# Trim the adapter sequence from the imported single-end reads.
# --p-discard-untrimmed is set, so reads should only be kept when the adapter
# was found (NOTE(review): confirm against the q2-cutadapt documentation).
# --p-cores 15 is the value recorded in provenance; lower it if your machine
# has fewer cores available.
qiime cutadapt trim-single \
  --i-demultiplexed-sequences sample-data-sequences-with-quality-0.qza \
  --p-cores 15 \
  --p-adapter CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG \
  --p-error-rate 0.1 \
  --p-indels \
  --p-times 1 \
  --p-overlap 3 \
  --p-no-match-read-wildcards \
  --p-match-adapter-wildcards \
  --p-minimum-length 1 \
  --p-discard-untrimmed \
  --o-trimmed-sequences trimmed-sequences-0.qza
# Denoise the trimmed reads with DADA2 (denoise-pyro), producing the
# representative sequences and the feature table used by the rest of the
# script. The denoising stats were not part of the replayed results, so that
# output carries the 'XX' prefix (see instruction 6 in the header).
qiime dada2 denoise-pyro \
  --i-demultiplexed-seqs trimmed-sequences-0.qza \
  --p-trunc-len 150 \
  --p-trim-left 0 \
  --p-max-ee 2.0 \
  --p-trunc-q 2 \
  --p-max-len 0 \
  --p-pooling-method independent \
  --p-chimera-method consensus \
  --p-min-fold-parent-over-abundance 1.0 \
  --p-no-allow-one-off \
  --p-n-threads 0 \
  --p-n-reads-learn 250000 \
  --p-hashed-feature-ids \
  --o-representative-sequences representative-sequences-0.qza \
  --o-table table-0.qza \
  --o-denoising-stats XX_denoising_stats
# Filter the feature table: --p-min-samples 2 requires a feature to be present
# in at least two samples, and --p-filter-empty-samples is enabled so samples
# left without features are removed as well.
qiime feature-table filter-features \
  --i-table table-0.qza \
  --p-min-frequency 0 \
  --p-min-samples 2 \
  --p-no-exclude-ids \
  --p-filter-empty-samples \
  --o-filtered-table filtered-table-0.qza
# Filter the representative sequences against the filtered feature table so
# that sequences and table describe the same set of features
# (--p-no-exclude-ids: ids found in the table are kept rather than excluded).
qiime feature-table filter-seqs \
  --i-data representative-sequences-0.qza \
  --i-table filtered-table-0.qza \
  --p-no-exclude-ids \
  --o-filtered-data filtered-data-0.qza
# Build a phylogeny from the filtered sequences with the
# align-to-tree-mafft-fasttree pipeline. Only the rooted tree is used by later
# commands; the alignment, masked alignment and unrooted tree were not part of
# the replayed results and therefore carry the 'XX' prefix.
# --p-n-threads 16 is the value recorded in provenance; adjust to your machine.
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences filtered-data-0.qza \
  --p-n-threads 16 \
  --p-mask-max-gap-frequency 1.0 \
  --p-mask-min-conservation 0.4 \
  --p-no-parttree \
  --o-rooted-tree rooted-tree-0.qza \
  --o-alignment XX_alignment \
  --o-masked-alignment XX_masked_alignment \
  --o-tree XX_tree
# Classify the filtered sequences with classifier-0.qza (the naive Bayes
# classifier fitted earlier in this script). The resulting classification is
# later passed to EMPress as feature metadata.
# '2*n_jobs' is quoted so that bash does not treat '*' as a glob pattern.
qiime feature-classifier classify-sklearn \
  --i-reads filtered-data-0.qza \
  --i-classifier classifier-0.qza \
  --p-reads-per-batch auto \
  --p-n-jobs 8 \
  --p-pre-dispatch '2*n_jobs' \
  --p-confidence 0.7 \
  --p-read-orientation auto \
  --o-classification classification-0.qza
# Replay attempts to represent metadata inputs accurately, but metadata .tsv
# files are merged automatically by some interfaces, rendering distinctions
# between file inputs invisible in provenance. We output the recorded
# metadata to disk to enable visual inspection.
#
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the
# original metadata, saved at
# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'
#
# Run the core phylogenetic diversity metrics on the filtered table and the
# rooted tree at a sampling depth of 4000. Only the rarefied table and the
# unweighted UniFrac PCoA results are used by later commands; every other
# output carries the 'XX' prefix.
# Replace the <your metadata filepath> placeholder with the path to your
# sample metadata: left as-is, bash parses '<' and '>' as redirections and
# the command fails with a `No such file` error.
qiime diversity core-metrics-phylogenetic \
  --i-table filtered-table-0.qza \
  --i-phylogeny rooted-tree-0.qza \
  --p-sampling-depth 4000 \
  --m-metadata-file <your metadata filepath> \
  --p-no-with-replacement \
  --p-n-jobs-or-threads 16 \
  --o-rarefied-table rarefied-table-0.qza \
  --o-unweighted-unifrac-pcoa-results unweighted-unifrac-pcoa-results-0.qza \
  --o-faith-pd-vector XX_faith_pd_vector \
  --o-observed-features-vector XX_observed_features_vector \
  --o-shannon-vector XX_shannon_vector \
  --o-evenness-vector XX_evenness_vector \
  --o-unweighted-unifrac-distance-matrix XX_unweighted_unifrac_distance_matrix \
  --o-weighted-unifrac-distance-matrix XX_weighted_unifrac_distance_matrix \
  --o-jaccard-distance-matrix XX_jaccard_distance_matrix \
  --o-bray-curtis-distance-matrix XX_bray_curtis_distance_matrix \
  --o-weighted-unifrac-pcoa-results XX_weighted_unifrac_pcoa_results \
  --o-jaccard-pcoa-results XX_jaccard_pcoa_results \
  --o-bray-curtis-pcoa-results XX_bray_curtis_pcoa_results \
  --o-unweighted-unifrac-emperor XX_unweighted_unifrac_emperor \
  --o-weighted-unifrac-emperor XX_weighted_unifrac_emperor \
  --o-jaccard-emperor XX_jaccard_emperor \
  --o-bray-curtis-emperor XX_bray_curtis_emperor
# Convert the rarefied feature table to relative frequencies; the result
# feeds both the PCoA biplot and the EMPress community plot.
qiime feature-table relative-frequency \
  --i-table rarefied-table-0.qza \
  --o-relative-frequency-table relative-frequency-table-0.qza
# Build a biplot from the unweighted UniFrac PCoA results, using the
# relative-frequency table as the feature input.
qiime diversity pcoa-biplot \
  --i-pcoa unweighted-unifrac-pcoa-results-0.qza \
  --i-features relative-frequency-table-0.qza \
  --o-biplot biplot-0.qza
# The following command takes two metadata inputs (sample metadata and
# feature metadata) and may have received additional metadata .tsv files for
# either of them. To confirm you have covered your metadata needs adequately,
# review the original metadata, saved at
# './recorded_metadata/empress_community_plot_0/'
#
# Render the EMPress community plot from the rooted tree, the
# relative-frequency table and the biplot; the taxonomic classification is
# supplied as feature metadata.
# Replace the <your metadata filepath> placeholder with the path to your
# sample metadata: left as-is, bash parses '<' and '>' as redirections and
# the command fails with a `No such file` error.
qiime empress community-plot \
  --i-tree rooted-tree-0.qza \
  --i-feature-table relative-frequency-table-0.qza \
  --i-pcoa biplot-0.qza \
  --m-sample-metadata-file <your metadata filepath> \
  --m-feature-metadata-file classification-0.qza \
  --p-no-ignore-missing-samples \
  --p-no-filter-extra-samples \
  --p-no-filter-missing-features \
  --p-number-of-features 5 \
  --p-shear-to-table \
  --o-visualization visualization-0.qzv
###############################################################################
# The following QIIME 2 Results were parsed to produce this script:
# a1a46509-66fc-4719-b72b-eaa23443bed4
###############################################################################
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment