#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Auto-generated by provenance_lib v.0.2.0 at 09:49:51 AM on 24 Jan, 2023
# This document is a representation of the scholarly work of the creator of the
# QIIME 2 Results provided as input to provenance_lib, and may be protected by
# intellectual property law. Please respect all copyright restrictions and
# licenses governing the use, modification, and redistribution of this work.
# For User Support, post to the Community Plugin Support channel of the QIIME 2
# Forum: https://forum.qiime2.org
# Documentation/issues: https://github.com/qiime2/provenance_lib
# UUIDs of all target QIIME 2 Results are shown at the end of the file.
# Instructions for use:
# 1. Open this script in a text editor or IDE. Support for Python
#    syntax highlighting is helpful.
# 2. Search or scan visually for '<' or '>' characters to find places where
#    user input (e.g. a filepath or column name) is required. If syntax
#    highlighting is enabled, '<' and '>' will appear as syntax errors.
# 3. Search for 'FIXME' comments in the script, and respond as directed.
# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
#    may result in 'Missing Option' errors.
# 5. Adjust the arguments to the commands below to suit your data and metadata.
#    If your data is not identical to that in the replayed analysis,
#    changes may be required (e.g. sample ids or rarefaction depth).
# 6. Optional: search for 'SAVE' comments in the script, commenting out the
#    `some_result.save` lines for any Results you do not want saved to disk.
# 7. Activate your replay conda environment, and confirm you have installed all
#    plugins used by the script.
# 8. Run this script with `python <path to this script>`, or paste commands
#    into a Python interpreter or Jupyter notebook for an interactive analysis.
# -----------------------------------------------------------------------------
from qiime2 import Artifact
from qiime2 import Metadata
import qiime2.plugins.cutadapt.actions as cutadapt_actions
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.diversity.actions as diversity_actions
import qiime2.plugins.empress.actions as empress_actions
import qiime2.plugins.feature_classifier.actions as feature_classifier_actions
import qiime2.plugins.feature_table.actions as feature_table_actions
import qiime2.plugins.phylogeny.actions as phylogeny_actions

feature_data_taxonomy_0 = Artifact.import_data(
    'FeatureData[Taxonomy]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_taxonomy_0.save('feature_data_taxonomy_0')

feature_data_sequence_0 = Artifact.import_data(
    'FeatureData[Sequence]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_sequence_0.save('feature_data_sequence_0')
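
# For example, the two reference imports above might look like the following
# (illustrative only; the file names and view type are assumptions, not
# recorded in provenance):
# feature_data_taxonomy_0 = Artifact.import_data(
#     'FeatureData[Taxonomy]', 'ref-taxonomy.tsv',
#     view_type='HeaderlessTSVTaxonomyFormat')
# feature_data_sequence_0 = Artifact.import_data(
#     'FeatureData[Sequence]', 'ref-seqs.fasta')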

sample_data_sequences_with_quality_0 = Artifact.import_data(
    'SampleData[SequencesWithQuality]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
sample_data_sequences_with_quality_0.save('sample_data_sequences_with_quality_0')
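
# For example, demultiplexed single-end reads can be imported from a FASTQ
# manifest (illustrative only; the manifest path and view type are
# assumptions, not recorded in provenance):
# sample_data_sequences_with_quality_0 = Artifact.import_data(
#     'SampleData[SequencesWithQuality]', 'se-manifest.tsv',
#     view_type='SingleEndFastqManifestPhred33V2')

# Train a naive Bayes taxonomy classifier on the reference reads and taxonomy.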
classifier_0, = feature_classifier_actions.fit_classifier_naive_bayes(
    reference_reads=feature_data_sequence_0,
    reference_taxonomy=feature_data_taxonomy_0,
    classify__alpha=0.001,
    classify__chunk_size=20000,
    classify__class_prior='null',
    classify__fit_prior=False,
    feat_ext__alternate_sign=False,
    feat_ext__analyzer='char_wb',
    feat_ext__binary=False,
    feat_ext__decode_error='strict',
    feat_ext__encoding='utf-8',
    feat_ext__input='content',
    feat_ext__lowercase=True,
    feat_ext__n_features=8192,
    feat_ext__ngram_range='[7, 7]',
    feat_ext__norm='l2',
    feat_ext__preprocessor='null',
    feat_ext__stop_words='null',
    feat_ext__strip_accents='null',
    feat_ext__token_pattern='(?u)\\b\\w\\w+\\b',
    feat_ext__tokenizer='null',
    verbose=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classifier_0.save('classifier_0')
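
# Trim primers with cutadapt's linked-adapter syntax (5' adapter ... 3'
# adapter); reads in which the adapter is not found are discarded.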
trimmed_sequences_0, = cutadapt_actions.trim_single(
    demultiplexed_sequences=sample_data_sequences_with_quality_0,
    cores=15,
    adapter=['CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG'],
    error_rate=0.1,
    indels=True,
    times=1,
    overlap=3,
    match_read_wildcards=False,
    match_adapter_wildcards=True,
    minimum_length=1,
    discard_untrimmed=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
trimmed_sequences_0.save('trimmed_sequences_0')
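
# Denoise the trimmed reads with DADA2's pyrosequencing workflow, truncating
# reads at 150 nt.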
table_0, representative_sequences_0, _ = dada2_actions.denoise_pyro(
    demultiplexed_seqs=trimmed_sequences_0,
    trunc_len=150,
    trim_left=0,
    max_ee=2.0,
    trunc_q=2,
    max_len=0,
    pooling_method='independent',
    chimera_method='consensus',
    min_fold_parent_over_abundance=1.0,
    allow_one_off=False,
    n_threads=0,
    n_reads_learn=250000,
    hashed_feature_ids=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
representative_sequences_0.save('representative_sequences_0')
table_0.save('table_0')
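
# Remove features observed in fewer than two samples from the feature table.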
filtered_table_0, = feature_table_actions.filter_features(
    table=table_0,
    min_frequency=0,
    min_samples=2,
    exclude_ids=False,
    filter_empty_samples=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_table_0.save('filtered_table_0')
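
# Keep only the representative sequences that remain in the filtered table.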
filtered_data_0, = feature_table_actions.filter_seqs(
    data=representative_sequences_0,
    table=filtered_table_0,
    exclude_ids=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_data_0.save('filtered_data_0')
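
# Align the sequences with MAFFT, mask the alignment, and build a FastTree
# phylogeny; only the rooted tree is used downstream.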
_, _, _, rooted_tree_0 = phylogeny_actions.align_to_tree_mafft_fasttree(
    sequences=filtered_data_0,
    n_threads=16,
    mask_max_gap_frequency=1.0,
    mask_min_conservation=0.4,
    parttree=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
rooted_tree_0.save('rooted_tree_0')
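
# Assign taxonomy to the filtered sequences using the classifier trained
# above.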
classification_0, = feature_classifier_actions.classify_sklearn(
    reads=filtered_data_0,
    classifier=classifier_0,
    reads_per_batch='auto',
    n_jobs=8,
    pre_dispatch='2*n_jobs',
    confidence=0.7,
    read_orientation='auto',
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classification_0.save('classification_0')

# Replay attempts to represent metadata inputs accurately, but metadata .tsv
# files are merged automatically by some interfaces, rendering distinctions
# between file inputs invisible in provenance. We output the recorded metadata
# to disk to enable visual inspection.
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at
# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
metadata_0_md = Metadata.load(<your metadata filepath>)
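
# For example, a pandas.DataFrame can be cast to Metadata (illustrative only;
# the file name and index handling are assumptions):
# import pandas as pd
# df = pd.read_csv('sample-metadata.tsv', sep='\t', index_col=0)
# df.index.name = 'sample-id'  # Metadata expects a named id index
# metadata_0_md = Metadata(df)

# Compute core phylogenetic diversity metrics at a sampling depth of 4000
# sequences per sample.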
action_results = diversity_actions.core_metrics_phylogenetic(
    table=filtered_table_0,
    phylogeny=rooted_tree_0,
    sampling_depth=4000,
    metadata=metadata_0_md,
    with_replacement=False,
    n_jobs_or_threads=16,
)
rarefied_table_0 = action_results.rarefied_table
unweighted_unifrac_pcoa_results_0 = action_results.unweighted_unifrac_pcoa_results
# SAVE: comment out the following with '# ' to skip saving Results to disk
rarefied_table_0.save('rarefied_table_0')
unweighted_unifrac_pcoa_results_0.save('unweighted_unifrac_pcoa_results_0')
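
# Convert the rarefied table to relative frequencies for use in the biplot.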
relative_frequency_table_0, = feature_table_actions.relative_frequency(
    table=rarefied_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
relative_frequency_table_0.save('relative_frequency_table_0')
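
# Project feature loadings onto the unweighted UniFrac PCoA to build a biplot.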
biplot_0, = diversity_actions.pcoa_biplot(
    pcoa=unweighted_unifrac_pcoa_results_0,
    features=relative_frequency_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
biplot_0.save('biplot_0')

# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
sample_metadata_0_md = Metadata.load(<your metadata filepath>)

# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
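# The taxonomic assignments are viewed as Metadata so they can serve as
# feature metadata in the Empress plot.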
classification_0_a_0_md = classification_0.view(Metadata)
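
# Render an interactive Empress plot combining the rooted tree, feature
# table, biplot, and sample/feature metadata.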
visualization_0_viz, = empress_actions.community_plot(
    tree=rooted_tree_0,
    feature_table=relative_frequency_table_0,
    pcoa=biplot_0,
    sample_metadata=sample_metadata_0_md,
    feature_metadata=classification_0_a_0_md,
    ignore_missing_samples=False,
    filter_extra_samples=False,
    filter_missing_features=False,
    number_of_features=5,
    shear_to_table=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
visualization_0_viz.save('visualization_0_viz')

# -----------------------------------------------------------------------------
# The following QIIME 2 Results were parsed to produce this script:
# a1a46509-66fc-4719-b72b-eaa23443bed4
# -----------------------------------------------------------------------------