#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Auto-generated by provenance_lib v.0.2.0 at 09:49:51 AM on 24 Jan, 2023
# This document is a representation of the scholarly work of the creator of the
# QIIME 2 Results provided as input to provenance_lib, and may be protected by
# intellectual property law. Please respect all copyright restrictions and
# licenses governing the use, modification, and redistribution of this work.
# For User Support, post to the Community Plugin Support channel of the QIIME 2
# Forum: https://forum.qiime2.org
# Documentation/issues: https://github.com/qiime2/provenance_lib
# UUIDs of all target QIIME 2 Results are shown at the end of the file
# Instructions for use:
# 1. Open this script in a text editor or IDE. Support for Python
#    syntax highlighting is helpful.
# 2. Search or scan visually for '<' or '>' characters to find places where
#    user input (e.g. a filepath or column name) is required. If syntax
#    highlighting is enabled, '<' and '>' will appear as syntax errors.
# 3. Search for 'FIXME' comments in the script, and respond as directed.
# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
#    may result in 'Missing Option' errors.
# 5. Adjust the arguments to the commands below to suit your data and metadata.
#    If your data is not identical to that in the replayed analysis,
#    changes may be required (e.g. sample IDs or rarefaction depth).
# 6. Optional: search for 'SAVE' comments in the script, commenting out the
#    `some_result.save` lines for any Results you do not want saved to disk.
# 7. Activate your replay conda environment, and confirm you have installed all
#    plugins used by the script.
# 8. Run this script with `python <path to this script>`, or paste commands
#    into a Python interpreter or Jupyter notebook for an interactive analysis.
# -----------------------------------------------------------------------------
from qiime2 import Artifact
from qiime2 import Metadata
import qiime2.plugins.cutadapt.actions as cutadapt_actions
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.diversity.actions as diversity_actions
import qiime2.plugins.empress.actions as empress_actions
import qiime2.plugins.feature_classifier.actions as feature_classifier_actions
import qiime2.plugins.feature_table.actions as feature_table_actions
import qiime2.plugins.phylogeny.actions as phylogeny_actions
feature_data_taxonomy_0 = Artifact.import_data(
    'FeatureData[Taxonomy]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_taxonomy_0.save('feature_data_taxonomy_0')
feature_data_sequence_0 = Artifact.import_data(
    'FeatureData[Sequence]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_sequence_0.save('feature_data_sequence_0')
sample_data_sequences_with_quality_0 = Artifact.import_data(
    'SampleData[SequencesWithQuality]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
sample_data_sequences_with_quality_0.save('sample_data_sequences_with_quality_0')
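# For reference, the three '<your data here>' placeholders above are often
# filled as sketched below. This is a hedged example: the filenames and
# view_type formats are assumptions about how source data is commonly laid
# out, not values recorded in provenance.
#
# feature_data_taxonomy_0 = Artifact.import_data(
#     'FeatureData[Taxonomy]',
#     'ref-taxonomy.tsv',
#     view_type='HeaderlessTSVTaxonomyFormat',
# )
# feature_data_sequence_0 = Artifact.import_data(
#     'FeatureData[Sequence]',
#     'ref-seqs.fasta',
# )
# sample_data_sequences_with_quality_0 = Artifact.import_data(
#     'SampleData[SequencesWithQuality]',
#     'se-33-manifest.tsv',
#     view_type='SingleEndFastqManifestPhred33V2',
# )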
classifier_0, = feature_classifier_actions.fit_classifier_naive_bayes(
    reference_reads=feature_data_sequence_0,
    reference_taxonomy=feature_data_taxonomy_0,
    classify__alpha=0.001,
    classify__chunk_size=20000,
    classify__class_prior='null',
    classify__fit_prior=False,
    feat_ext__alternate_sign=False,
    feat_ext__analyzer='char_wb',
    feat_ext__binary=False,
    feat_ext__decode_error='strict',
    feat_ext__encoding='utf-8',
    feat_ext__input='content',
    feat_ext__lowercase=True,
    feat_ext__n_features=8192,
    feat_ext__ngram_range='[7, 7]',
    feat_ext__norm='l2',
    feat_ext__preprocessor='null',
    feat_ext__stop_words='null',
    feat_ext__strip_accents='null',
    feat_ext__token_pattern='(?u)\\b\\w\\w+\\b',
    feat_ext__tokenizer='null',
    verbose=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classifier_0.save('classifier_0')
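# Note (an alternative not recorded in provenance): if an equivalent
# pre-trained classifier artifact is already on disk, it can be loaded
# instead of refit, which is much faster:
# classifier_0 = Artifact.load('<path to classifier .qza>')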
trimmed_sequences_0, = cutadapt_actions.trim_single(
    demultiplexed_sequences=sample_data_sequences_with_quality_0,
    cores=15,
    adapter=['CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG'],
    error_rate=0.1,
    indels=True,
    times=1,
    overlap=3,
    match_read_wildcards=False,
    match_adapter_wildcards=True,
    minimum_length=1,
    discard_untrimmed=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
trimmed_sequences_0.save('trimmed_sequences_0')
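# In trim_single above, 'CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG' uses
# cutadapt's linked-adapter syntax: the sequence before '...' is trimmed as a
# 5' adapter and the sequence after it as a 3' adapter (IUPAC wildcards like
# M and R match because match_adapter_wildcards=True). With
# discard_untrimmed=True, reads in which no adapter match is found are
# dropped.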
table_0, representative_sequences_0, _ = dada2_actions.denoise_pyro(
    demultiplexed_seqs=trimmed_sequences_0,
    trunc_len=150,
    trim_left=0,
    max_ee=2.0,
    trunc_q=2,
    max_len=0,
    pooling_method='independent',
    chimera_method='consensus',
    min_fold_parent_over_abundance=1.0,
    allow_one_off=False,
    n_threads=0,
    n_reads_learn=250000,
    hashed_feature_ids=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
representative_sequences_0.save('representative_sequences_0')
table_0.save('table_0')
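# Note: the third output discarded above ('_') is denoise_pyro's per-sample
# denoising stats (SampleData[DADA2Stats]). To keep a record of read
# retention at each denoising step, capture and save it instead, e.g.:
# table_0, representative_sequences_0, denoising_stats_0 = dada2_actions.denoise_pyro(...)
# denoising_stats_0.save('denoising_stats_0')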
filtered_table_0, = feature_table_actions.filter_features(
    table=table_0,
    min_frequency=0,
    min_samples=2,
    exclude_ids=False,
    filter_empty_samples=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_table_0.save('filtered_table_0')
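# With min_samples=2 (and min_frequency=0), the only filter applied above is
# removal of features observed in fewer than two samples; any samples emptied
# by that filter are then dropped.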
filtered_data_0, = feature_table_actions.filter_seqs(
    data=representative_sequences_0,
    table=filtered_table_0,
    exclude_ids=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_data_0.save('filtered_data_0')
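# filter_seqs retains only the representative sequences whose feature IDs
# remain in filtered_table_0, keeping the table and sequences consistent for
# the downstream tree and taxonomy steps.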
_, _, _, rooted_tree_0 = phylogeny_actions.align_to_tree_mafft_fasttree(
    sequences=filtered_data_0,
    n_threads=16,
    mask_max_gap_frequency=1.0,
    mask_min_conservation=0.4,
    parttree=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
rooted_tree_0.save('rooted_tree_0')
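# align_to_tree_mafft_fasttree is a pipeline that returns four Results (the
# MAFFT alignment, the masked alignment, the unrooted FastTree tree, and the
# midpoint-rooted tree); only the rooted tree is used downstream here, so the
# first three are discarded.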
classification_0, = feature_classifier_actions.classify_sklearn(
    reads=filtered_data_0,
    classifier=classifier_0,
    reads_per_batch='auto',
    n_jobs=8,
    pre_dispatch='2*n_jobs',
    confidence=0.7,
    read_orientation='auto',
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classification_0.save('classification_0')
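# classify_sklearn truncates each assignment to the deepest taxonomic level
# whose bootstrap confidence is >= 0.7; reads_per_batch='auto' lets the
# plugin autoscale the batch size from the number of query sequences and
# n_jobs.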
# Replay attempts to represent metadata inputs accurately, but metadata .tsv
# files are merged automatically by some interfaces, rendering distinctions
# between file inputs invisible in provenance. We output the recorded metadata
# to disk to enable visual inspection.
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at
# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
metadata_0_md = Metadata.load(<your metadata filepath>)
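# As the NOTE above says, already-loaded Metadata or a pandas.DataFrame can
# stand in for the Metadata.load call. A minimal sketch of the DataFrame
# route (assumes a DataFrame `df` whose rows are samples):
#
# df.index = df.index.astype(str)
# df.index.name = 'sample-id'  # Metadata requires a named, string-valued ID index
# metadata_0_md = Metadata(df)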
action_results = diversity_actions.core_metrics_phylogenetic(
    table=filtered_table_0,
    phylogeny=rooted_tree_0,
    sampling_depth=4000,
    metadata=metadata_0_md,
    with_replacement=False,
    n_jobs_or_threads=16,
)
rarefied_table_0 = action_results.rarefied_table
unweighted_unifrac_pcoa_results_0 = action_results.unweighted_unifrac_pcoa_results
# SAVE: comment out the following with '# ' to skip saving Results to disk
rarefied_table_0.save('rarefied_table_0')
unweighted_unifrac_pcoa_results_0.save('unweighted_unifrac_pcoa_results_0')
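# core_metrics_phylogenetic returns many more Results than the two retained
# above (alpha-diversity vectors, distance matrices, PCoA results, and
# Emperor plots), all available as attributes of action_results, e.g.
# action_results.faith_pd_vector or
# action_results.unweighted_unifrac_distance_matrix.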
relative_frequency_table_0, = feature_table_actions.relative_frequency(
    table=rarefied_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
relative_frequency_table_0.save('relative_frequency_table_0')
biplot_0, = diversity_actions.pcoa_biplot(
    pcoa=unweighted_unifrac_pcoa_results_0,
    features=relative_frequency_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
biplot_0.save('biplot_0')
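# pcoa_biplot projects the per-feature relative frequencies onto the
# unweighted UniFrac PCoA axes, producing feature loadings that Empress can
# draw as biplot arrows in the community plot below (number_of_features=5
# limits how many arrows are shown).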
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
sample_metadata_0_md = Metadata.load(<your metadata filepath>)
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
classification_0_a_0_md = classification_0.view(Metadata)
visualization_0_viz, = empress_actions.community_plot(
    tree=rooted_tree_0,
    feature_table=relative_frequency_table_0,
    pcoa=biplot_0,
    sample_metadata=sample_metadata_0_md,
    feature_metadata=classification_0_a_0_md,
    ignore_missing_samples=False,
    filter_extra_samples=False,
    filter_missing_features=False,
    number_of_features=5,
    shear_to_table=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
visualization_0_viz.save('visualization_0_viz')
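# The saved visualization ('visualization_0_viz.qzv') can be opened with
# `qiime tools view visualization_0_viz.qzv`, or by loading it at
# https://view.qiime2.org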
# -----------------------------------------------------------------------------
# The following QIIME 2 Results were parsed to produce this script:
# a1a46509-66fc-4719-b72b-eaa23443bed4
# -----------------------------------------------------------------------------