-
-
Save wasertech/70e641c96a801713a81c389fe8755d07 to your computer and use it in GitHub Desktop.
version 1.4.0-alpha.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root@3412bf80716e:/code# cat VERSION | |
1.4.0-alpha.1 | |
root@3412bf80716e:/code# python -m coqui_stt_training.train --help | |
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them. | |
usage: train.py [-h] [--train_files [TRAIN_FILES [TRAIN_FILES ...]]] | |
[--dev_files [DEV_FILES [DEV_FILES ...]]] | |
[--test_files [TEST_FILES [TEST_FILES ...]]] | |
[--metrics_files [METRICS_FILES [METRICS_FILES ...]]] | |
[--auto_input_dataset AUTO_INPUT_DATASET] | |
[--vocab_file VOCAB_FILE] [--read_buffer READ_BUFFER] | |
[--feature_cache FEATURE_CACHE] | |
[--cache_for_epochs CACHE_FOR_EPOCHS] | |
[--shuffle_batches true/false] [--shuffle_start SHUFFLE_START] | |
[--shuffle_buffer SHUFFLE_BUFFER] | |
[--feature_win_len FEATURE_WIN_LEN] | |
[--feature_win_step FEATURE_WIN_STEP] | |
[--audio_sample_rate AUDIO_SAMPLE_RATE] | |
[--normalize_sample_rate true/false] | |
[--augment [AUGMENT [AUGMENT ...]]] [--epochs EPOCHS] | |
[--dropout_rate DROPOUT_RATE] [--dropout_rate2 DROPOUT_RATE2] | |
[--dropout_rate3 DROPOUT_RATE3] | |
[--dropout_rate4 DROPOUT_RATE4] | |
[--dropout_rate5 DROPOUT_RATE5] | |
[--dropout_rate6 DROPOUT_RATE6] [--relu_clip RELU_CLIP] | |
[--beta1 BETA1] [--beta2 BETA2] [--epsilon EPSILON] | |
[--learning_rate LEARNING_RATE] | |
[--train_batch_size TRAIN_BATCH_SIZE] | |
[--dev_batch_size DEV_BATCH_SIZE] | |
[--test_batch_size TEST_BATCH_SIZE] | |
[--export_batch_size EXPORT_BATCH_SIZE] | |
[--skip_batch_test true/false] | |
[--inter_op_parallelism_threads INTER_OP_PARALLELISM_THREADS] | |
[--intra_op_parallelism_threads INTRA_OP_PARALLELISM_THREADS] | |
[--use_allow_growth true/false] [--load_cudnn true/false] | |
[--train_cudnn true/false] | |
[--automatic_mixed_precision true/false] | |
[--limit_test LIMIT_TEST] [--reverse_test true/false] | |
[--checkpoint_dir CHECKPOINT_DIR] | |
[--load_checkpoint_dir LOAD_CHECKPOINT_DIR] | |
[--save_checkpoint_dir SAVE_CHECKPOINT_DIR] | |
[--checkpoint_secs CHECKPOINT_SECS] | |
[--max_to_keep MAX_TO_KEEP] [--load_train LOAD_TRAIN] | |
[--load_evaluate LOAD_EVALUATE] | |
[--drop_source_layers DROP_SOURCE_LAYERS] | |
[--export_dir EXPORT_DIR] [--remove_export true/false] | |
[--export_tflite true/false] [--export_quantize true/false] | |
[--export_savedmodel true/false] [--n_steps N_STEPS] | |
[--export_zip true/false] | |
[--export_file_name EXPORT_FILE_NAME] | |
[--export_beam_width EXPORT_BEAM_WIDTH] | |
[--export_author_id EXPORT_AUTHOR_ID] | |
[--export_model_name EXPORT_MODEL_NAME] | |
[--export_model_version EXPORT_MODEL_VERSION] | |
[--export_contact_info EXPORT_CONTACT_INFO] | |
[--export_license EXPORT_LICENSE] | |
[--export_language EXPORT_LANGUAGE] | |
[--export_min_stt_version EXPORT_MIN_STT_VERSION] | |
[--export_max_stt_version EXPORT_MAX_STT_VERSION] | |
[--export_description EXPORT_DESCRIPTION] | |
[--log_level LOG_LEVEL] [--show_progressbar true/false] | |
[--log_placement true/false] [--report_count REPORT_COUNT] | |
[--summary_dir SUMMARY_DIR] | |
[--test_output_file TEST_OUTPUT_FILE] [--n_hidden N_HIDDEN] | |
[--layer_norm true/false] [--random_seed RANDOM_SEED] | |
[--early_stop true/false] [--es_epochs ES_EPOCHS] | |
[--es_min_delta ES_MIN_DELTA] | |
[--reduce_lr_on_plateau true/false] | |
[--plateau_epochs PLATEAU_EPOCHS] | |
[--plateau_reduction PLATEAU_REDUCTION] | |
[--force_initialize_learning_rate true/false] | |
[--bytes_output_mode true/false] | |
[--alphabet_config_path ALPHABET_CONFIG_PATH] | |
[--scorer_path SCORER_PATH] [--beam_width BEAM_WIDTH] | |
[--lm_alpha LM_ALPHA] [--lm_beta LM_BETA] | |
[--cutoff_prob CUTOFF_PROB] [--cutoff_top_n CUTOFF_TOP_N] | |
[--one_shot_infer ONE_SHOT_INFER] | |
[--lm_alpha_max LM_ALPHA_MAX] [--lm_beta_max LM_BETA_MAX] | |
[--n_trials N_TRIALS] | |
optional arguments: | |
-h, --help show this help message and exit | |
--train_files [TRAIN_FILES [TRAIN_FILES ...]] | |
Coqpit Field: space-separated list of files specifying | |
the datasets used for training. Multiple files will | |
get merged. If empty, training will not be run. | |
--dev_files [DEV_FILES [DEV_FILES ...]] | |
Coqpit Field: space-separated list of files specifying | |
the datasets used for validation. Multiple files will | |
get reported separately. If empty, validation will not | |
be run. | |
--test_files [TEST_FILES [TEST_FILES ...]] | |
Coqpit Field: space-separated list of files specifying | |
the datasets used for testing. Multiple files will get | |
reported separately. If empty, the model will not be | |
tested. | |
--metrics_files [METRICS_FILES [METRICS_FILES ...]] | |
Coqpit Field: space-separated list of files specifying | |
the datasets used for tracking of metrics (after | |
validation step). Currently the only metric is the CTC | |
loss but without affecting the tracking of best | |
validation loss. Multiple files will get reported | |
separately. If empty, metrics will not be computed. | |
--auto_input_dataset AUTO_INPUT_DATASET | |
Coqpit Field: path to a single CSV file to use for | |
training. Cannot be specified alongside --train_files, | |
--dev_files, --test_files. Training/validation/testing | |
subsets will be automatically generated from the | |
input, along with an alphabet file, if not already | |
present. | |
--vocab_file VOCAB_FILE | |
Coqpit Field: For use with evaluate_flashlight - text | |
file containing vocabulary of scorer, one word per | |
line. | |
--read_buffer READ_BUFFER | |
Coqpit Field: buffer-size for reading samples from | |
datasets (supports file-size suffixes KB, MB, GB, TB) | |
--feature_cache FEATURE_CACHE | |
Coqpit Field: cache MFCC features to disk to speed up | |
future training runs on the same data. This flag | |
specifies the path where cached features extracted | |
from --train_files will be saved. If empty, or if | |
online augmentation flags are enabled, caching will be | |
disabled. | |
--cache_for_epochs CACHE_FOR_EPOCHS | |
Coqpit Field: after how many epochs the feature cache | |
is invalidated again - 0 for "never" | |
--shuffle_batches true/false | |
Coqpit Field: reshuffle batches every epoch, starting | |
after N epochs, where N is set by the shuffle_start | |
flag. | |
--shuffle_start SHUFFLE_START | |
Coqpit Field: epoch to start shuffling batches from | |
(zero-based). | |
--shuffle_buffer SHUFFLE_BUFFER | |
Coqpit Field: how many batches to keep in shuffle | |
buffer when shuffling batches. | |
--feature_win_len FEATURE_WIN_LEN | |
Coqpit Field: feature extraction audio window length | |
in milliseconds | |
--feature_win_step FEATURE_WIN_STEP | |
Coqpit Field: feature extraction window step length in | |
milliseconds | |
--audio_sample_rate AUDIO_SAMPLE_RATE | |
Coqpit Field: sample rate value expected by model | |
--normalize_sample_rate true/false | |
Coqpit Field: normalize sample rate of all train_files | |
to --audio_sample_rate | |
--augment [AUGMENT [AUGMENT ...]] | |
Coqpit Field: space-separated list of augmentations for | |
training samples. Format is "--augment | |
operation1[param1=value1, ...] | |
operation2[param1=value1, ...] ..." | |
--epochs EPOCHS Coqpit Field: how many epochs (complete runs through | |
the train files) to train for | |
--dropout_rate DROPOUT_RATE | |
Coqpit Field: dropout rate for feedforward layers | |
--dropout_rate2 DROPOUT_RATE2 | |
Coqpit Field: dropout rate for layer 2 - defaults to | |
dropout_rate | |
--dropout_rate3 DROPOUT_RATE3 | |
Coqpit Field: dropout rate for layer 3 - defaults to | |
dropout_rate | |
--dropout_rate4 DROPOUT_RATE4 | |
Coqpit Field: dropout rate for layer 4 - defaults to | |
0.0 | |
--dropout_rate5 DROPOUT_RATE5 | |
Coqpit Field: dropout rate for layer 5 - defaults to | |
0.0 | |
--dropout_rate6 DROPOUT_RATE6 | |
Coqpit Field: dropout rate for layer 6 - defaults to | |
dropout_rate | |
--relu_clip RELU_CLIP | |
Coqpit Field: ReLU clipping value for non-recurrent | |
layers | |
--beta1 BETA1 Coqpit Field: beta 1 parameter of Adam optimizer | |
--beta2 BETA2 Coqpit Field: beta 2 parameter of Adam optimizer | |
--epsilon EPSILON Coqpit Field: epsilon parameter of Adam optimizer | |
--learning_rate LEARNING_RATE | |
Coqpit Field: learning rate of Adam optimizer | |
--train_batch_size TRAIN_BATCH_SIZE | |
Coqpit Field: number of elements in a training batch | |
--dev_batch_size DEV_BATCH_SIZE | |
Coqpit Field: number of elements in a validation batch | |
--test_batch_size TEST_BATCH_SIZE | |
Coqpit Field: number of elements in a test batch | |
--export_batch_size EXPORT_BATCH_SIZE | |
Coqpit Field: number of elements per batch on the | |
exported graph | |
--skip_batch_test true/false | |
Coqpit Field: skip batch size memory test before | |
training | |
--inter_op_parallelism_threads INTER_OP_PARALLELISM_THREADS | |
Coqpit Field: number of inter-op parallelism threads - | |
see tf.ConfigProto for more details. USE OF THIS FLAG | |
IS UNSUPPORTED | |
--intra_op_parallelism_threads INTRA_OP_PARALLELISM_THREADS | |
Coqpit Field: number of intra-op parallelism threads - | |
see tf.ConfigProto for more details. USE OF THIS FLAG | |
IS UNSUPPORTED | |
--use_allow_growth true/false | |
Coqpit Field: use Allow Growth flag which will | |
allocate only required amount of GPU memory and | |
prevent full allocation of available GPU memory | |
--load_cudnn true/false | |
Coqpit Field: Specifying this flag allows one to | |
convert a CuDNN RNN checkpoint to a checkpoint capable | |
of running on a CPU graph. | |
--train_cudnn true/false | |
Coqpit Field: use CuDNN RNN backend for training on | |
GPU. Note that checkpoints created with this flag can | |
only be used with CuDNN RNN, i.e. fine tuning on a CPU | |
device will not work | |
--automatic_mixed_precision true/false | |
Coqpit Field: whether to allow automatic mixed | |
precision training. USE OF THIS FLAG IS UNSUPPORTED. | |
Checkpoints created with automatic mixed precision | |
training will not be usable without mixed precision. | |
--limit_test LIMIT_TEST | |
Coqpit Field: maximum number of elements to use from | |
test set - 0 means no limit | |
--reverse_test true/false | |
Coqpit Field: whether to reverse sample order of the test | |
set | |
--checkpoint_dir CHECKPOINT_DIR | |
Coqpit Field: directory from which checkpoints are | |
loaded and to which they are saved | |
--load_checkpoint_dir LOAD_CHECKPOINT_DIR | |
Coqpit Field: directory in which checkpoints are | |
stored | |
--save_checkpoint_dir SAVE_CHECKPOINT_DIR | |
Coqpit Field: directory to which checkpoints are saved | |
--checkpoint_secs CHECKPOINT_SECS | |
Coqpit Field: checkpoint saving interval in seconds | |
--max_to_keep MAX_TO_KEEP | |
Coqpit Field: number of checkpoint files to keep - | |
default value is 5 | |
--load_train LOAD_TRAIN | |
Coqpit Field: what checkpoint to load before starting | |
the training process. "last" for loading most recent | |
epoch checkpoint, "best" for loading best validation | |
loss checkpoint, "init" for initializing a new | |
checkpoint, "auto" for trying several options. | |
--load_evaluate LOAD_EVALUATE | |
Coqpit Field: what checkpoint to load for evaluation | |
tasks (test epochs, model export, single file | |
inference, etc). "last" for loading most recent epoch | |
checkpoint, "best" for loading best validation loss | |
checkpoint, "auto" for trying several options. | |
--drop_source_layers DROP_SOURCE_LAYERS | |
Coqpit Field: single integer for how many layers to | |
drop from source model (to drop just output == 1, drop | |
penultimate and output == 2, etc.) | |
--export_dir EXPORT_DIR | |
Coqpit Field: directory in which exported models are | |
stored - if omitted, the model won't get exported | |
--remove_export true/false | |
Coqpit Field: whether to remove old exported models | |
--export_tflite true/false | |
Coqpit Field: export a graph ready for TF Lite engine | |
--export_quantize true/false | |
Coqpit Field: export a quantized model (optimized for | |
size) | |
--export_savedmodel true/false | |
Coqpit Field: export model in TF SavedModel format | |
--n_steps N_STEPS Coqpit Field: how many timesteps to process at once by | |
the export graph, higher values mean more latency | |
--export_zip true/false | |
Coqpit Field: export a TFLite model and package with | |
LM and info.json | |
--export_file_name EXPORT_FILE_NAME | |
Coqpit Field: name for the exported model file | |
--export_beam_width EXPORT_BEAM_WIDTH | |
Coqpit Field: default beam width to embed into | |
exported graph | |
--export_author_id EXPORT_AUTHOR_ID | |
Coqpit Field: author of the exported model. GitHub | |
user or organization name used to uniquely identify | |
the author of this model | |
--export_model_name EXPORT_MODEL_NAME | |
Coqpit Field: name of the exported model. Must not | |
contain forward slashes. | |
--export_model_version EXPORT_MODEL_VERSION | |
Coqpit Field: semantic version of the exported model. | |
See https://semver.org/. This is fully controlled by | |
you as author of the model and has no required | |
connection with Coqui STT versions | |
--export_contact_info EXPORT_CONTACT_INFO | |
Coqpit Field: public contact information of the | |
author. Can be an email address, or a link to a | |
contact form, issue tracker, or discussion forum. Must | |
provide a way to reach the model authors | |
--export_license EXPORT_LICENSE | |
Coqpit Field: SPDX identifier of the license of the | |
exported model. See https://spdx.org/licenses/. If the | |
license does not have an SPDX identifier, use the | |
license name. | |
--export_language EXPORT_LANGUAGE | |
Coqpit Field: language the model was trained on - IETF | |
BCP 47 language tag including at least language, | |
script and region subtags. E.g. "en-Latn-GB" or "de- | |
Latn-DE" or "cmn-Hans-CN". Include as much info as you | |
can without loss of precision. For example, if a model | |
is trained on Scottish English, include the variant | |
subtag: "en-Latn-GB-Scotland". | |
--export_min_stt_version EXPORT_MIN_STT_VERSION | |
Coqpit Field: minimum Coqui STT version (inclusive) | |
the exported model is compatible with | |
--export_max_stt_version EXPORT_MAX_STT_VERSION | |
Coqpit Field: maximum Coqui STT version (inclusive) | |
the exported model is compatible with | |
--export_description EXPORT_DESCRIPTION | |
Coqpit Field: Freeform description of the model being | |
exported. Markdown accepted. You can also leave this | |
flag unchanged and edit the generated .md file | |
directly. Useful things to describe are demographic | |
and acoustic characteristics of the data used to train | |
the model, any architectural changes, names of public | |
datasets that were used when applicable, | |
hyperparameters used for training, evaluation results | |
on standard benchmark datasets, etc. | |
--log_level LOG_LEVEL | |
Coqpit Field: log level for console logs - 0: DEBUG, | |
1: INFO, 2: WARN, 3: ERROR | |
--show_progressbar true/false | |
Coqpit Field: Show progress for training, validation | |
and testing processes. Log level should be > 0. | |
--log_placement true/false | |
Coqpit Field: whether to log device placement of the | |
operators to the console | |
--report_count REPORT_COUNT | |
Coqpit Field: number of phrases for each of best WER, | |
median WER and worst WER to print out during a WER | |
report | |
--summary_dir SUMMARY_DIR | |
Coqpit Field: target directory for TensorBoard | |
summaries - defaults to directory "summaries" within | |
the checkpoint folder | |
--test_output_file TEST_OUTPUT_FILE | |
Coqpit Field: path to a file to save all | |
src/decoded/distance/loss tuples generated during a | |
test epoch | |
--n_hidden N_HIDDEN Coqpit Field: layer width to use when initialising | |
layers | |
--layer_norm true/false | |
Coqpit Field: whether to use layer-normalization after | |
each fully-connected layer (except the last one) | |
--random_seed RANDOM_SEED | |
Coqpit Field: default random seed that is used to | |
initialize variables | |
--early_stop true/false | |
Coqpit Field: Enable early stopping mechanism over | |
validation dataset. If validation is not being run, | |
early stopping is disabled. | |
--es_epochs ES_EPOCHS | |
Coqpit Field: Number of epochs with no improvement | |
after which training will be stopped. Loss is not | |
stored in the checkpoint so when checkpoint is revived | |
it starts the loss calculation from start at that | |
point | |
--es_min_delta ES_MIN_DELTA | |
Coqpit Field: Minimum change in loss to qualify as an | |
improvement. This value will also be used in Reduce | |
learning rate on plateau | |
--reduce_lr_on_plateau true/false | |
Coqpit Field: Enable reducing the learning rate if a | |
plateau is reached. This is the case if the validation | |
loss did not improve for some epochs. | |
--plateau_epochs PLATEAU_EPOCHS | |
Coqpit Field: Number of epochs to consider for RLROP. | |
Has to be smaller than es_epochs from early stopping | |
--plateau_reduction PLATEAU_REDUCTION | |
Coqpit Field: Multiplicative factor to apply to the | |
current learning rate if a plateau has occurred. | |
--force_initialize_learning_rate true/false | |
Coqpit Field: Force re-initialization of learning rate | |
which was previously reduced. | |
--bytes_output_mode true/false | |
Coqpit Field: enable Bytes Output Mode. When this | |
is used the model outputs UTF-8 byte values directly | |
rather than using an alphabet mapping. The | |
--alphabet_config_path option will be ignored. See the | |
training documentation for more details. | |
--alphabet_config_path ALPHABET_CONFIG_PATH | |
Coqpit Field: path to the configuration file | |
specifying the alphabet used by the network. See the | |
comment in data/alphabet.txt for a description of the | |
format. | |
--scorer_path SCORER_PATH | |
Coqpit Field: path to the external scorer file. | |
--beam_width BEAM_WIDTH | |
Coqpit Field: beam width used in the CTC decoder when | |
building candidate transcriptions | |
--lm_alpha LM_ALPHA Coqpit Field: the alpha hyperparameter of the CTC | |
decoder. Language Model weight. | |
--lm_beta LM_BETA Coqpit Field: the beta hyperparameter of the CTC | |
decoder. Word insertion weight. | |
--cutoff_prob CUTOFF_PROB | |
Coqpit Field: only consider characters until this | |
probability mass is reached. 1.0 = disabled. | |
--cutoff_top_n CUTOFF_TOP_N | |
Coqpit Field: only process this number of characters | |
sorted by probability mass for each time step. If | |
bigger than alphabet size, disabled. | |
--one_shot_infer ONE_SHOT_INFER | |
Coqpit Field: one-shot inference mode: specify a wav | |
file and the script will load the checkpoint and | |
perform inference on it. | |
--lm_alpha_max LM_ALPHA_MAX | |
Coqpit Field: the maximum of the alpha hyperparameter | |
of the CTC decoder explored during hyperparameter | |
optimization. Language Model weight. | |
--lm_beta_max LM_BETA_MAX | |
Coqpit Field: the maximum beta hyperparameter of the | |
CTC decoder explored during hyperparameter | |
optimization. Word insertion weight. | |
--n_trials N_TRIALS Coqpit Field: the number of trials to run during | |
hyperparameter optimization. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment