EN_DE_CONFIG = {
    "bert-train-type-embeddings": "true",
    "bert-type-vocab-size": "2",
    "dec-cell": "gru",
    "dec-cell-base-depth": "2",
    "dec-cell-high-depth": "1",
    "dec-depth": 6,
    "dim-emb": "512",
    "dim-rnn": "1024",  # IGNORE (RNN-only option, unused for type=transformer)
    "dim-vocabs": ["58100", "58100"],
    "enc-cell": "gru",  # IGNORE
    "enc-cell-depth": "1",
    "enc-depth": 6,
    "enc-type": "bidirectional",
    "input-types": [],
    "layer-normalization": "false",
    "lemma-dim-emb": "0",
    "right-left": "false",
    "skip": "false",
    "tied-embeddings": "false",
    "tied-embeddings-all": "true",  # "Tie all embedding layers and output layer"
    "tied-embeddings-src": "false",
    # FFN and AAN params are identical
    "transformer-aan-activation": "swish",
    "transformer-aan-depth": "2",  # AAN = average attention network (decoder self-attention variant)
    "transformer-aan-nogate": "false",
    "transformer-decoder-autoreg": "self-attention",
    "transformer-dim-aan": "2048",
    "transformer-dim-ffn": "2048",
    "transformer-ffn-activation": "swish",
    "transformer-ffn-depth": "2",
    "transformer-guided-alignment-layer": "last",
    "transformer-heads": 8,
    "transformer-no-projection": "false",  # omit linear projection after multi-head attention
    "transformer-postprocess": "dan",  # operation after each transformer layer: d = dropout, a = add, n = normalize
    "transformer-postprocess-emb": "d",  # operation after the transformer embedding layer (same d/a/n codes)
    "transformer-preprocess": "",  # operation before each transformer layer (same d/a/n codes)
    "transformer-tied-layers": [],
    "transformer-train-position-embeddings": "false",  # train positional embeddings instead of static sinusoidal ones
    "type": "transformer",
    "ulr": "false",
    "ulr-dim-emb": "0",
    "ulr-trainable-transformation": "false",
    "version": "v1.8.2 2111c28 2019-10-16 08:36:48 -0700",
}
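For reference, a minimal sketch of how the transformer-relevant fields could be pulled out of this raw Marian config dict into typed Python values. The `TransformerHParams` dataclass and `parse_marian_config` helper are illustrative names invented here, not part of Marian or any library; the string-to-int/bool casts only reflect how the values are serialized above.

```python
from dataclasses import dataclass


def _as_int(value) -> int:
    # Marian dumps most numeric options as strings ("512"); accept either form.
    return int(value)


def _as_bool(value) -> bool:
    # Marian encodes booleans as the strings "true" / "false".
    return str(value).lower() == "true"


@dataclass
class TransformerHParams:
    vocab_size: int
    d_model: int
    encoder_layers: int
    decoder_layers: int
    heads: int
    ffn_dim: int
    ffn_activation: str
    tied_embeddings_all: bool
    static_position_embeddings: bool


def parse_marian_config(cfg: dict) -> TransformerHParams:
    """Extract the transformer hyperparameters from a raw Marian config dict."""
    return TransformerHParams(
        vocab_size=_as_int(cfg["dim-vocabs"][0]),
        d_model=_as_int(cfg["dim-emb"]),
        encoder_layers=_as_int(cfg["enc-depth"]),
        decoder_layers=_as_int(cfg["dec-depth"]),
        heads=_as_int(cfg["transformer-heads"]),
        ffn_dim=_as_int(cfg["transformer-dim-ffn"]),
        ffn_activation=cfg["transformer-ffn-activation"],
        tied_embeddings_all=_as_bool(cfg["tied-embeddings-all"]),
        static_position_embeddings=not _as_bool(cfg["transformer-train-position-embeddings"]),
    )


if __name__ == "__main__":
    hparams = parse_marian_config(EN_DE_CONFIG)
    # Expected for this en-de config: vocab 58100, d_model 512, 6+6 layers, 8 heads, FFN 2048.
    print(hparams)
```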