Last active
July 6, 2018 02:35
-
-
Save b0noI/174959e2056702e64a5ca39853a9816d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Builds a BPE-encoded dialog corpus and trains a TensorFlow NMT model on it.
#
# Pipeline (everything is created relative to the current directory, except
# the model directory, which lives under $HOME/content/nmt_model):
#   1. Clone dialog_converter at a pinned commit and run its converter.py.
#      The later steps read dialog_converter/{train,test}.{a,b}, so the
#      converter is expected to produce those four files.
#   2. Clone subword-nmt at a pinned commit and learn a joint BPE code plus
#      per-side vocabularies from the training data.
#   3. Clean the vocabularies and BPE-encode the train/test splits.
#   4. Clone tensorflow/nmt, stage all inputs in the model directory and
#      start training.
#
# Requirements: git (>= 1.8.5 for `git -C`), python3, GNU sed (`sed -i`
# without a suffix argument and `\t` in patterns are GNU behaviours).

# Stop at the first failing step instead of carrying on in the wrong
# directory or with missing/stale intermediate files.
set -euo pipefail

readonly MODEL_DIR="${HOME}/content/nmt_model"

#######################################
# Clone a repository unless a checkout already exists, then optionally
# check out a specific revision. Makes the script safe to re-run.
# Arguments:
#   $1 - repository URL
#   $2 - target directory
#   $3 - revision to check out (optional; default branch HEAD if omitted)
# Returns:
#   0 on success; non-zero if git fails.
#######################################
clone_at() {
  local url=$1
  local dir=$2
  local rev=${3:-}

  if [[ ! -d "${dir}/.git" ]]; then
    git clone "${url}" "${dir}"
  fi
  if [[ -n "${rev}" ]]; then
    git -C "${dir}" checkout "${rev}"
  fi
}

# Dialog converter
clone_at https://github.com/b0noI/dialog_converter.git dialog_converter \
  b9cc7b7d82a959c80e5048b18e956841233c7688
# Run inside a subshell so the working directory of the script is unchanged.
(
  cd dialog_converter
  python3 ./converter.py
)

# Subword NMT
clone_at https://github.com/b0noI/subword-nmt.git subword-nmt \
  dbe97c8f95f14d06b2e46b8053e2e2f9b9bf804e

# Create unique words (vocabulary) from training data
subword-nmt/learn_joint_bpe_and_vocab.py \
  --input dialog_converter/train.a dialog_converter/train.b \
  -s 50000 \
  -o code.bpe \
  --write-vocabulary vocab.train.bpe.a vocab.train.bpe.b

for side in a b; do
  # Drop every vocabulary line that contains a tab character (the whole
  # line is deleted, not just the tab).
  sed -i '/\t/d' "./vocab.train.bpe.${side}"
  # Strip the frequency column: keep only the first space-separated field.
  cut -d ' ' -f1 < "vocab.train.bpe.${side}" > "revocab.train.bpe.${side}"
done

# Generate training/test data. Note that apply_bpe.py is given the
# vocabulary that still carries frequencies (vocab.*), not revocab.*,
# because --vocabulary-threshold needs the counts.
for split in train test; do
  for side in a b; do
    subword-nmt/apply_bpe.py -c code.bpe \
      --vocabulary "vocab.train.bpe.${side}" --vocabulary-threshold 5 \
      < "dialog_converter/${split}.${side}" \
      > "${split}.bpe.${side}"
  done
done

# Cloning NMT.
# NOTE(review): unlike the two repositories above, this clone is not pinned
# to a commit, so the run is not reproducible across upstream changes.
clone_at https://github.com/tensorflow/nmt/ nmt

# Creating content dir with all the required files in it
mkdir -p "${MODEL_DIR}"
cp -- \
  dialog_converter/{train,test}.{a,b} \
  revocab.train.bpe.{a,b} \
  {train,test}.bpe.{a,b} \
  "${MODEL_DIR}/"

# Actual training. The test split doubles as the dev set here, exactly as
# in the original command line.
cd nmt
python3 -m nmt.nmt \
  --src=a --tgt=b \
  --vocab_prefix="${MODEL_DIR}/revocab.train.bpe" \
  --train_prefix="${MODEL_DIR}/train.bpe" \
  --dev_prefix="${MODEL_DIR}/test.bpe" \
  --test_prefix="${MODEL_DIR}/test.bpe" \
  --out_dir="${MODEL_DIR}" \
  --num_train_steps=45000000 \
  --steps_per_stats=100000 \
  --num_layers=2 \
  --num_units=128 \
  --batch_size=16 \
  --num_gpus=1 \
  --dropout=0.2 \
  --learning_rate=0.2 \
  --metrics=bleu
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment