Skip to content

Instantly share code, notes, and snippets.

@b0noI
Last active July 6, 2018 02:35
Show Gist options
  • Select an option

  • Save b0noI/174959e2056702e64a5ca39853a9816d to your computer and use it in GitHub Desktop.

Select an option

Save b0noI/174959e2056702e64a5ca39853a9816d to your computer and use it in GitHub Desktop.
# Dialog converter: fetch the converter, pin it to a known commit and run it.
git clone https://github.com/b0noI/dialog_converter.git
# Everything that needs dialog_converter/ as its working directory runs in a
# subshell, so the caller's directory is never changed. The && chain means a
# failed clone/cd (or a failed checkout) can no longer run the remaining
# commands in the wrong directory, and there is no trailing `cd ..` that could
# move the shell one level up when the `cd` itself failed.
(
  cd dialog_converter \
    && git checkout b9cc7b7d82a959c80e5048b18e956841233c7688 \
    && python3 ./converter.py
)
# Subword NMT: fetch the BPE tooling and pin it to a known commit.
git clone https://github.com/b0noI/subword-nmt.git
# `git -C` runs the checkout inside subword-nmt/ without changing the shell's
# working directory. With the previous `cd subword-nmt` ... `cd ..` pair, a
# failed clone made the `cd` fail and the `cd ..` then moved the shell one
# level up, breaking every relative path used afterwards.
git -C subword-nmt checkout dbe97c8f95f14d06b2e46b8053e2e2f9b9bf804e
# Learn the joint BPE codes (code.bpe) from both training files and write one
# vocabulary file per side.
subword-nmt/learn_joint_bpe_and_vocab.py \
  --input dialog_converter/train.a dialog_converter/train.b \
  -s 50000 \
  -o code.bpe \
  --write-vocabulary vocab.train.bpe.a vocab.train.bpe.b
# Post-process each side's vocabulary:
#   1. delete, in place, every line that contains a tab;
#   2. keep only the first space-separated column (dropping the frequency)
#      and store the result as revocab.train.bpe.<side>.
for side in a b; do
  sed -i '/\t/d' "./vocab.train.bpe.${side}"
  cut -d ' ' -f 1 "vocab.train.bpe.${side}" > "revocab.train.bpe.${side}"
done
# Generate training/test data: BPE-encode every split (train, test) for both
# sides (a, b). Each side is encoded with the shared code.bpe and that side's
# training vocabulary, using a vocabulary threshold of 5.
for split in train test; do
  for side in a b; do
    subword-nmt/apply_bpe.py \
      -c code.bpe \
      --vocabulary "vocab.train.bpe.${side}" \
      --vocabulary-threshold 5 \
      < "dialog_converter/${split}.${side}" \
      > "${split}.bpe.${side}"
  done
done
# Cloning NMT.
# NOTE(review): this clone is not pinned to a commit — confirm that tracking
# the default branch is intended.
git clone https://github.com/tensorflow/nmt/
# Create the content dir with all the required files in it.
# `mkdir -p` also creates ~/content and succeeds when the directories already
# exist, so the separate plain `mkdir ~/content` (which printed an error on
# every re-run) is not needed.
mkdir -p ~/content/nmt_model
# One cp call stages the raw data, the stripped vocabularies and the
# BPE-encoded data. The trailing slash forces the destination to be treated as
# a directory.
cp \
  dialog_converter/train.a dialog_converter/train.b \
  dialog_converter/test.a dialog_converter/test.b \
  revocab.train.bpe.a revocab.train.bpe.b \
  train.bpe.a test.bpe.a \
  train.bpe.b test.bpe.b \
  ~/content/nmt_model/
# Actual training.
# `cd nmt &&` prevents the training run from starting in the wrong directory
# when the nmt checkout is missing (previously the `cd` result was ignored).
# The $HOME-based paths are quoted so a home directory containing whitespace
# does not split an option into several words.
# Note: --dev_prefix and --test_prefix both point at the same test.bpe files,
# and --out_dir is the directory that also holds the input data.
cd nmt && python3 -m nmt.nmt \
  --src=a --tgt=b \
  --vocab_prefix="$HOME/content/nmt_model/revocab.train.bpe" \
  --train_prefix="$HOME/content/nmt_model/train.bpe" \
  --dev_prefix="$HOME/content/nmt_model/test.bpe" \
  --test_prefix="$HOME/content/nmt_model/test.bpe" \
  --out_dir="$HOME/content/nmt_model" \
  --num_train_steps=45000000 \
  --steps_per_stats=100000 \
  --num_layers=2 \
  --num_units=128 \
  --batch_size=16 \
  --num_gpus=1 \
  --dropout=0.2 \
  --learning_rate=0.2 \
  --metrics=bleu
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment