Created
August 30, 2019 05:47
-
-
Save taylanbil/0f6ef92e8d083996b84552eb6856abb6 to your computer and use it in GitHub Desktop.
Fairseq Transformer on GPU
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Mon Aug 19 21:56:00 UTC 2019 | |
#!/bin/bash
# Launch fairseq training of transformer_vaswani_wmt_en_de_big on the
# WMT18 en-de data set.
#
# Environment:
#   USE_OTHER_FLAGS=1   also pass the flags listed in `other_flags` below.
#                       Defaults to 0, which matches the original script:
#                       it defined `other_flags` but never handed them to
#                       train.py.

# Label for this job; not referenced anywhere else in this script.
taskname=fairseq_e2e_gpu

vol_fairseq=/home/taylanbil/fairseq/
vol_data=/home/taylanbil/data/wmt18_en_de_bpej32k
#vol_data=/home/taylanbil/data/dummy

# Interpreter used to run train.py. Kept as an array so that a multi-word
# command (e.g. `ipython -i`) survives quoting. The original assigned
# "ipython -i" and then immediately overwrote it with "python"; the dead
# assignment is kept here as a commented-out alternative.
#python_cli=(ipython -i)
python_cli=(python)

# Optional flags. These are NOT passed by default (see USE_OTHER_FLAGS
# above), because enabling them changes the run, e.g. it forces
# --distributed-world-size=1.
other_flags=(
  --clip-norm 0.0
  --curriculum=4
  --num-workers=2
  --distributed-world-size=1
)

extra_flags=()
if [[ "${USE_OTHER_FLAGS:-0}" == "1" ]]; then
  extra_flags=("${other_flags[@]}")
fi

# ${vol_fairseq%/} strips the trailing slash so the path does not contain "//".
"${python_cli[@]}" "${vol_fairseq%/}/train.py" \
  "$vol_data" \
  --arch=transformer_vaswani_wmt_en_de_big \
  --max-source-positions=64 \
  --max-target-positions=64 \
  --required-batch-size-multiple=8 \
  --max-tokens=4096 \
  --no-save \
  --attention-dropout=0.1 \
  --no-progress-bar \
  --criterion=label_smoothed_cross_entropy \
  --log-interval=10 \
  --source-lang=en \
  --lr-scheduler=inverse_sqrt \
  --min-lr 1e-09 \
  --skip-invalid-size-inputs-valid-test \
  --target-lang=de \
  --label-smoothing=0.1 \
  --max-epoch=50 \
  --update-freq=1 \
  --optimizer adam \
  --warmup-init-lr 1e-07 \
  --lr 0.0005 \
  --warmup-updates 4000 \
  --adam-betas='(0.9,0.98)' \
  --fp16 \
  --share-all-embeddings \
  --dropout 0.3 \
  --weight-decay 0.0 \
  --valid-subset=valid \
  "${extra_flags[@]}"
-------------- | |
nohup: ignoring input | |
| distributed init (rank 3): tcp://localhost:16094 | |
| distributed init (rank 1): tcp://localhost:16094 | |
| distributed init (rank 2): tcp://localhost:16094 | |
| distributed init (rank 0): tcp://localhost:16094 | |
| initialized host pytorch-gpu as rank 3 | |
| initialized host pytorch-gpu as rank 1 | |
| initialized host pytorch-gpu as rank 2 | |
| initialized host pytorch-gpu as rank 0 | |
Namespace(activation_dropout=0.0, activation_fn='relu', adam_betas='(0.9,0.98)', adam_eps=1e-08, adaptive_input=False, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer_vaswani_wmt_en_de_big', attention_dropout=0.1, best_checkpoint_metric='loss', bpe=None, bucket_cap_mb=25, clip_norm=25, cpu=False, criterion='label_smoothed_cross_entropy', curriculum=0, data='/home/taylanbil/data/wmt18_en_de_bpej32k', dataset_impl=None, ddp_backend='c10d', decoder_attention_heads=16, decoder_embed_dim=1024, decoder_embed_path=None, decoder_ffn_embed_dim=4096, decoder_input_dim=1024, decoder_layers=6, decoder_learned_pos=False, decoder_normalize_before=False, decoder_output_dim=1024, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method='tcp://localhost:16094', distributed_no_spawn=False, distributed_port=-1, distributed_rank=0, distributed_world_size=4, dropout=0.3, encoder_attention_heads=16, encoder_embed_dim=1024, encoder_embed_path=None, encoder_ffn_embed_dim=4096, encoder_layers=6, encoder_learned_pos=False, encoder_normalize_before=False, find_unused_parameters=False, fix_batches_to_gpus=False, fp16=True, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, keep_interval_updates=-1, keep_last_epochs=-1, label_smoothing=0.1, lazy_load=False, left_pad_source='True', left_pad_target='False', log_format=None, log_interval=10, lr=[0.0005], lr_scheduler='inverse_sqrt', max_epoch=50, max_sentences=None, max_sentences_valid=None, max_source_positions=64, max_target_positions=64, max_tokens=4096, max_tokens_valid=4096, max_update=0, maximize_best_checkpoint_metric=False, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=1e-09, no_epoch_checkpoints=False, no_last_checkpoints=False, no_progress_bar=True, no_save=True, no_save_optimizer_state=False, no_token_positional_embeddings=False, num_workers=1, optimizer='adam', optimizer_overrides='{}', raw_text=False, required_batch_size_multiple=8, 
reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='checkpoints', save_interval=1, save_interval_updates=0, seed=1, sentence_avg=False, share_all_embeddings=True, share_decoder_input_output_embed=False, skip_invalid_size_inputs_valid_test=True, source_lang='en', target_lang='de', task='translation', tbmf_wrapper=False, tensorboard_logdir='', threshold_loss_scale=None, tokenizer=None, train_subset='train', update_freq=[1], upsample_primary=1, use_bmuf=False, user_dir=None, valid_subset='valid', validate_interval=1, warmup_init_lr=1e-07, warmup_updates=4000, weight_decay=0.0) | |
| [en] dictionary: 35662 types | |
| [de] dictionary: 35662 types | |
| loaded 52385 examples from: /home/taylanbil/data/wmt18_en_de_bpej32k/valid.en-de.en | |
| loaded 52385 examples from: /home/taylanbil/data/wmt18_en_de_bpej32k/valid.en-de.de | |
| /home/taylanbil/data/wmt18_en_de_bpej32k valid en-de 52385 examples | |
TransformerModel( | |
(encoder): TransformerEncoder( | |
(embed_tokens): Embedding(35662, 1024, padding_idx=1) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(1): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(2): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(3): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(4): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(5): TransformerEncoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
) | |
) | |
(decoder): TransformerDecoder( | |
(embed_tokens): Embedding(35662, 1024, padding_idx=1) | |
(embed_positions): SinusoidalPositionalEmbedding() | |
(layers): ModuleList( | |
(0): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(1): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(2): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(3): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(4): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
(5): TransformerDecoderLayer( | |
(self_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(self_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(encoder_attn): MultiheadAttention( | |
(out_proj): Linear(in_features=1024, out_features=1024, bias=True) | |
) | |
(encoder_attn_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
(fc1): Linear(in_features=1024, out_features=4096, bias=True) | |
(fc2): Linear(in_features=4096, out_features=1024, bias=True) | |
(final_layer_norm): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True) | |
) | |
) | |
) | |
) | |
| model transformer_vaswani_wmt_en_de_big, criterion LabelSmoothedCrossEntropyCriterion | |
| num. model params: 212875264 (num. trained: 212875264) | |
| training on 4 GPUs | |
| max tokens per GPU = 4096 and max sentences per GPU = None | |
| no existing checkpoint found checkpoints/checkpoint_last.pt | |
| loading train data for epoch 0 | |
| loaded 5186259 examples from: /home/taylanbil/data/wmt18_en_de_bpej32k/train.en-de.en | |
| loaded 5186259 examples from: /home/taylanbil/data/wmt18_en_de_bpej32k/train.en-de.de | |
| /home/taylanbil/data/wmt18_en_de_bpej32k train en-de 5186259 examples | |
| WARNING: 240829 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[1422704, 2718830, 2897878, 3673048, 2016896, 2200333, 3886976, 2097242, 3124502, 2871279] | |
| WARNING: overflow detected, setting loss scale to: 64.0 | |
| WARNING: overflow detected, setting loss scale to: 32.0 | |
| epoch 001: 10 / 8862 loss=15.650, nll_loss=15.629, ppl=50662.34, wps=1806, ups=0, wpb=15322.667, bsz=545.778, num_updates=9, lr=1.22478e-06, gnorm=6.189, clip=0.000, oom=0.000, loss_scale=32.000, wall=76, train_wall=3 | |
| epoch 001: 20 / 8862 loss=15.381, nll_loss=15.329, ppl=41170.89, wps=3660, ups=0, wpb=15279.579, bsz=528.000, num_updates=19, lr=2.47453e-06, gnorm=5.761, clip=0.000, oom=0.000, loss_scale=32.000, wall=79, train_wall=6 | |
| epoch 001: 30 / 8862 loss=15.046, nll_loss=14.955, ppl=31769.95, wps=5384, ups=0, wpb=15278.862, bsz=530.759, num_updates=29, lr=3.72428e-06, gnorm=4.979, clip=0.000, oom=0.000, loss_scale=32.000, wall=82, train_wall=9 | |
| epoch 001: 40 / 8862 loss=14.764, nll_loss=14.639, ppl=25508.20, wps=6993, ups=0, wpb=15291.667, bsz=554.872, num_updates=39, lr=4.97403e-06, gnorm=4.291, clip=0.000, oom=0.000, loss_scale=32.000, wall=85, train_wall=12 | |
| epoch 001: 50 / 8862 loss=14.546, nll_loss=14.394, ppl=21522.65, wps=8500, ups=1, wpb=15304.633, bsz=544.327, num_updates=49, lr=6.22378e-06, gnorm=3.780, clip=0.000, oom=0.000, loss_scale=32.000, wall=88, train_wall=14 | |
| epoch 001: 60 / 8862 loss=14.373, nll_loss=14.201, ppl=18829.74, wps=9887, ups=1, wpb=15272.593, bsz=534.508, num_updates=59, lr=7.47353e-06, gnorm=3.382, clip=0.000, oom=0.000, loss_scale=32.000, wall=91, train_wall=17 | |
| epoch 001: 70 / 8862 loss=14.228, nll_loss=14.038, ppl=16817.80, wps=11220, ups=1, wpb=15290.551, bsz=521.507, num_updates=69, lr=8.72328e-06, gnorm=3.078, clip=0.000, oom=0.000, loss_scale=32.000, wall=94, train_wall=20 | |
| epoch 001: 80 / 8862 loss=14.102, nll_loss=13.897, ppl=15258.85, wps=12437, ups=1, wpb=15263.797, bsz=525.063, num_updates=79, lr=9.97303e-06, gnorm=2.858, clip=0.000, oom=0.000, loss_scale=32.000, wall=97, train_wall=22 | |
| epoch 001: 90 / 8862 loss=13.990, nll_loss=13.773, ppl=13995.37, wps=13608, ups=1, wpb=15268.719, bsz=527.011, num_updates=89, lr=1.12228e-05, gnorm=2.690, clip=0.000, oom=0.000, loss_scale=32.000, wall=100, train_wall=25 | |
| epoch 001: 100 / 8862 loss=13.889, nll_loss=13.660, ppl=12942.76, wps=14696, ups=1, wpb=15258.869, bsz=524.364, num_updates=99, lr=1.24725e-05, gnorm=2.531, clip=0.000, oom=0.000, loss_scale=32.000, wall=103, train_wall=28 | |
| epoch 001: 110 / 8862 loss=13.793, nll_loss=13.553, ppl=12014.67, wps=15690, ups=1, wpb=15217.486, bsz=527.853, num_updates=109, lr=1.37223e-05, gnorm=2.413, clip=0.000, oom=0.000, loss_scale=32.000, wall=106, train_wall=31 | |
| epoch 001: 120 / 8862 loss=13.700, nll_loss=13.449, ppl=11183.72, wps=16658, ups=1, wpb=15204.235, bsz=528.538, num_updates=119, lr=1.4972e-05, gnorm=2.350, clip=0.000, oom=0.000, loss_scale=32.000, wall=109, train_wall=33 | |
| epoch 001: 130 / 8862 loss=13.607, nll_loss=13.346, ppl=10411.01, wps=17581, ups=1, wpb=15206.202, bsz=532.527, num_updates=129, lr=1.62218e-05, gnorm=2.365, clip=0.000, oom=0.000, loss_scale=32.000, wall=112, train_wall=36 | |
| epoch 001: 140 / 8862 loss=13.515, nll_loss=13.243, ppl=9695.70, wps=18456, ups=1, wpb=15203.669, bsz=535.194, num_updates=139, lr=1.74715e-05, gnorm=2.290, clip=0.000, oom=0.000, loss_scale=32.000, wall=115, train_wall=39 | |
| epoch 001: 150 / 8862 loss=13.426, nll_loss=13.143, ppl=9044.80, wps=19272, ups=1, wpb=15186.074, bsz=540.993, num_updates=149, lr=1.87213e-05, gnorm=2.223, clip=0.000, oom=0.000, loss_scale=32.000, wall=117, train_wall=41 | |
| epoch 001: 160 / 8862 loss=13.340, nll_loss=13.046, ppl=8457.24, wps=20086, ups=1, wpb=15201.642, bsz=541.736, num_updates=159, lr=1.9971e-05, gnorm=2.170, clip=0.000, oom=0.000, loss_scale=32.000, wall=120, train_wall=44 | |
| epoch 001: 170 / 8862 loss=13.261, nll_loss=12.957, ppl=7949.50, wps=20829, ups=1, wpb=15201.036, bsz=538.982, num_updates=169, lr=2.12208e-05, gnorm=2.131, clip=0.000, oom=0.000, loss_scale=32.000, wall=123, train_wall=47 | |
| epoch 001: 180 / 8862 loss=13.181, nll_loss=12.867, ppl=7470.97, wps=21567, ups=1, wpb=15210.374, bsz=542.168, num_updates=179, lr=2.24705e-05, gnorm=2.093, clip=0.000, oom=0.000, loss_scale=32.000, wall=126, train_wall=50 | |
| epoch 001: 190 / 8862 loss=13.108, nll_loss=12.784, ppl=7054.84, wps=22254, ups=1, wpb=15211.132, bsz=544.381, num_updates=189, lr=2.37203e-05, gnorm=2.092, clip=0.000, oom=0.000, loss_scale=32.000, wall=129, train_wall=52 | |
| epoch 001: 200 / 8862 loss=13.038, nll_loss=12.705, ppl=6677.09, wps=22887, ups=2, wpb=15199.000, bsz=549.829, num_updates=199, lr=2.497e-05, gnorm=2.075, clip=0.000, oom=0.000, loss_scale=32.000, wall=132, train_wall=55 | |
| epoch 001: 210 / 8862 loss=12.973, nll_loss=12.630, ppl=6339.78, wps=23531, ups=2, wpb=15206.072, bsz=548.938, num_updates=209, lr=2.62198e-05, gnorm=2.058, clip=0.000, oom=0.000, loss_scale=32.000, wall=135, train_wall=58 | |
| epoch 001: 220 / 8862 loss=12.910, nll_loss=12.558, ppl=6030.99, wps=24125, ups=2, wpb=15206.584, bsz=549.918, num_updates=219, lr=2.74695e-05, gnorm=2.042, clip=0.000, oom=0.000, loss_scale=32.000, wall=138, train_wall=60 | |
| epoch 001: 230 / 8862 loss=12.851, nll_loss=12.491, ppl=5755.57, wps=24686, ups=2, wpb=15199.122, bsz=549.380, num_updates=229, lr=2.87193e-05, gnorm=2.020, clip=0.000, oom=0.000, loss_scale=32.000, wall=141, train_wall=63 | |
| epoch 001: 240 / 8862 loss=12.795, nll_loss=12.425, ppl=5500.23, wps=25261, ups=2, wpb=15211.155, bsz=549.891, num_updates=239, lr=2.9969e-05, gnorm=2.006, clip=0.000, oom=0.000, loss_scale=32.000, wall=144, train_wall=66 | |
| epoch 001: 250 / 8862 loss=12.742, nll_loss=12.364, ppl=5272.44, wps=25795, ups=2, wpb=15209.382, bsz=551.550, num_updates=249, lr=3.12188e-05, gnorm=1.997, clip=0.000, oom=0.000, loss_scale=32.000, wall=147, train_wall=69 | |
| epoch 001: 260 / 8862 loss=12.691, nll_loss=12.305, ppl=5060.73, wps=26297, ups=2, wpb=15212.931, bsz=549.436, num_updates=259, lr=3.24685e-05, gnorm=1.973, clip=0.000, oom=0.000, loss_scale=32.000, wall=150, train_wall=71 | |
| epoch 001: 270 / 8862 loss=12.644, nll_loss=12.250, ppl=4872.63, wps=26804, ups=2, wpb=15218.257, bsz=547.420, num_updates=269, lr=3.37183e-05, gnorm=1.972, clip=0.000, oom=0.000, loss_scale=32.000, wall=153, train_wall=74 | |
| epoch 001: 280 / 8862 loss=12.600, nll_loss=12.199, ppl=4701.64, wps=27267, ups=2, wpb=15211.663, bsz=547.097, num_updates=279, lr=3.4968e-05, gnorm=1.954, clip=0.000, oom=0.000, loss_scale=32.000, wall=156, train_wall=77 | |
| epoch 001: 290 / 8862 loss=12.557, nll_loss=12.148, ppl=4539.11, wps=27719, ups=2, wpb=15208.644, bsz=547.626, num_updates=289, lr=3.62178e-05, gnorm=1.950, clip=0.000, oom=0.000, loss_scale=32.000, wall=159, train_wall=79 | |
| epoch 001: 300 / 8862 loss=12.516, nll_loss=12.100, ppl=4391.25, wps=28154, ups=2, wpb=15207.579, bsz=547.023, num_updates=299, lr=3.74675e-05, gnorm=1.940, clip=0.000, oom=0.000, loss_scale=32.000, wall=162, train_wall=82 | |
| epoch 001: 310 / 8862 loss=12.476, nll_loss=12.054, ppl=4251.13, wps=28580, ups=2, wpb=15207.411, bsz=548.583, num_updates=309, lr=3.87173e-05, gnorm=1.928, clip=0.000, oom=0.000, loss_scale=32.000, wall=164, train_wall=85 | |
| epoch 001: 320 / 8862 loss=12.434, nll_loss=12.005, ppl=4109.99, wps=28985, ups=2, wpb=15210.969, bsz=555.060, num_updates=319, lr=3.9967e-05, gnorm=1.911, clip=0.000, oom=0.000, loss_scale=32.000, wall=167, train_wall=88 | |
| epoch 001: 330 / 8862 loss=12.398, nll_loss=11.963, ppl=3991.71, wps=29378, ups=2, wpb=15207.322, bsz=554.894, num_updates=329, lr=4.12168e-05, gnorm=1.896, clip=0.000, oom=0.000, loss_scale=32.000, wall=170, train_wall=90 | |
| epoch 001: 340 / 8862 loss=12.365, nll_loss=11.923, ppl=3883.61, wps=29754, ups=2, wpb=15202.811, bsz=553.628, num_updates=339, lr=4.24665e-05, gnorm=1.872, clip=0.000, oom=0.000, loss_scale=32.000, wall=173, train_wall=93 | |
| epoch 001: 350 / 8862 loss=12.331, nll_loss=11.884, ppl=3780.35, wps=30122, ups=2, wpb=15199.966, bsz=552.756, num_updates=349, lr=4.37163e-05, gnorm=1.863, clip=0.000, oom=0.000, loss_scale=32.000, wall=176, train_wall=96 | |
| epoch 001: 360 / 8862 loss=12.298, nll_loss=11.845, ppl=3678.02, wps=30486, ups=2, wpb=15204.493, bsz=553.248, num_updates=359, lr=4.4966e-05, gnorm=1.860, clip=0.000, oom=0.000, loss_scale=32.000, wall=179, train_wall=98 | |
| epoch 001: 370 / 8862 loss=12.266, nll_loss=11.808, ppl=3585.57, wps=30834, ups=2, wpb=15207.612, bsz=552.477, num_updates=369, lr=4.62158e-05, gnorm=1.842, clip=0.000, oom=0.000, loss_scale=32.000, wall=182, train_wall=101 | |
| epoch 001: 380 / 8862 loss=12.236, nll_loss=11.772, ppl=3497.97, wps=31177, ups=2, wpb=15212.272, bsz=552.253, num_updates=379, lr=4.74655e-05, gnorm=1.833, clip=0.000, oom=0.000, loss_scale=32.000, wall=185, train_wall=104 | |
| epoch 001: 390 / 8862 loss=12.203, nll_loss=11.734, ppl=3407.33, wps=31506, ups=2, wpb=15214.321, bsz=556.360, num_updates=389, lr=4.87153e-05, gnorm=1.828, clip=0.000, oom=0.000, loss_scale=32.000, wall=188, train_wall=107 | |
| epoch 001: 400 / 8862 loss=12.174, nll_loss=11.700, ppl=3328.01, wps=31825, ups=2, wpb=15213.060, bsz=556.050, num_updates=399, lr=4.9965e-05, gnorm=1.835, clip=0.000, oom=0.000, loss_scale=32.000, wall=191, train_wall=109 | |
| epoch 001: 410 / 8862 loss=12.146, nll_loss=11.668, ppl=3252.89, wps=32144, ups=2, wpb=15216.829, bsz=556.225, num_updates=409, lr=5.12148e-05, gnorm=1.832, clip=0.000, oom=0.000, loss_scale=32.000, wall=194, train_wall=112 | |
| epoch 001: 420 / 8862 loss=12.119, nll_loss=11.635, ppl=3180.52, wps=32446, ups=2, wpb=15215.520, bsz=556.468, num_updates=419, lr=5.24645e-05, gnorm=1.817, clip=0.000, oom=0.000, loss_scale=32.000, wall=196, train_wall=115 | |
| epoch 001: 430 / 8862 loss=12.091, nll_loss=11.603, ppl=3110.66, wps=32729, ups=2, wpb=15211.261, bsz=556.345, num_updates=429, lr=5.37143e-05, gnorm=1.806, clip=0.000, oom=0.000, loss_scale=32.000, wall=199, train_wall=117 | |
| epoch 001: 440 / 8862 loss=12.061, nll_loss=11.568, ppl=3036.02, wps=33016, ups=2, wpb=15213.702, bsz=559.872, num_updates=439, lr=5.4964e-05, gnorm=1.802, clip=0.000, oom=0.000, loss_scale=32.000, wall=202, train_wall=120 | |
| epoch 001: 450 / 8862 loss=12.035, nll_loss=11.538, ppl=2973.13, wps=33301, ups=2, wpb=15217.040, bsz=558.468, num_updates=449, lr=5.62138e-05, gnorm=1.795, clip=0.000, oom=0.000, loss_scale=32.000, wall=205, train_wall=123 | |
| epoch 001: 460 / 8862 loss=12.010, nll_loss=11.508, ppl=2912.29, wps=33566, ups=2, wpb=15216.612, bsz=557.856, num_updates=459, lr=5.74635e-05, gnorm=1.789, clip=0.000, oom=0.000, loss_scale=32.000, wall=208, train_wall=125 | |
| WARNING: overflow detected, setting loss scale to: 16.0 | |
| epoch 001: 470 / 8862 loss=11.986, nll_loss=11.480, ppl=2857.21, wps=33747, ups=2, wpb=15218.053, bsz=559.402, num_updates=468, lr=5.85883e-05, gnorm=1.785, clip=0.000, oom=0.000, loss_scale=16.000, wall=211, train_wall=128 | |
| epoch 001: 480 / 8862 loss=11.962, nll_loss=11.453, ppl=2802.81, wps=33987, ups=2, wpb=15208.571, bsz=557.941, num_updates=478, lr=5.98381e-05, gnorm=1.772, clip=0.000, oom=0.000, loss_scale=16.000, wall=214, train_wall=131 | |
| epoch 001: 490 / 8862 loss=11.937, nll_loss=11.423, ppl=2746.47, wps=34244, ups=2, wpb=15212.576, bsz=557.902, num_updates=488, lr=6.10878e-05, gnorm=1.768, clip=0.000, oom=0.000, loss_scale=16.000, wall=217, train_wall=133 | |
| epoch 001: 500 / 8862 loss=11.912, nll_loss=11.394, ppl=2691.44, wps=34492, ups=2, wpb=15215.811, bsz=558.072, num_updates=498, lr=6.23376e-05, gnorm=1.757, clip=0.000, oom=0.000, loss_scale=16.000, wall=220, train_wall=136 | |
| epoch 001: 510 / 8862 loss=11.888, nll_loss=11.366, ppl=2638.83, wps=34737, ups=2, wpb=15219.498, bsz=556.866, num_updates=508, lr=6.35873e-05, gnorm=1.748, clip=0.000, oom=0.000, loss_scale=16.000, wall=223, train_wall=139 | |
| epoch 001: 520 / 8862 loss=11.863, nll_loss=11.337, ppl=2587.24, wps=34958, ups=2, wpb=15214.921, bsz=557.390, num_updates=518, lr=6.48371e-05, gnorm=1.746, clip=0.000, oom=0.000, loss_scale=16.000, wall=225, train_wall=141 | |
| epoch 001: 530 / 8862 loss=11.840, nll_loss=11.311, ppl=2539.91, wps=35187, ups=2, wpb=15216.157, bsz=557.273, num_updates=528, lr=6.60868e-05, gnorm=1.746, clip=0.000, oom=0.000, loss_scale=16.000, wall=228, train_wall=144 | |
| epoch 001: 540 / 8862 loss=11.816, nll_loss=11.283, ppl=2491.43, wps=35394, ups=2, wpb=15212.901, bsz=558.320, num_updates=538, lr=6.73366e-05, gnorm=1.743, clip=0.000, oom=0.000, loss_scale=16.000, wall=231, train_wall=147 | |
| epoch 001: 550 / 8862 loss=11.793, nll_loss=11.255, ppl=2444.42, wps=35593, ups=2, wpb=15210.162, bsz=559.416, num_updates=548, lr=6.85863e-05, gnorm=1.739, clip=0.000, oom=0.000, loss_scale=16.000, wall=234, train_wall=149 | |
| epoch 001: 560 / 8862 loss=11.772, nll_loss=11.231, ppl=2403.73, wps=35797, ups=2, wpb=15208.260, bsz=558.681, num_updates=558, lr=6.98361e-05, gnorm=1.738, clip=0.000, oom=0.000, loss_scale=16.000, wall=237, train_wall=152 | |
| epoch 001: 570 / 8862 loss=11.748, nll_loss=11.203, ppl=2357.66, wps=35996, ups=2, wpb=15207.607, bsz=559.690, num_updates=568, lr=7.10858e-05, gnorm=1.732, clip=0.000, oom=0.000, loss_scale=16.000, wall=240, train_wall=155 | |
| epoch 001: 580 / 8862 loss=11.726, nll_loss=11.177, ppl=2315.63, wps=36199, ups=2, wpb=15209.666, bsz=559.156, num_updates=578, lr=7.23356e-05, gnorm=1.728, clip=0.000, oom=0.000, loss_scale=16.000, wall=243, train_wall=157 | |
| epoch 001: 590 / 8862 loss=11.704, nll_loss=11.152, ppl=2275.99, wps=36386, ups=2, wpb=15207.384, bsz=557.673, num_updates=588, lr=7.35853e-05, gnorm=1.725, clip=0.000, oom=0.000, loss_scale=16.000, wall=246, train_wall=160 | |
| WARNING: overflow detected, setting loss scale to: 8.0 | |
| epoch 001: 600 / 8862 loss=11.684, nll_loss=11.129, ppl=2239.46, wps=36515, ups=2, wpb=15212.945, bsz=557.508, num_updates=597, lr=7.47101e-05, gnorm=1.717, clip=0.000, oom=0.000, loss_scale=8.000, wall=249, train_wall=163 | |
| epoch 001: 610 / 8862 loss=11.662, nll_loss=11.104, ppl=2200.40, wps=36702, ups=2, wpb=15213.774, bsz=557.680, num_updates=607, lr=7.59598e-05, gnorm=1.711, clip=0.000, oom=0.000, loss_scale=8.000, wall=252, train_wall=166 | |
| epoch 001: 620 / 8862 loss=11.640, nll_loss=11.077, ppl=2160.54, wps=36895, ups=2, wpb=15219.113, bsz=558.301, num_updates=617, lr=7.72096e-05, gnorm=1.706, clip=0.000, oom=0.000, loss_scale=8.000, wall=255, train_wall=168 | |
| epoch 001: 630 / 8862 loss=11.618, nll_loss=11.052, ppl=2123.53, wps=37076, ups=2, wpb=15221.193, bsz=558.941, num_updates=627, lr=7.84593e-05, gnorm=1.707, clip=0.000, oom=0.000, loss_scale=8.000, wall=257, train_wall=171 | |
| epoch 001: 640 / 8862 loss=11.599, nll_loss=11.030, ppl=2091.34, wps=37252, ups=2, wpb=15222.907, bsz=558.355, num_updates=637, lr=7.97091e-05, gnorm=1.714, clip=0.000, oom=0.000, loss_scale=8.000, wall=260, train_wall=174 | |
| epoch 001: 650 / 8862 loss=11.580, nll_loss=11.008, ppl=2059.08, wps=37413, ups=2, wpb=15218.621, bsz=558.034, num_updates=647, lr=8.09588e-05, gnorm=1.710, clip=0.000, oom=0.000, loss_scale=8.000, wall=263, train_wall=176 | |
| epoch 001: 660 / 8862 loss=11.561, nll_loss=10.985, ppl=2026.82, wps=37580, ups=2, wpb=15219.973, bsz=556.798, num_updates=657, lr=8.22086e-05, gnorm=1.705, clip=0.000, oom=0.000, loss_scale=8.000, wall=266, train_wall=179 | |
| epoch 001: 670 / 8862 loss=11.540, nll_loss=10.961, ppl=1993.86, wps=37741, ups=2, wpb=15220.345, bsz=557.433, num_updates=667, lr=8.34583e-05, gnorm=1.696, clip=0.000, oom=0.000, loss_scale=8.000, wall=269, train_wall=182 | |
| epoch 001: 680 / 8862 loss=11.519, nll_loss=10.937, ppl=1960.67, wps=37904, ups=2, wpb=15221.637, bsz=557.412, num_updates=677, lr=8.47081e-05, gnorm=1.694, clip=0.000, oom=0.000, loss_scale=8.000, wall=272, train_wall=184 | |
| epoch 001: 690 / 8862 loss=11.500, nll_loss=10.914, ppl=1930.13, wps=38057, ups=3, wpb=15219.595, bsz=557.135, num_updates=687, lr=8.59578e-05, gnorm=1.690, clip=0.000, oom=0.000, loss_scale=8.000, wall=275, train_wall=187 | |
| epoch 001: 700 / 8862 loss=11.481, nll_loss=10.892, ppl=1900.46, wps=38220, ups=3, wpb=15223.812, bsz=556.499, num_updates=697, lr=8.72076e-05, gnorm=1.685, clip=0.000, oom=0.000, loss_scale=8.000, wall=278, train_wall=190 | |
| epoch 001: 710 / 8862 loss=11.461, nll_loss=10.870, ppl=1870.95, wps=38371, ups=3, wpb=15224.754, bsz=555.893, num_updates=707, lr=8.84573e-05, gnorm=1.682, clip=0.000, oom=0.000, loss_scale=8.000, wall=281, train_wall=192 | |
| epoch 001: 720 / 8862 loss=11.442, nll_loss=10.847, ppl=1842.11, wps=38519, ups=3, wpb=15224.866, bsz=555.146, num_updates=717, lr=8.97071e-05, gnorm=1.677, clip=0.000, oom=0.000, loss_scale=8.000, wall=283, train_wall=195 | |
| epoch 001: 730 / 8862 loss=11.422, nll_loss=10.824, ppl=1813.18, wps=38654, ups=3, wpb=15222.132, bsz=555.488, num_updates=727, lr=9.09568e-05, gnorm=1.673, clip=0.000, oom=0.000, loss_scale=8.000, wall=286, train_wall=198 | |
| epoch 001: 740 / 8862 loss=11.403, nll_loss=10.802, ppl=1785.55, wps=38796, ups=3, wpb=15223.145, bsz=555.919, num_updates=737, lr=9.22066e-05, gnorm=1.675, clip=0.000, oom=0.000, loss_scale=8.000, wall=289, train_wall=200 | |
| epoch 001: 750 / 8862 loss=11.386, nll_loss=10.781, ppl=1759.91, wps=38926, ups=3, wpb=15218.902, bsz=555.909, num_updates=747, lr=9.34563e-05, gnorm=1.671, clip=0.000, oom=0.000, loss_scale=8.000, wall=292, train_wall=203 | |
| epoch 001: 760 / 8862 loss=11.367, nll_loss=10.760, ppl=1733.76, wps=39049, ups=3, wpb=15214.055, bsz=555.857, num_updates=757, lr=9.47061e-05, gnorm=1.665, clip=0.000, oom=0.000, loss_scale=8.000, wall=295, train_wall=206 | |
| epoch 001: 770 / 8862 loss=11.348, nll_loss=10.737, ppl=1706.93, wps=39178, ups=3, wpb=15214.094, bsz=556.464, num_updates=767, lr=9.59558e-05, gnorm=1.662, clip=0.000, oom=0.000, loss_scale=8.000, wall=298, train_wall=208 | |
| epoch 001: 780 / 8862 loss=11.329, nll_loss=10.715, ppl=1681.08, wps=39314, ups=3, wpb=15217.609, bsz=556.242, num_updates=777, lr=9.72056e-05, gnorm=1.659, clip=0.000, oom=0.000, loss_scale=8.000, wall=301, train_wall=211 | |
| epoch 001: 790 / 8862 loss=11.309, nll_loss=10.693, ppl=1654.89, wps=39448, ups=3, wpb=15219.888, bsz=556.717, num_updates=787, lr=9.84553e-05, gnorm=1.653, clip=0.000, oom=0.000, loss_scale=8.000, wall=304, train_wall=214 | |
| epoch 001: 800 / 8862 loss=11.290, nll_loss=10.671, ppl=1629.86, wps=39580, ups=3, wpb=15222.711, bsz=557.039, num_updates=797, lr=9.97051e-05, gnorm=1.650, clip=0.000, oom=0.000, loss_scale=8.000, wall=307, train_wall=216 | |
| epoch 001: 810 / 8862 loss=11.272, nll_loss=10.649, ppl=1606.23, wps=39699, ups=3, wpb=15221.410, bsz=557.026, num_updates=807, lr=0.000100955, gnorm=1.646, clip=0.000, oom=0.000, loss_scale=8.000, wall=309, train_wall=219 | |
| epoch 001: 820 / 8862 loss=11.254, nll_loss=10.628, ppl=1583.05, wps=39816, ups=3, wpb=15219.878, bsz=556.259, num_updates=817, lr=0.000102205, gnorm=1.641, clip=0.000, oom=0.000, loss_scale=8.000, wall=312, train_wall=222 | |
| epoch 001: 830 / 8862 loss=11.237, nll_loss=10.609, ppl=1561.29, wps=39928, ups=3, wpb=15217.243, bsz=555.357, num_updates=827, lr=0.000103454, gnorm=1.635, clip=0.000, oom=0.000, loss_scale=8.000, wall=315, train_wall=224 | |
| epoch 001: 840 / 8862 loss=11.220, nll_loss=10.588, ppl=1539.52, wps=40045, ups=3, wpb=15216.535, bsz=554.237, num_updates=837, lr=0.000104704, gnorm=1.631, clip=0.000, oom=0.000, loss_scale=8.000, wall=318, train_wall=227 | |
| epoch 001: 850 / 8862 loss=11.202, nll_loss=10.568, ppl=1518.16, wps=40149, ups=3, wpb=15213.176, bsz=555.230, num_updates=847, lr=0.000105954, gnorm=1.634, clip=0.000, oom=0.000, loss_scale=8.000, wall=321, train_wall=230 | |
| epoch 001: 860 / 8862 loss=11.186, nll_loss=10.549, ppl=1497.95, wps=40258, ups=3, wpb=15214.488, bsz=556.378, num_updates=857, lr=0.000107204, gnorm=1.640, clip=0.000, oom=0.000, loss_scale=8.000, wall=324, train_wall=232 | |
| epoch 001: 870 / 8862 loss=11.169, nll_loss=10.529, ppl=1477.96, wps=40371, ups=3, wpb=15216.131, bsz=556.097, num_updates=867, lr=0.000108453, gnorm=1.637, clip=0.000, oom=0.000, loss_scale=8.000, wall=327, train_wall=235 | |
| epoch 001: 880 / 8862 loss=11.152, nll_loss=10.510, ppl=1458.15, wps=40477, ups=3, wpb=15215.403, bsz=555.685, num_updates=877, lr=0.000109703, gnorm=1.632, clip=0.000, oom=0.000, loss_scale=8.000, wall=330, train_wall=238 | |
| epoch 001: 890 / 8862 loss=11.138, nll_loss=10.493, ppl=1440.80, wps=40567, ups=3, wpb=15214.110, bsz=558.070, num_updates=887, lr=0.000110953, gnorm=1.636, clip=0.000, oom=0.000, loss_scale=8.000, wall=333, train_wall=241 | |
| epoch 001: 900 / 8862 loss=11.122, nll_loss=10.474, ppl=1422.53, wps=40672, ups=3, wpb=15214.756, bsz=557.940, num_updates=897, lr=0.000112203, gnorm=1.635, clip=0.000, oom=0.000, loss_scale=8.000, wall=336, train_wall=243 | |
| epoch 001: 910 / 8862 loss=11.107, nll_loss=10.456, ppl=1405.14, wps=40770, ups=3, wpb=15213.095, bsz=556.983, num_updates=907, lr=0.000113452, gnorm=1.631, clip=0.000, oom=0.000, loss_scale=8.000, wall=338, train_wall=246 | |
| epoch 001: 920 / 8862 loss=11.089, nll_loss=10.436, ppl=1385.79, wps=40868, ups=3, wpb=15212.960, bsz=557.304, num_updates=917, lr=0.000114702, gnorm=1.625, clip=0.000, oom=0.000, loss_scale=8.000, wall=341, train_wall=249 | |
| epoch 001: 930 / 8862 loss=11.074, nll_loss=10.418, ppl=1368.53, wps=40960, ups=3, wpb=15210.107, bsz=557.005, num_updates=927, lr=0.000115952, gnorm=1.620, clip=0.000, oom=0.000, loss_scale=8.000, wall=344, train_wall=251 | |
| epoch 001: 940 / 8862 loss=11.057, nll_loss=10.399, ppl=1350.23, wps=41051, ups=3, wpb=15207.060, bsz=557.712, num_updates=937, lr=0.000117202, gnorm=1.617, clip=0.000, oom=0.000, loss_scale=8.000, wall=347, train_wall=254 | |
| epoch 001: 950 / 8862 loss=11.041, nll_loss=10.380, ppl=1332.46, wps=41143, ups=3, wpb=15205.571, bsz=558.057, num_updates=947, lr=0.000118451, gnorm=1.613, clip=0.000, oom=0.000, loss_scale=8.000, wall=350, train_wall=257 | |
| epoch 001: 960 / 8862 loss=11.024, nll_loss=10.361, ppl=1314.72, wps=41223, ups=3, wpb=15201.103, bsz=558.713, num_updates=957, lr=0.000119701, gnorm=1.611, clip=0.000, oom=0.000, loss_scale=8.000, wall=353, train_wall=259 | |
| epoch 001: 970 / 8862 loss=11.008, nll_loss=10.342, ppl=1297.99, wps=41311, ups=3, wpb=15199.422, bsz=558.469, num_updates=967, lr=0.000120951, gnorm=1.610, clip=0.000, oom=0.000, loss_scale=8.000, wall=356, train_wall=262 | |
| epoch 001: 980 / 8862 loss=10.993, nll_loss=10.324, ppl=1281.80, wps=41398, ups=3, wpb=15198.689, bsz=558.698, num_updates=977, lr=0.000122201, gnorm=1.608, clip=0.000, oom=0.000, loss_scale=8.000, wall=359, train_wall=265 | |
| epoch 001: 990 / 8862 loss=10.977, nll_loss=10.306, ppl=1265.54, wps=41495, ups=3, wpb=15201.466, bsz=558.663, num_updates=987, lr=0.00012345, gnorm=1.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=362, train_wall=267 | |
| epoch 001: 1000 / 8862 loss=10.961, nll_loss=10.287, ppl=1249.77, wps=41578, ups=3, wpb=15199.739, bsz=558.644, num_updates=997, lr=0.0001247, gnorm=1.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=364, train_wall=270 | |
| epoch 001: 1010 / 8862 loss=10.945, nll_loss=10.269, ppl=1233.93, wps=41667, ups=3, wpb=15200.473, bsz=558.761, num_updates=1007, lr=0.00012595, gnorm=1.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=367, train_wall=273 | |
| epoch 001: 1020 / 8862 loss=10.930, nll_loss=10.251, ppl=1218.66, wps=41759, ups=3, wpb=15203.085, bsz=558.600, num_updates=1017, lr=0.0001272, gnorm=1.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=370, train_wall=275 | |
| epoch 001: 1030 / 8862 loss=10.915, nll_loss=10.234, ppl=1204.16, wps=41838, ups=3, wpb=15202.685, bsz=559.159, num_updates=1027, lr=0.000128449, gnorm=1.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=373, train_wall=278 | |
| epoch 001: 1040 / 8862 loss=10.902, nll_loss=10.218, ppl=1191.08, wps=41910, ups=3, wpb=15198.386, bsz=558.619, num_updates=1037, lr=0.000129699, gnorm=1.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=376, train_wall=281 | |
| epoch 001: 1050 / 8862 loss=10.887, nll_loss=10.201, ppl=1176.83, wps=41986, ups=3, wpb=15197.744, bsz=559.305, num_updates=1047, lr=0.000130949, gnorm=1.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=379, train_wall=283 | |
| epoch 001: 1060 / 8862 loss=10.872, nll_loss=10.184, ppl=1163.19, wps=42066, ups=3, wpb=15197.095, bsz=559.470, num_updates=1057, lr=0.000132199, gnorm=1.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=382, train_wall=286 | |
| epoch 001: 1070 / 8862 loss=10.857, nll_loss=10.166, ppl=1149.24, wps=42145, ups=3, wpb=15197.007, bsz=559.295, num_updates=1067, lr=0.000133448, gnorm=1.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=385, train_wall=289 | |
| epoch 001: 1080 / 8862 loss=10.842, nll_loss=10.149, ppl=1135.02, wps=42222, ups=3, wpb=15196.911, bsz=559.889, num_updates=1077, lr=0.000134698, gnorm=1.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=388, train_wall=291 | |
| epoch 001: 1090 / 8862 loss=10.827, nll_loss=10.131, ppl=1121.67, wps=42304, ups=3, wpb=15198.595, bsz=559.573, num_updates=1087, lr=0.000135948, gnorm=1.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=391, train_wall=294 | |
| epoch 001: 1100 / 8862 loss=10.814, nll_loss=10.116, ppl=1109.81, wps=42376, ups=3, wpb=15196.641, bsz=558.512, num_updates=1097, lr=0.000137198, gnorm=1.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=393, train_wall=297 | |
| epoch 001: 1110 / 8862 loss=10.801, nll_loss=10.101, ppl=1097.95, wps=42449, ups=3, wpb=15195.856, bsz=558.128, num_updates=1107, lr=0.000138447, gnorm=1.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=396, train_wall=299 | |
| epoch 001: 1120 / 8862 loss=10.787, nll_loss=10.085, ppl=1086.24, wps=42517, ups=3, wpb=15193.775, bsz=557.665, num_updates=1117, lr=0.000139697, gnorm=1.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=399, train_wall=302 | |
| epoch 001: 1130 / 8862 loss=10.774, nll_loss=10.069, ppl=1074.48, wps=42586, ups=3, wpb=15191.588, bsz=557.402, num_updates=1127, lr=0.000140947, gnorm=1.561, clip=0.000, oom=0.000, loss_scale=8.000, wall=402, train_wall=305 | |
| epoch 001: 1140 / 8862 loss=10.760, nll_loss=10.054, ppl=1062.77, wps=42652, ups=3, wpb=15192.142, bsz=559.043, num_updates=1137, lr=0.000142197, gnorm=1.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=405, train_wall=307 | |
| epoch 001: 1150 / 8862 loss=10.747, nll_loss=10.038, ppl=1051.32, wps=42729, ups=3, wpb=15194.888, bsz=558.675, num_updates=1147, lr=0.000143446, gnorm=1.562, clip=0.000, oom=0.000, loss_scale=8.000, wall=408, train_wall=310 | |
| epoch 001: 1160 / 8862 loss=10.734, nll_loss=10.023, ppl=1040.42, wps=42796, ups=3, wpb=15193.631, bsz=558.382, num_updates=1157, lr=0.000144696, gnorm=1.560, clip=0.000, oom=0.000, loss_scale=8.000, wall=411, train_wall=313 | |
| epoch 001: 1170 / 8862 loss=10.721, nll_loss=10.007, ppl=1029.28, wps=42861, ups=3, wpb=15193.014, bsz=558.423, num_updates=1167, lr=0.000145946, gnorm=1.556, clip=0.000, oom=0.000, loss_scale=8.000, wall=414, train_wall=315 | |
| epoch 001: 1180 / 8862 loss=10.707, nll_loss=9.992, ppl=1018.28, wps=42926, ups=3, wpb=15192.370, bsz=558.206, num_updates=1177, lr=0.000147196, gnorm=1.550, clip=0.000, oom=0.000, loss_scale=8.000, wall=417, train_wall=318 | |
| epoch 001: 1190 / 8862 loss=10.694, nll_loss=9.976, ppl=1007.00, wps=42994, ups=3, wpb=15194.058, bsz=558.457, num_updates=1187, lr=0.000148445, gnorm=1.546, clip=0.000, oom=0.000, loss_scale=8.000, wall=419, train_wall=321 | |
| epoch 001: 1200 / 8862 loss=10.680, nll_loss=9.960, ppl=996.19, wps=43061, ups=3, wpb=15195.341, bsz=559.178, num_updates=1197, lr=0.000149695, gnorm=1.544, clip=0.000, oom=0.000, loss_scale=8.000, wall=422, train_wall=324 | |
| epoch 001: 1210 / 8862 loss=10.668, nll_loss=9.946, ppl=986.37, wps=43118, ups=3, wpb=15192.472, bsz=558.575, num_updates=1207, lr=0.000150945, gnorm=1.543, clip=0.000, oom=0.000, loss_scale=8.000, wall=425, train_wall=326 | |
| epoch 001: 1220 / 8862 loss=10.656, nll_loss=9.931, ppl=976.49, wps=43176, ups=3, wpb=15189.879, bsz=558.159, num_updates=1217, lr=0.000152195, gnorm=1.540, clip=0.000, oom=0.000, loss_scale=8.000, wall=428, train_wall=329 | |
| epoch 001: 1230 / 8862 loss=10.643, nll_loss=9.917, ppl=966.85, wps=43235, ups=3, wpb=15188.277, bsz=557.614, num_updates=1227, lr=0.000153444, gnorm=1.536, clip=0.000, oom=0.000, loss_scale=8.000, wall=431, train_wall=331 | |
| epoch 001: 1240 / 8862 loss=10.630, nll_loss=9.902, ppl=956.76, wps=43298, ups=3, wpb=15188.947, bsz=557.497, num_updates=1237, lr=0.000154694, gnorm=1.533, clip=0.000, oom=0.000, loss_scale=8.000, wall=434, train_wall=334 | |
| epoch 001: 1250 / 8862 loss=10.617, nll_loss=9.887, ppl=946.84, wps=43360, ups=3, wpb=15189.010, bsz=557.254, num_updates=1247, lr=0.000155944, gnorm=1.529, clip=0.000, oom=0.000, loss_scale=8.000, wall=437, train_wall=337 | |
| epoch 001: 1260 / 8862 loss=10.605, nll_loss=9.873, ppl=937.39, wps=43422, ups=3, wpb=15188.964, bsz=556.951, num_updates=1257, lr=0.000157194, gnorm=1.525, clip=0.000, oom=0.000, loss_scale=8.000, wall=440, train_wall=340 | |
| epoch 001: 1270 / 8862 loss=10.592, nll_loss=9.858, ppl=927.77, wps=43481, ups=3, wpb=15188.691, bsz=557.272, num_updates=1267, lr=0.000158443, gnorm=1.524, clip=0.000, oom=0.000, loss_scale=8.000, wall=443, train_wall=342 | |
| epoch 001: 1280 / 8862 loss=10.580, nll_loss=9.843, ppl=918.62, wps=43537, ups=3, wpb=15187.771, bsz=556.974, num_updates=1277, lr=0.000159693, gnorm=1.521, clip=0.000, oom=0.000, loss_scale=8.000, wall=445, train_wall=345 | |
| epoch 001: 1290 / 8862 loss=10.567, nll_loss=9.829, ppl=909.24, wps=43592, ups=3, wpb=15186.793, bsz=557.153, num_updates=1287, lr=0.000160943, gnorm=1.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=448, train_wall=348 | |
| epoch 001: 1300 / 8862 loss=10.555, nll_loss=9.815, ppl=900.47, wps=43652, ups=3, wpb=15187.278, bsz=556.688, num_updates=1297, lr=0.000162193, gnorm=1.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=451, train_wall=350 | |
| epoch 001: 1310 / 8862 loss=10.544, nll_loss=9.801, ppl=892.07, wps=43706, ups=3, wpb=15185.586, bsz=556.266, num_updates=1307, lr=0.000163442, gnorm=1.512, clip=0.000, oom=0.000, loss_scale=8.000, wall=454, train_wall=353 | |
| epoch 001: 1320 / 8862 loss=10.532, nll_loss=9.787, ppl=883.47, wps=43763, ups=3, wpb=15185.807, bsz=556.240, num_updates=1317, lr=0.000164692, gnorm=1.509, clip=0.000, oom=0.000, loss_scale=8.000, wall=457, train_wall=355 | |
| epoch 001: 1330 / 8862 loss=10.519, nll_loss=9.772, ppl=874.32, wps=43822, ups=3, wpb=15188.904, bsz=556.732, num_updates=1327, lr=0.000165942, gnorm=1.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=460, train_wall=358 | |
| epoch 001: 1340 / 8862 loss=10.507, nll_loss=9.758, ppl=866.11, wps=43870, ups=3, wpb=15187.545, bsz=556.416, num_updates=1337, lr=0.000167192, gnorm=1.502, clip=0.000, oom=0.000, loss_scale=8.000, wall=463, train_wall=361 | |
| epoch 001: 1350 / 8862 loss=10.496, nll_loss=9.746, ppl=858.44, wps=43923, ups=3, wpb=15187.333, bsz=556.033, num_updates=1347, lr=0.000168441, gnorm=1.500, clip=0.000, oom=0.000, loss_scale=8.000, wall=466, train_wall=364 | |
| epoch 001: 1360 / 8862 loss=10.485, nll_loss=9.732, ppl=850.59, wps=43974, ups=3, wpb=15186.065, bsz=555.496, num_updates=1357, lr=0.000169691, gnorm=1.497, clip=0.000, oom=0.000, loss_scale=8.000, wall=469, train_wall=366 | |
| epoch 001: 1370 / 8862 loss=10.474, nll_loss=9.720, ppl=843.12, wps=44022, ups=3, wpb=15183.778, bsz=554.622, num_updates=1367, lr=0.000170941, gnorm=1.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=471, train_wall=369 | |
| epoch 001: 1380 / 8862 loss=10.462, nll_loss=9.706, ppl=835.06, wps=44073, ups=3, wpb=15183.880, bsz=554.423, num_updates=1377, lr=0.000172191, gnorm=1.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=474, train_wall=372 | |
| epoch 001: 1390 / 8862 loss=10.450, nll_loss=9.692, ppl=826.96, wps=44125, ups=3, wpb=15185.092, bsz=554.532, num_updates=1387, lr=0.00017344, gnorm=1.487, clip=0.000, oom=0.000, loss_scale=8.000, wall=477, train_wall=374 | |
| epoch 001: 1400 / 8862 loss=10.438, nll_loss=9.679, ppl=819.51, wps=44177, ups=3, wpb=15185.842, bsz=553.941, num_updates=1397, lr=0.00017469, gnorm=1.484, clip=0.000, oom=0.000, loss_scale=8.000, wall=480, train_wall=377 | |
| epoch 001: 1410 / 8862 loss=10.427, nll_loss=9.666, ppl=812.25, wps=44222, ups=3, wpb=15184.659, bsz=553.655, num_updates=1407, lr=0.00017594, gnorm=1.482, clip=0.000, oom=0.000, loss_scale=8.000, wall=483, train_wall=380 | |
| epoch 001: 1420 / 8862 loss=10.415, nll_loss=9.652, ppl=804.31, wps=44271, ups=3, wpb=15185.694, bsz=554.100, num_updates=1417, lr=0.00017719, gnorm=1.480, clip=0.000, oom=0.000, loss_scale=8.000, wall=486, train_wall=382 | |
| epoch 001: 1430 / 8862 loss=10.403, nll_loss=9.638, ppl=796.66, wps=44320, ups=3, wpb=15186.435, bsz=554.461, num_updates=1427, lr=0.000178439, gnorm=1.478, clip=0.000, oom=0.000, loss_scale=8.000, wall=489, train_wall=385 | |
| epoch 001: 1440 / 8862 loss=10.392, nll_loss=9.625, ppl=789.35, wps=44370, ups=3, wpb=15186.713, bsz=554.283, num_updates=1437, lr=0.000179689, gnorm=1.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=492, train_wall=388 | |
| epoch 001: 1450 / 8862 loss=10.381, nll_loss=9.612, ppl=782.27, wps=44415, ups=3, wpb=15186.972, bsz=554.040, num_updates=1447, lr=0.000180939, gnorm=1.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=495, train_wall=390 | |
| epoch 001: 1460 / 8862 loss=10.370, nll_loss=9.599, ppl=775.51, wps=44460, ups=3, wpb=15186.293, bsz=553.680, num_updates=1457, lr=0.000182189, gnorm=1.469, clip=0.000, oom=0.000, loss_scale=8.000, wall=498, train_wall=393 | |
| epoch 001: 1470 / 8862 loss=10.359, nll_loss=9.586, ppl=768.56, wps=44502, ups=3, wpb=15184.636, bsz=553.832, num_updates=1467, lr=0.000183438, gnorm=1.467, clip=0.000, oom=0.000, loss_scale=8.000, wall=501, train_wall=396 | |
| epoch 001: 1480 / 8862 loss=10.348, nll_loss=9.573, ppl=761.88, wps=44546, ups=3, wpb=15184.217, bsz=553.636, num_updates=1477, lr=0.000184688, gnorm=1.464, clip=0.000, oom=0.000, loss_scale=8.000, wall=503, train_wall=398 | |
| epoch 001: 1490 / 8862 loss=10.337, nll_loss=9.561, ppl=755.12, wps=44595, ups=3, wpb=15185.845, bsz=553.447, num_updates=1487, lr=0.000185938, gnorm=1.461, clip=0.000, oom=0.000, loss_scale=8.000, wall=506, train_wall=401 | |
| epoch 001: 1500 / 8862 loss=10.326, nll_loss=9.548, ppl=748.36, wps=44641, ups=3, wpb=15187.168, bsz=553.283, num_updates=1497, lr=0.000187188, gnorm=1.459, clip=0.000, oom=0.000, loss_scale=8.000, wall=509, train_wall=404 | |
| epoch 001: 1510 / 8862 loss=10.315, nll_loss=9.535, ppl=741.85, wps=44681, ups=3, wpb=15186.297, bsz=553.088, num_updates=1507, lr=0.000188437, gnorm=1.456, clip=0.000, oom=0.000, loss_scale=8.000, wall=512, train_wall=407 | |
| epoch 001: 1520 / 8862 loss=10.304, nll_loss=9.522, ppl=735.05, wps=44725, ups=3, wpb=15187.020, bsz=553.561, num_updates=1517, lr=0.000189687, gnorm=1.454, clip=0.000, oom=0.000, loss_scale=8.000, wall=515, train_wall=409 | |
| epoch 001: 1530 / 8862 loss=10.293, nll_loss=9.509, ppl=728.45, wps=44763, ups=3, wpb=15184.874, bsz=554.007, num_updates=1527, lr=0.000190937, gnorm=1.452, clip=0.000, oom=0.000, loss_scale=8.000, wall=518, train_wall=412 | |
| epoch 001: 1540 / 8862 loss=10.281, nll_loss=9.495, ppl=721.81, wps=44801, ups=3, wpb=15183.963, bsz=554.280, num_updates=1537, lr=0.000192187, gnorm=1.450, clip=0.000, oom=0.000, loss_scale=8.000, wall=521, train_wall=415 | |
| epoch 001: 1550 / 8862 loss=10.270, nll_loss=9.483, ppl=715.40, wps=44843, ups=3, wpb=15184.522, bsz=554.105, num_updates=1547, lr=0.000193436, gnorm=1.447, clip=0.000, oom=0.000, loss_scale=8.000, wall=524, train_wall=417 | |
| epoch 001: 1560 / 8862 loss=10.259, nll_loss=9.470, ppl=709.15, wps=44879, ups=3, wpb=15183.380, bsz=554.230, num_updates=1557, lr=0.000194686, gnorm=1.446, clip=0.000, oom=0.000, loss_scale=8.000, wall=527, train_wall=420 | |
| epoch 001: 1570 / 8862 loss=10.249, nll_loss=9.458, ppl=703.19, wps=44919, ups=3, wpb=15183.250, bsz=554.073, num_updates=1567, lr=0.000195936, gnorm=1.444, clip=0.000, oom=0.000, loss_scale=8.000, wall=530, train_wall=423 | |
| epoch 001: 1580 / 8862 loss=10.239, nll_loss=9.446, ppl=697.54, wps=44962, ups=3, wpb=15183.965, bsz=553.781, num_updates=1577, lr=0.000197186, gnorm=1.442, clip=0.000, oom=0.000, loss_scale=8.000, wall=533, train_wall=425 | |
| epoch 001: 1590 / 8862 loss=10.228, nll_loss=9.434, ppl=691.56, wps=44998, ups=3, wpb=15183.063, bsz=554.037, num_updates=1587, lr=0.000198435, gnorm=1.440, clip=0.000, oom=0.000, loss_scale=8.000, wall=535, train_wall=428 | |
| epoch 001: 1600 / 8862 loss=10.218, nll_loss=9.422, ppl=686.13, wps=45038, ups=3, wpb=15182.926, bsz=553.653, num_updates=1597, lr=0.000199685, gnorm=1.438, clip=0.000, oom=0.000, loss_scale=8.000, wall=538, train_wall=431 | |
| epoch 001: 1610 / 8862 loss=10.208, nll_loss=9.411, ppl=680.55, wps=45075, ups=3, wpb=15181.733, bsz=554.031, num_updates=1607, lr=0.000200935, gnorm=1.437, clip=0.000, oom=0.000, loss_scale=8.000, wall=541, train_wall=433 | |
| epoch 001: 1620 / 8862 loss=10.197, nll_loss=9.398, ppl=674.61, wps=45117, ups=3, wpb=15183.721, bsz=554.271, num_updates=1617, lr=0.000202185, gnorm=1.434, clip=0.000, oom=0.000, loss_scale=8.000, wall=544, train_wall=436 | |
| epoch 001: 1630 / 8862 loss=10.187, nll_loss=9.386, ppl=669.12, wps=45153, ups=3, wpb=15182.846, bsz=554.242, num_updates=1627, lr=0.000203434, gnorm=1.431, clip=0.000, oom=0.000, loss_scale=8.000, wall=547, train_wall=439 | |
| epoch 001: 1640 / 8862 loss=10.177, nll_loss=9.374, ppl=663.65, wps=45195, ups=3, wpb=15183.541, bsz=553.857, num_updates=1637, lr=0.000204684, gnorm=1.429, clip=0.000, oom=0.000, loss_scale=8.000, wall=550, train_wall=441 | |
| epoch 001: 1650 / 8862 loss=10.167, nll_loss=9.363, ppl=658.47, wps=45233, ups=3, wpb=15183.038, bsz=553.472, num_updates=1647, lr=0.000205934, gnorm=1.426, clip=0.000, oom=0.000, loss_scale=8.000, wall=553, train_wall=444 | |
| epoch 001: 1660 / 8862 loss=10.158, nll_loss=9.351, ppl=653.25, wps=45267, ups=3, wpb=15181.751, bsz=553.091, num_updates=1657, lr=0.000207184, gnorm=1.424, clip=0.000, oom=0.000, loss_scale=8.000, wall=556, train_wall=447 | |
| epoch 001: 1670 / 8862 loss=10.148, nll_loss=9.340, ppl=648.16, wps=45303, ups=3, wpb=15181.098, bsz=552.725, num_updates=1667, lr=0.000208433, gnorm=1.422, clip=0.000, oom=0.000, loss_scale=8.000, wall=559, train_wall=449 | |
| epoch 001: 1680 / 8862 loss=10.137, nll_loss=9.328, ppl=642.71, wps=45339, ups=3, wpb=15181.556, bsz=553.436, num_updates=1677, lr=0.000209683, gnorm=1.420, clip=0.000, oom=0.000, loss_scale=8.000, wall=562, train_wall=452 | |
| epoch 001: 1690 / 8862 loss=10.127, nll_loss=9.316, ppl=637.56, wps=45372, ups=3, wpb=15180.680, bsz=553.560, num_updates=1687, lr=0.000210933, gnorm=1.418, clip=0.000, oom=0.000, loss_scale=8.000, wall=564, train_wall=455 | |
| epoch 001: 1700 / 8862 loss=10.118, nll_loss=9.305, ppl=632.54, wps=45402, ups=3, wpb=15178.540, bsz=553.678, num_updates=1697, lr=0.000212183, gnorm=1.416, clip=0.000, oom=0.000, loss_scale=8.000, wall=567, train_wall=458 | |
| epoch 001: 1710 / 8862 loss=10.107, nll_loss=9.293, ppl=627.34, wps=45442, ups=3, wpb=15180.166, bsz=553.893, num_updates=1707, lr=0.000213432, gnorm=1.414, clip=0.000, oom=0.000, loss_scale=8.000, wall=570, train_wall=460 | |
| epoch 001: 1720 / 8862 loss=10.098, nll_loss=9.282, ppl=622.46, wps=45475, ups=3, wpb=15179.603, bsz=554.027, num_updates=1717, lr=0.000214682, gnorm=1.412, clip=0.000, oom=0.000, loss_scale=8.000, wall=573, train_wall=463 | |
| epoch 001: 1730 / 8862 loss=10.088, nll_loss=9.270, ppl=617.35, wps=45512, ups=3, wpb=15180.954, bsz=554.085, num_updates=1727, lr=0.000215932, gnorm=1.409, clip=0.000, oom=0.000, loss_scale=8.000, wall=576, train_wall=466 | |
| epoch 001: 1740 / 8862 loss=10.078, nll_loss=9.259, ppl=612.75, wps=45546, ups=3, wpb=15180.557, bsz=553.838, num_updates=1737, lr=0.000217182, gnorm=1.407, clip=0.000, oom=0.000, loss_scale=8.000, wall=579, train_wall=468 | |
| epoch 001: 1750 / 8862 loss=10.068, nll_loss=9.247, ppl=607.72, wps=45582, ups=3, wpb=15181.072, bsz=553.992, num_updates=1747, lr=0.000218431, gnorm=1.405, clip=0.000, oom=0.000, loss_scale=8.000, wall=582, train_wall=471 | |
| epoch 001: 1760 / 8862 loss=10.059, nll_loss=9.236, ppl=603.18, wps=45621, ups=3, wpb=15182.474, bsz=553.890, num_updates=1757, lr=0.000219681, gnorm=1.404, clip=0.000, oom=0.000, loss_scale=8.000, wall=585, train_wall=474 | |
| epoch 001: 1770 / 8862 loss=10.050, nll_loss=9.226, ppl=598.83, wps=45653, ups=3, wpb=15181.231, bsz=553.825, num_updates=1767, lr=0.000220931, gnorm=1.403, clip=0.000, oom=0.000, loss_scale=8.000, wall=588, train_wall=476 | |
| epoch 001: 1780 / 8862 loss=10.040, nll_loss=9.215, ppl=594.26, wps=45685, ups=3, wpb=15181.177, bsz=554.071, num_updates=1777, lr=0.000222181, gnorm=1.402, clip=0.000, oom=0.000, loss_scale=8.000, wall=591, train_wall=479 | |
| epoch 001: 1790 / 8862 loss=10.031, nll_loss=9.204, ppl=589.93, wps=45717, ups=3, wpb=15180.619, bsz=553.844, num_updates=1787, lr=0.00022343, gnorm=1.399, clip=0.000, oom=0.000, loss_scale=8.000, wall=593, train_wall=482 | |
| epoch 001: 1800 / 8862 loss=10.021, nll_loss=9.193, ppl=585.28, wps=45757, ups=3, wpb=15183.229, bsz=554.003, num_updates=1797, lr=0.00022468, gnorm=1.396, clip=0.000, oom=0.000, loss_scale=8.000, wall=596, train_wall=484 | |
| epoch 001: 1810 / 8862 loss=10.011, nll_loss=9.181, ppl=580.53, wps=45797, ups=3, wpb=15185.828, bsz=554.253, num_updates=1807, lr=0.00022593, gnorm=1.394, clip=0.000, oom=0.000, loss_scale=8.000, wall=599, train_wall=487 | |
| epoch 001: 1820 / 8862 loss=10.002, nll_loss=9.170, ppl=575.95, wps=45832, ups=3, wpb=15187.144, bsz=554.474, num_updates=1817, lr=0.00022718, gnorm=1.392, clip=0.000, oom=0.000, loss_scale=8.000, wall=602, train_wall=490 | |
| epoch 001: 1830 / 8862 loss=9.992, nll_loss=9.159, ppl=571.58, wps=45861, ups=3, wpb=15186.502, bsz=554.465, num_updates=1827, lr=0.000228429, gnorm=1.390, clip=0.000, oom=0.000, loss_scale=8.000, wall=605, train_wall=492 | |
| epoch 001: 1840 / 8862 loss=9.983, nll_loss=9.148, ppl=567.20, wps=45891, ups=3, wpb=15186.456, bsz=554.835, num_updates=1837, lr=0.000229679, gnorm=1.389, clip=0.000, oom=0.000, loss_scale=8.000, wall=608, train_wall=495 | |
| epoch 001: 1850 / 8862 loss=9.973, nll_loss=9.137, ppl=562.83, wps=45923, ups=3, wpb=15186.601, bsz=554.629, num_updates=1847, lr=0.000230929, gnorm=1.386, clip=0.000, oom=0.000, loss_scale=8.000, wall=611, train_wall=498 | |
| epoch 001: 1860 / 8862 loss=9.964, nll_loss=9.126, ppl=558.84, wps=45950, ups=3, wpb=15184.812, bsz=554.537, num_updates=1857, lr=0.000232179, gnorm=1.384, clip=0.000, oom=0.000, loss_scale=8.000, wall=614, train_wall=500 | |
| epoch 001: 1870 / 8862 loss=9.955, nll_loss=9.115, ppl=554.67, wps=45977, ups=3, wpb=15184.567, bsz=555.424, num_updates=1867, lr=0.000233428, gnorm=1.385, clip=0.000, oom=0.000, loss_scale=8.000, wall=617, train_wall=503 | |
| epoch 001: 1880 / 8862 loss=9.945, nll_loss=9.104, ppl=550.46, wps=46010, ups=3, wpb=15185.762, bsz=555.759, num_updates=1877, lr=0.000234678, gnorm=1.384, clip=0.000, oom=0.000, loss_scale=8.000, wall=620, train_wall=506 | |
| epoch 001: 1890 / 8862 loss=9.937, nll_loss=9.094, ppl=546.57, wps=46043, ups=3, wpb=15186.843, bsz=555.459, num_updates=1887, lr=0.000235928, gnorm=1.382, clip=0.000, oom=0.000, loss_scale=8.000, wall=622, train_wall=508 | |
| epoch 001: 1900 / 8862 loss=9.927, nll_loss=9.083, ppl=542.46, wps=46080, ups=3, wpb=15189.070, bsz=555.462, num_updates=1897, lr=0.000237178, gnorm=1.381, clip=0.000, oom=0.000, loss_scale=8.000, wall=625, train_wall=511 | |
| epoch 001: 1910 / 8862 loss=9.918, nll_loss=9.072, ppl=538.24, wps=46111, ups=3, wpb=15189.866, bsz=555.906, num_updates=1907, lr=0.000238427, gnorm=1.379, clip=0.000, oom=0.000, loss_scale=8.000, wall=628, train_wall=514 | |
| epoch 001: 1920 / 8862 loss=9.908, nll_loss=9.062, ppl=534.31, wps=46140, ups=3, wpb=15189.651, bsz=555.948, num_updates=1917, lr=0.000239677, gnorm=1.377, clip=0.000, oom=0.000, loss_scale=8.000, wall=631, train_wall=516 | |
| epoch 001: 1930 / 8862 loss=9.899, nll_loss=9.051, ppl=530.43, wps=46174, ups=3, wpb=15191.216, bsz=555.894, num_updates=1927, lr=0.000240927, gnorm=1.375, clip=0.000, oom=0.000, loss_scale=8.000, wall=634, train_wall=519 | |
| epoch 001: 1940 / 8862 loss=9.891, nll_loss=9.041, ppl=526.74, wps=46207, ups=3, wpb=15192.620, bsz=555.350, num_updates=1937, lr=0.000242177, gnorm=1.373, clip=0.000, oom=0.000, loss_scale=8.000, wall=637, train_wall=522 | |
| epoch 001: 1950 / 8862 loss=9.882, nll_loss=9.031, ppl=523.17, wps=46237, ups=3, wpb=15192.255, bsz=554.987, num_updates=1947, lr=0.000243426, gnorm=1.372, clip=0.000, oom=0.000, loss_scale=8.000, wall=640, train_wall=524 | |
| epoch 001: 1960 / 8862 loss=9.873, nll_loss=9.020, ppl=519.32, wps=46267, ups=3, wpb=15193.013, bsz=555.336, num_updates=1957, lr=0.000244676, gnorm=1.370, clip=0.000, oom=0.000, loss_scale=8.000, wall=643, train_wall=527 | |
| epoch 001: 1970 / 8862 loss=9.864, nll_loss=9.010, ppl=515.59, wps=46296, ups=3, wpb=15193.062, bsz=555.331, num_updates=1967, lr=0.000245926, gnorm=1.369, clip=0.000, oom=0.000, loss_scale=8.000, wall=646, train_wall=530 | |
| epoch 001: 1980 / 8862 loss=9.856, nll_loss=9.000, ppl=512.04, wps=46324, ups=3, wpb=15193.384, bsz=555.249, num_updates=1977, lr=0.000247176, gnorm=1.367, clip=0.000, oom=0.000, loss_scale=8.000, wall=648, train_wall=532 | |
| epoch 001: 1990 / 8862 loss=9.846, nll_loss=8.989, ppl=508.25, wps=46356, ups=3, wpb=15195.103, bsz=555.748, num_updates=1987, lr=0.000248425, gnorm=1.365, clip=0.000, oom=0.000, loss_scale=8.000, wall=651, train_wall=535 | |
| epoch 001: 2000 / 8862 loss=9.837, nll_loss=8.979, ppl=504.54, wps=46380, ups=3, wpb=15194.550, bsz=556.046, num_updates=1997, lr=0.000249675, gnorm=1.363, clip=0.000, oom=0.000, loss_scale=8.000, wall=654, train_wall=538 | |
| epoch 001: 2010 / 8862 loss=9.829, nll_loss=8.969, ppl=501.03, wps=46406, ups=3, wpb=15194.273, bsz=556.070, num_updates=2007, lr=0.000250925, gnorm=1.360, clip=0.000, oom=0.000, loss_scale=8.000, wall=657, train_wall=541 | |
| epoch 001: 2020 / 8862 loss=9.821, nll_loss=8.959, ppl=497.76, wps=46431, ups=3, wpb=15195.459, bsz=556.894, num_updates=2017, lr=0.000252175, gnorm=1.362, clip=0.000, oom=0.000, loss_scale=8.000, wall=660, train_wall=543 | |
| epoch 001: 2030 / 8862 loss=9.813, nll_loss=8.950, ppl=494.62, wps=46457, ups=3, wpb=15194.946, bsz=556.744, num_updates=2027, lr=0.000253424, gnorm=1.361, clip=0.000, oom=0.000, loss_scale=8.000, wall=663, train_wall=546 | |
| epoch 001: 2040 / 8862 loss=9.804, nll_loss=8.940, ppl=491.28, wps=46487, ups=3, wpb=15196.400, bsz=556.819, num_updates=2037, lr=0.000254674, gnorm=1.359, clip=0.000, oom=0.000, loss_scale=8.000, wall=666, train_wall=549 | |
| epoch 001: 2050 / 8862 loss=9.796, nll_loss=8.931, ppl=488.02, wps=46511, ups=3, wpb=15195.603, bsz=556.764, num_updates=2047, lr=0.000255924, gnorm=1.358, clip=0.000, oom=0.000, loss_scale=8.000, wall=669, train_wall=551 | |
| epoch 001: 2060 / 8862 loss=9.788, nll_loss=8.921, ppl=484.75, wps=46536, ups=3, wpb=15195.249, bsz=556.799, num_updates=2057, lr=0.000257174, gnorm=1.357, clip=0.000, oom=0.000, loss_scale=8.000, wall=672, train_wall=554 | |
| epoch 001: 2070 / 8862 loss=9.780, nll_loss=8.912, ppl=481.64, wps=46559, ups=3, wpb=15193.995, bsz=556.741, num_updates=2067, lr=0.000258423, gnorm=1.356, clip=0.000, oom=0.000, loss_scale=8.000, wall=675, train_wall=557 | |
| epoch 001: 2080 / 8862 loss=9.771, nll_loss=8.902, ppl=478.33, wps=46584, ups=3, wpb=15194.392, bsz=556.888, num_updates=2077, lr=0.000259673, gnorm=1.353, clip=0.000, oom=0.000, loss_scale=8.000, wall=677, train_wall=559 | |
| epoch 001: 2090 / 8862 loss=9.763, nll_loss=8.893, ppl=475.31, wps=46609, ups=3, wpb=15194.256, bsz=556.615, num_updates=2087, lr=0.000260923, gnorm=1.352, clip=0.000, oom=0.000, loss_scale=8.000, wall=680, train_wall=562 | |
| epoch 001: 2100 / 8862 loss=9.755, nll_loss=8.883, ppl=472.12, wps=46636, ups=3, wpb=15194.974, bsz=556.639, num_updates=2097, lr=0.000262173, gnorm=1.351, clip=0.000, oom=0.000, loss_scale=8.000, wall=683, train_wall=565 | |
| epoch 001: 2110 / 8862 loss=9.747, nll_loss=8.873, ppl=469.00, wps=46658, ups=3, wpb=15194.083, bsz=556.511, num_updates=2107, lr=0.000263422, gnorm=1.348, clip=0.000, oom=0.000, loss_scale=8.000, wall=686, train_wall=567 | |
| epoch 001: 2120 / 8862 loss=9.739, nll_loss=8.864, ppl=466.04, wps=46684, ups=3, wpb=15194.421, bsz=556.297, num_updates=2117, lr=0.000264672, gnorm=1.347, clip=0.000, oom=0.000, loss_scale=8.000, wall=689, train_wall=570 | |
| epoch 001: 2130 / 8862 loss=9.731, nll_loss=8.855, ppl=463.13, wps=46708, ups=3, wpb=15193.768, bsz=556.118, num_updates=2127, lr=0.000265922, gnorm=1.345, clip=0.000, oom=0.000, loss_scale=8.000, wall=692, train_wall=573 | |
| epoch 001: 2140 / 8862 loss=9.723, nll_loss=8.845, ppl=459.95, wps=46731, ups=3, wpb=15193.098, bsz=556.158, num_updates=2137, lr=0.000267172, gnorm=1.343, clip=0.000, oom=0.000, loss_scale=8.000, wall=695, train_wall=575 | |
| epoch 001: 2150 / 8862 loss=9.714, nll_loss=8.836, ppl=456.86, wps=46757, ups=3, wpb=15193.901, bsz=556.318, num_updates=2147, lr=0.000268421, gnorm=1.341, clip=0.000, oom=0.000, loss_scale=8.000, wall=698, train_wall=578 | |
| epoch 001: 2160 / 8862 loss=9.706, nll_loss=8.826, ppl=453.96, wps=46784, ups=3, wpb=15194.193, bsz=556.075, num_updates=2157, lr=0.000269671, gnorm=1.339, clip=0.000, oom=0.000, loss_scale=8.000, wall=701, train_wall=581 | |
| epoch 001: 2170 / 8862 loss=9.698, nll_loss=8.817, ppl=450.94, wps=46812, ups=3, wpb=15195.714, bsz=555.890, num_updates=2167, lr=0.000270921, gnorm=1.337, clip=0.000, oom=0.000, loss_scale=8.000, wall=703, train_wall=583 | |
| epoch 001: 2180 / 8862 loss=9.690, nll_loss=8.808, ppl=448.06, wps=46835, ups=3, wpb=15195.771, bsz=555.740, num_updates=2177, lr=0.000272171, gnorm=1.336, clip=0.000, oom=0.000, loss_scale=8.000, wall=706, train_wall=586 | |
| epoch 001: 2190 / 8862 loss=9.682, nll_loss=8.798, ppl=445.00, wps=46858, ups=3, wpb=15195.667, bsz=556.001, num_updates=2187, lr=0.00027342, gnorm=1.335, clip=0.000, oom=0.000, loss_scale=8.000, wall=709, train_wall=589 | |
| epoch 001: 2200 / 8862 loss=9.674, nll_loss=8.789, ppl=442.36, wps=46878, ups=3, wpb=15194.137, bsz=555.790, num_updates=2197, lr=0.00027467, gnorm=1.333, clip=0.000, oom=0.000, loss_scale=8.000, wall=712, train_wall=591 | |
| epoch 001: 2210 / 8862 loss=9.666, nll_loss=8.780, ppl=439.56, wps=46904, ups=3, wpb=15195.204, bsz=555.729, num_updates=2207, lr=0.00027592, gnorm=1.332, clip=0.000, oom=0.000, loss_scale=8.000, wall=715, train_wall=594 | |
| epoch 001: 2220 / 8862 loss=9.658, nll_loss=8.770, ppl=436.69, wps=46929, ups=3, wpb=15195.397, bsz=555.564, num_updates=2217, lr=0.00027717, gnorm=1.330, clip=0.000, oom=0.000, loss_scale=8.000, wall=718, train_wall=597 | |
| epoch 001: 2230 / 8862 loss=9.651, nll_loss=8.762, ppl=434.17, wps=46949, ups=3, wpb=15193.880, bsz=555.193, num_updates=2227, lr=0.000278419, gnorm=1.329, clip=0.000, oom=0.000, loss_scale=8.000, wall=721, train_wall=599 | |
| epoch 001: 2240 / 8862 loss=9.644, nll_loss=8.754, ppl=431.62, wps=46971, ups=3, wpb=15193.618, bsz=555.143, num_updates=2237, lr=0.000279669, gnorm=1.327, clip=0.000, oom=0.000, loss_scale=8.000, wall=724, train_wall=602 | |
| epoch 001: 2250 / 8862 loss=9.636, nll_loss=8.745, ppl=429.00, wps=46989, ups=3, wpb=15192.215, bsz=555.036, num_updates=2247, lr=0.000280919, gnorm=1.326, clip=0.000, oom=0.000, loss_scale=8.000, wall=726, train_wall=605 | |
| epoch 001: 2260 / 8862 loss=9.628, nll_loss=8.736, ppl=426.28, wps=47005, ups=3, wpb=15190.047, bsz=555.338, num_updates=2257, lr=0.000282169, gnorm=1.324, clip=0.000, oom=0.000, loss_scale=8.000, wall=729, train_wall=607 | |
| epoch 001: 2270 / 8862 loss=9.620, nll_loss=8.726, ppl=423.55, wps=47029, ups=3, wpb=15190.949, bsz=555.341, num_updates=2267, lr=0.000283418, gnorm=1.323, clip=0.000, oom=0.000, loss_scale=8.000, wall=732, train_wall=610 | |
| epoch 001: 2280 / 8862 loss=9.612, nll_loss=8.717, ppl=420.84, wps=47053, ups=3, wpb=15191.634, bsz=555.323, num_updates=2277, lr=0.000284668, gnorm=1.321, clip=0.000, oom=0.000, loss_scale=8.000, wall=735, train_wall=613 | |
| epoch 001: 2290 / 8862 loss=9.604, nll_loss=8.707, ppl=418.04, wps=47077, ups=3, wpb=15192.910, bsz=555.613, num_updates=2287, lr=0.000285918, gnorm=1.321, clip=0.000, oom=0.000, loss_scale=8.000, wall=738, train_wall=615 | |
| epoch 001: 2300 / 8862 loss=9.597, nll_loss=8.699, ppl=415.66, wps=47098, ups=3, wpb=15192.317, bsz=555.276, num_updates=2297, lr=0.000287168, gnorm=1.319, clip=0.000, oom=0.000, loss_scale=8.000, wall=741, train_wall=618 | |
| epoch 001: 2310 / 8862 loss=9.589, nll_loss=8.690, ppl=412.90, wps=47123, ups=3, wpb=15193.698, bsz=555.488, num_updates=2307, lr=0.000288417, gnorm=1.318, clip=0.000, oom=0.000, loss_scale=8.000, wall=744, train_wall=621 | |
| epoch 001: 2320 / 8862 loss=9.581, nll_loss=8.681, ppl=410.40, wps=47143, ups=3, wpb=15193.703, bsz=555.549, num_updates=2317, lr=0.000289667, gnorm=1.317, clip=0.000, oom=0.000, loss_scale=8.000, wall=747, train_wall=623 | |
| epoch 001: 2330 / 8862 loss=9.574, nll_loss=8.672, ppl=407.90, wps=47168, ups=3, wpb=15194.758, bsz=555.275, num_updates=2327, lr=0.000290917, gnorm=1.315, clip=0.000, oom=0.000, loss_scale=8.000, wall=750, train_wall=626 | |
| epoch 001: 2340 / 8862 loss=9.566, nll_loss=8.663, ppl=405.40, wps=47191, ups=3, wpb=15195.290, bsz=555.237, num_updates=2337, lr=0.000292167, gnorm=1.314, clip=0.000, oom=0.000, loss_scale=8.000, wall=752, train_wall=629 | |
| epoch 001: 2350 / 8862 loss=9.559, nll_loss=8.654, ppl=402.95, wps=47216, ups=3, wpb=15196.507, bsz=554.876, num_updates=2347, lr=0.000293416, gnorm=1.313, clip=0.000, oom=0.000, loss_scale=8.000, wall=755, train_wall=631 | |
| epoch 001: 2360 / 8862 loss=9.550, nll_loss=8.645, ppl=400.24, wps=47235, ups=3, wpb=15196.051, bsz=555.254, num_updates=2357, lr=0.000294666, gnorm=1.312, clip=0.000, oom=0.000, loss_scale=8.000, wall=758, train_wall=634 | |
| epoch 001: 2370 / 8862 loss=9.543, nll_loss=8.636, ppl=397.80, wps=47255, ups=3, wpb=15195.759, bsz=555.085, num_updates=2367, lr=0.000295916, gnorm=1.311, clip=0.000, oom=0.000, loss_scale=8.000, wall=761, train_wall=637 | |
| epoch 001: 2380 / 8862 loss=9.535, nll_loss=8.627, ppl=395.37, wps=47272, ups=3, wpb=15194.870, bsz=555.139, num_updates=2377, lr=0.000297166, gnorm=1.309, clip=0.000, oom=0.000, loss_scale=8.000, wall=764, train_wall=639 | |
| epoch 001: 2390 / 8862 loss=9.527, nll_loss=8.617, ppl=392.73, wps=47292, ups=3, wpb=15195.220, bsz=555.565, num_updates=2387, lr=0.000298415, gnorm=1.308, clip=0.000, oom=0.000, loss_scale=8.000, wall=767, train_wall=642 | |
| epoch 001: 2400 / 8862 loss=9.518, nll_loss=8.608, ppl=390.17, wps=47311, ups=3, wpb=15195.933, bsz=555.984, num_updates=2397, lr=0.000299665, gnorm=1.307, clip=0.000, oom=0.000, loss_scale=8.000, wall=770, train_wall=645 | |
| epoch 001: 2410 / 8862 loss=9.511, nll_loss=8.599, ppl=387.72, wps=47335, ups=3, wpb=15197.213, bsz=555.941, num_updates=2407, lr=0.000300915, gnorm=1.306, clip=0.000, oom=0.000, loss_scale=8.000, wall=773, train_wall=647 | |
| epoch 001: 2420 / 8862 loss=9.503, nll_loss=8.590, ppl=385.31, wps=47350, ups=3, wpb=15196.142, bsz=555.978, num_updates=2417, lr=0.000302165, gnorm=1.304, clip=0.000, oom=0.000, loss_scale=8.000, wall=776, train_wall=650 | |
| epoch 001: 2430 / 8862 loss=9.495, nll_loss=8.580, ppl=382.79, wps=47369, ups=3, wpb=15196.452, bsz=556.037, num_updates=2427, lr=0.000303414, gnorm=1.303, clip=0.000, oom=0.000, loss_scale=8.000, wall=779, train_wall=653 | |
| epoch 001: 2440 / 8862 loss=9.487, nll_loss=8.572, ppl=380.45, wps=47391, ups=3, wpb=15196.904, bsz=555.847, num_updates=2437, lr=0.000304664, gnorm=1.303, clip=0.000, oom=0.000, loss_scale=8.000, wall=781, train_wall=656 | |
| epoch 001: 2450 / 8862 loss=9.480, nll_loss=8.563, ppl=378.13, wps=47407, ups=3, wpb=15196.295, bsz=555.935, num_updates=2447, lr=0.000305914, gnorm=1.302, clip=0.000, oom=0.000, loss_scale=8.000, wall=784, train_wall=658 | |
| epoch 001: 2460 / 8862 loss=9.472, nll_loss=8.554, ppl=375.90, wps=47423, ups=3, wpb=15195.113, bsz=555.919, num_updates=2457, lr=0.000307164, gnorm=1.302, clip=0.000, oom=0.000, loss_scale=8.000, wall=787, train_wall=661 | |
| epoch 001: 2470 / 8862 loss=9.465, nll_loss=8.546, ppl=373.78, wps=47444, ups=3, wpb=15195.460, bsz=555.443, num_updates=2467, lr=0.000308413, gnorm=1.301, clip=0.000, oom=0.000, loss_scale=8.000, wall=790, train_wall=664 | |
| epoch 001: 2480 / 8862 loss=9.457, nll_loss=8.537, ppl=371.42, wps=47463, ups=3, wpb=15195.240, bsz=555.552, num_updates=2477, lr=0.000309663, gnorm=1.299, clip=0.000, oom=0.000, loss_scale=8.000, wall=793, train_wall=666 | |
| epoch 001: 2490 / 8862 loss=9.450, nll_loss=8.528, ppl=369.23, wps=47478, ups=3, wpb=15194.429, bsz=555.595, num_updates=2487, lr=0.000310913, gnorm=1.299, clip=0.000, oom=0.000, loss_scale=8.000, wall=796, train_wall=669 | |
| epoch 001: 2500 / 8862 loss=9.442, nll_loss=8.519, ppl=366.91, wps=47497, ups=3, wpb=15194.684, bsz=555.796, num_updates=2497, lr=0.000312163, gnorm=1.297, clip=0.000, oom=0.000, loss_scale=8.000, wall=799, train_wall=672 | |
| epoch 001: 2510 / 8862 loss=9.434, nll_loss=8.510, ppl=364.64, wps=47518, ups=3, wpb=15195.847, bsz=555.963, num_updates=2507, lr=0.000313412, gnorm=1.297, clip=0.000, oom=0.000, loss_scale=8.000, wall=802, train_wall=674 | |
| epoch 001: 2520 / 8862 loss=9.426, nll_loss=8.501, ppl=362.24, wps=47537, ups=3, wpb=15196.489, bsz=556.376, num_updates=2517, lr=0.000314662, gnorm=1.296, clip=0.000, oom=0.000, loss_scale=8.000, wall=805, train_wall=677 | |
| epoch 001: 2530 / 8862 loss=9.418, nll_loss=8.491, ppl=359.91, wps=47556, ups=3, wpb=15196.829, bsz=556.564, num_updates=2527, lr=0.000315912, gnorm=1.295, clip=0.000, oom=0.000, loss_scale=8.000, wall=808, train_wall=680 | |
| epoch 001: 2540 / 8862 loss=9.410, nll_loss=8.482, ppl=357.57, wps=47577, ups=3, wpb=15198.122, bsz=556.868, num_updates=2537, lr=0.000317162, gnorm=1.294, clip=0.000, oom=0.000, loss_scale=8.000, wall=810, train_wall=682 | |
| epoch 001: 2550 / 8862 loss=9.402, nll_loss=8.473, ppl=355.36, wps=47595, ups=3, wpb=15198.746, bsz=557.292, num_updates=2547, lr=0.000318411, gnorm=1.294, clip=0.000, oom=0.000, loss_scale=8.000, wall=813, train_wall=685 | |
| epoch 001: 2560 / 8862 loss=9.395, nll_loss=8.465, ppl=353.34, wps=47611, ups=3, wpb=15197.952, bsz=557.180, num_updates=2557, lr=0.000319661, gnorm=1.293, clip=0.000, oom=0.000, loss_scale=8.000, wall=816, train_wall=688 | |
| epoch 001: 2570 / 8862 loss=9.388, nll_loss=8.457, ppl=351.31, wps=47628, ups=3, wpb=15199.127, bsz=557.562, num_updates=2567, lr=0.000320911, gnorm=1.294, clip=0.000, oom=0.000, loss_scale=8.000, wall=819, train_wall=690 | |
| epoch 001: 2580 / 8862 loss=9.381, nll_loss=8.448, ppl=349.28, wps=47644, ups=3, wpb=15198.917, bsz=557.398, num_updates=2577, lr=0.000322161, gnorm=1.292, clip=0.000, oom=0.000, loss_scale=8.000, wall=822, train_wall=693 | |
| epoch 001: 2590 / 8862 loss=9.373, nll_loss=8.439, ppl=346.98, wps=47659, ups=3, wpb=15199.230, bsz=558.029, num_updates=2587, lr=0.00032341, gnorm=1.292, clip=0.000, oom=0.000, loss_scale=8.000, wall=825, train_wall=696 | |
| epoch 001: 2600 / 8862 loss=9.365, nll_loss=8.430, ppl=344.94, wps=47676, ups=3, wpb=15199.239, bsz=558.209, num_updates=2597, lr=0.00032466, gnorm=1.291, clip=0.000, oom=0.000, loss_scale=8.000, wall=828, train_wall=698 | |
| epoch 001: 2610 / 8862 loss=9.358, nll_loss=8.422, ppl=343.01, wps=47693, ups=3, wpb=15199.077, bsz=558.044, num_updates=2607, lr=0.00032591, gnorm=1.290, clip=0.000, oom=0.000, loss_scale=8.000, wall=831, train_wall=701 | |
| epoch 001: 2620 / 8862 loss=9.351, nll_loss=8.414, ppl=340.98, wps=47707, ups=3, wpb=15198.109, bsz=558.245, num_updates=2617, lr=0.00032716, gnorm=1.290, clip=0.000, oom=0.000, loss_scale=8.000, wall=834, train_wall=704 | |
| epoch 001: 2630 / 8862 loss=9.344, nll_loss=8.405, ppl=339.07, wps=47722, ups=3, wpb=15197.603, bsz=557.968, num_updates=2627, lr=0.000328409, gnorm=1.289, clip=0.000, oom=0.000, loss_scale=8.000, wall=837, train_wall=706 | |
| epoch 001: 2640 / 8862 loss=9.337, nll_loss=8.397, ppl=337.16, wps=47738, ups=3, wpb=15197.391, bsz=557.636, num_updates=2637, lr=0.000329659, gnorm=1.288, clip=0.000, oom=0.000, loss_scale=8.000, wall=839, train_wall=709 | |
| epoch 001: 2650 / 8862 loss=9.330, nll_loss=8.389, ppl=335.23, wps=47751, ups=3, wpb=15196.198, bsz=557.587, num_updates=2647, lr=0.000330909, gnorm=1.287, clip=0.000, oom=0.000, loss_scale=8.000, wall=842, train_wall=712 | |
| epoch 001: 2660 / 8862 loss=9.323, nll_loss=8.381, ppl=333.29, wps=47768, ups=3, wpb=15196.100, bsz=557.425, num_updates=2657, lr=0.000332159, gnorm=1.286, clip=0.000, oom=0.000, loss_scale=8.000, wall=845, train_wall=714 | |
| epoch 001: 2670 / 8862 loss=9.315, nll_loss=8.372, ppl=331.26, wps=47785, ups=3, wpb=15196.415, bsz=557.471, num_updates=2667, lr=0.000333408, gnorm=1.285, clip=0.000, oom=0.000, loss_scale=8.000, wall=848, train_wall=717 | |
| epoch 001: 2680 / 8862 loss=9.308, nll_loss=8.363, ppl=329.28, wps=47802, ups=3, wpb=15196.721, bsz=557.277, num_updates=2677, lr=0.000334658, gnorm=1.284, clip=0.000, oom=0.000, loss_scale=8.000, wall=851, train_wall=720 | |
| epoch 001: 2690 / 8862 loss=9.301, nll_loss=8.355, ppl=327.41, wps=47820, ups=3, wpb=15197.080, bsz=557.111, num_updates=2687, lr=0.000335908, gnorm=1.284, clip=0.000, oom=0.000, loss_scale=8.000, wall=854, train_wall=722 | |
| epoch 001: 2700 / 8862 loss=9.293, nll_loss=8.347, ppl=325.53, wps=47836, ups=3, wpb=15196.774, bsz=556.864, num_updates=2697, lr=0.000337158, gnorm=1.283, clip=0.000, oom=0.000, loss_scale=8.000, wall=857, train_wall=725 | |
| epoch 001: 2710 / 8862 loss=9.286, nll_loss=8.338, ppl=323.51, wps=47852, ups=3, wpb=15196.891, bsz=557.074, num_updates=2707, lr=0.000338407, gnorm=1.282, clip=0.000, oom=0.000, loss_scale=8.000, wall=860, train_wall=728 | |
| epoch 001: 2720 / 8862 loss=9.278, nll_loss=8.329, ppl=321.62, wps=47867, ups=3, wpb=15196.626, bsz=556.849, num_updates=2717, lr=0.000339657, gnorm=1.280, clip=0.000, oom=0.000, loss_scale=8.000, wall=863, train_wall=731 | |
| epoch 001: 2730 / 8862 loss=9.271, nll_loss=8.321, ppl=319.73, wps=47884, ups=3, wpb=15197.448, bsz=556.913, num_updates=2727, lr=0.000340907, gnorm=1.280, clip=0.000, oom=0.000, loss_scale=8.000, wall=865, train_wall=733 | |
| epoch 001: 2740 / 8862 loss=9.264, nll_loss=8.312, ppl=317.83, wps=47898, ups=3, wpb=15197.328, bsz=556.693, num_updates=2737, lr=0.000342157, gnorm=1.279, clip=0.000, oom=0.000, loss_scale=8.000, wall=868, train_wall=736 | |
| epoch 001: 2750 / 8862 loss=9.256, nll_loss=8.303, ppl=315.86, wps=47908, ups=3, wpb=15196.589, bsz=556.892, num_updates=2747, lr=0.000343406, gnorm=1.278, clip=0.000, oom=0.000, loss_scale=8.000, wall=871, train_wall=739 | |
| epoch 001: 2760 / 8862 loss=9.248, nll_loss=8.294, ppl=313.91, wps=47923, ups=3, wpb=15197.554, bsz=556.921, num_updates=2757, lr=0.000344656, gnorm=1.277, clip=0.000, oom=0.000, loss_scale=8.000, wall=874, train_wall=741 | |
| epoch 001: 2770 / 8862 loss=9.242, nll_loss=8.286, ppl=312.23, wps=47936, ups=3, wpb=15196.674, bsz=556.767, num_updates=2767, lr=0.000345906, gnorm=1.276, clip=0.000, oom=0.000, loss_scale=8.000, wall=877, train_wall=744 | |
| epoch 001: 2780 / 8862 loss=9.234, nll_loss=8.278, ppl=310.43, wps=47949, ups=3, wpb=15196.740, bsz=556.874, num_updates=2777, lr=0.000347156, gnorm=1.276, clip=0.000, oom=0.000, loss_scale=8.000, wall=880, train_wall=747 | |
| epoch 001: 2790 / 8862 loss=9.227, nll_loss=8.270, ppl=308.59, wps=47960, ups=3, wpb=15196.416, bsz=556.936, num_updates=2787, lr=0.000348405, gnorm=1.275, clip=0.000, oom=0.000, loss_scale=8.000, wall=883, train_wall=749 | |
| epoch 001: 2800 / 8862 loss=9.220, nll_loss=8.262, ppl=306.91, wps=47971, ups=3, wpb=15195.746, bsz=556.759, num_updates=2797, lr=0.000349655, gnorm=1.273, clip=0.000, oom=0.000, loss_scale=8.000, wall=886, train_wall=752 | |
| epoch 001: 2810 / 8862 loss=9.213, nll_loss=8.253, ppl=305.14, wps=47981, ups=3, wpb=15195.102, bsz=557.030, num_updates=2807, lr=0.000350905, gnorm=1.272, clip=0.000, oom=0.000, loss_scale=8.000, wall=889, train_wall=755 | |
| epoch 001: 2820 / 8862 loss=9.206, nll_loss=8.245, ppl=303.44, wps=47992, ups=3, wpb=15195.198, bsz=557.284, num_updates=2817, lr=0.000352155, gnorm=1.273, clip=0.000, oom=0.000, loss_scale=8.000, wall=892, train_wall=758 | |
| epoch 001: 2830 / 8862 loss=9.199, nll_loss=8.237, ppl=301.67, wps=48006, ups=3, wpb=15195.452, bsz=557.186, num_updates=2827, lr=0.000353404, gnorm=1.271, clip=0.000, oom=0.000, loss_scale=8.000, wall=895, train_wall=760 | |
| epoch 001: 2840 / 8862 loss=9.191, nll_loss=8.228, ppl=299.87, wps=48018, ups=3, wpb=15196.178, bsz=557.264, num_updates=2837, lr=0.000354654, gnorm=1.270, clip=0.000, oom=0.000, loss_scale=8.000, wall=898, train_wall=763 | |
| epoch 001: 2850 / 8862 loss=9.184, nll_loss=8.219, ppl=298.07, wps=48030, ups=3, wpb=15196.298, bsz=557.296, num_updates=2847, lr=0.000355904, gnorm=1.269, clip=0.000, oom=0.000, loss_scale=8.000, wall=901, train_wall=766 | |
| epoch 001: 2860 / 8862 loss=9.176, nll_loss=8.211, ppl=296.30, wps=48041, ups=3, wpb=15195.779, bsz=557.460, num_updates=2857, lr=0.000357154, gnorm=1.269, clip=0.000, oom=0.000, loss_scale=8.000, wall=904, train_wall=769 | |
| epoch 001: 2870 / 8862 loss=9.169, nll_loss=8.203, ppl=294.61, wps=48051, ups=3, wpb=15195.375, bsz=557.415, num_updates=2867, lr=0.000358403, gnorm=1.268, clip=0.000, oom=0.000, loss_scale=8.000, wall=907, train_wall=771 | |
| epoch 001: 2880 / 8862 loss=9.162, nll_loss=8.195, ppl=292.96, wps=48060, ups=3, wpb=15194.492, bsz=557.291, num_updates=2877, lr=0.000359653, gnorm=1.268, clip=0.000, oom=0.000, loss_scale=8.000, wall=910, train_wall=774 | |
| epoch 001: 2890 / 8862 loss=9.155, nll_loss=8.186, ppl=291.26, wps=48073, ups=3, wpb=15194.906, bsz=557.045, num_updates=2887, lr=0.000360903, gnorm=1.267, clip=0.000, oom=0.000, loss_scale=8.000, wall=913, train_wall=777 | |
| epoch 001: 2900 / 8862 loss=9.148, nll_loss=8.177, ppl=289.50, wps=48087, ups=3, wpb=15196.055, bsz=557.431, num_updates=2897, lr=0.000362153, gnorm=1.267, clip=0.000, oom=0.000, loss_scale=8.000, wall=915, train_wall=779 | |
| epoch 001: 2910 / 8862 loss=9.140, nll_loss=8.169, ppl=287.85, wps=48098, ups=3, wpb=15195.988, bsz=557.465, num_updates=2907, lr=0.000363402, gnorm=1.266, clip=0.000, oom=0.000, loss_scale=8.000, wall=918, train_wall=782 | |
| epoch 001: 2920 / 8862 loss=9.134, nll_loss=8.162, ppl=286.33, wps=48104, ups=3, wpb=15193.982, bsz=557.229, num_updates=2917, lr=0.000364652, gnorm=1.265, clip=0.000, oom=0.000, loss_scale=8.000, wall=921, train_wall=785 | |
| epoch 001: 2930 / 8862 loss=9.127, nll_loss=8.153, ppl=284.68, wps=48112, ups=3, wpb=15194.132, bsz=557.660, num_updates=2927, lr=0.000365902, gnorm=1.265, clip=0.000, oom=0.000, loss_scale=8.000, wall=924, train_wall=788 | |
| epoch 001: 2940 / 8862 loss=9.119, nll_loss=8.145, ppl=283.01, wps=48126, ups=3, wpb=15195.002, bsz=557.698, num_updates=2937, lr=0.000367152, gnorm=1.264, clip=0.000, oom=0.000, loss_scale=8.000, wall=927, train_wall=790 | |
| epoch 001: 2950 / 8862 loss=9.112, nll_loss=8.136, ppl=281.40, wps=48137, ups=3, wpb=15195.268, bsz=557.898, num_updates=2947, lr=0.000368401, gnorm=1.264, clip=0.000, oom=0.000, loss_scale=8.000, wall=930, train_wall=793 | |
| epoch 001: 2960 / 8862 loss=9.105, nll_loss=8.128, ppl=279.73, wps=48147, ups=3, wpb=15194.741, bsz=558.179, num_updates=2957, lr=0.000369651, gnorm=1.263, clip=0.000, oom=0.000, loss_scale=8.000, wall=933, train_wall=796 | |
| epoch 001: 2970 / 8862 loss=9.098, nll_loss=8.120, ppl=278.19, wps=48160, ups=3, wpb=15195.037, bsz=557.969, num_updates=2967, lr=0.000370901, gnorm=1.262, clip=0.000, oom=0.000, loss_scale=8.000, wall=936, train_wall=799 | |
| epoch 001: 2980 / 8862 loss=9.091, nll_loss=8.112, ppl=276.65, wps=48168, ups=3, wpb=15194.036, bsz=557.925, num_updates=2977, lr=0.000372151, gnorm=1.261, clip=0.000, oom=0.000, loss_scale=8.000, wall=939, train_wall=801 | |
| epoch 001: 2990 / 8862 loss=9.084, nll_loss=8.104, ppl=275.11, wps=48174, ups=3, wpb=15193.194, bsz=557.809, num_updates=2987, lr=0.0003734, gnorm=1.260, clip=0.000, oom=0.000, loss_scale=8.000, wall=942, train_wall=804 | |
| epoch 001: 3000 / 8862 loss=9.077, nll_loss=8.096, ppl=273.55, wps=48187, ups=3, wpb=15193.398, bsz=557.565, num_updates=2997, lr=0.00037465, gnorm=1.259, clip=0.000, oom=0.000, loss_scale=8.000, wall=945, train_wall=807 | |
| epoch 001: 3010 / 8862 loss=9.070, nll_loss=8.087, ppl=271.93, wps=48197, ups=3, wpb=15193.377, bsz=557.573, num_updates=3007, lr=0.0003759, gnorm=1.259, clip=0.000, oom=0.000, loss_scale=8.000, wall=948, train_wall=809 | |
| epoch 001: 3020 / 8862 loss=9.062, nll_loss=8.079, ppl=270.35, wps=48212, ups=3, wpb=15194.296, bsz=557.523, num_updates=3017, lr=0.00037715, gnorm=1.258, clip=0.000, oom=0.000, loss_scale=8.000, wall=951, train_wall=812 | |
| epoch 001: 3030 / 8862 loss=9.055, nll_loss=8.070, ppl=268.78, wps=48222, ups=3, wpb=15193.478, bsz=557.626, num_updates=3027, lr=0.000378399, gnorm=1.257, clip=0.000, oom=0.000, loss_scale=8.000, wall=954, train_wall=815 | |
| epoch 001: 3040 / 8862 loss=9.048, nll_loss=8.062, ppl=267.22, wps=48234, ups=3, wpb=15193.858, bsz=557.486, num_updates=3037, lr=0.000379649, gnorm=1.257, clip=0.000, oom=0.000, loss_scale=8.000, wall=957, train_wall=818 | |
| epoch 001: 3050 / 8862 loss=9.040, nll_loss=8.053, ppl=265.61, wps=48245, ups=3, wpb=15194.290, bsz=557.642, num_updates=3047, lr=0.000380899, gnorm=1.256, clip=0.000, oom=0.000, loss_scale=8.000, wall=960, train_wall=820 | |
| epoch 001: 3060 / 8862 loss=9.033, nll_loss=8.045, ppl=264.13, wps=48257, ups=3, wpb=15194.299, bsz=557.613, num_updates=3057, lr=0.000382149, gnorm=1.255, clip=0.000, oom=0.000, loss_scale=8.000, wall=963, train_wall=823 | |
| epoch 001: 3070 / 8862 loss=9.027, nll_loss=8.037, ppl=262.72, wps=48266, ups=3, wpb=15193.188, bsz=557.394, num_updates=3067, lr=0.000383398, gnorm=1.254, clip=0.000, oom=0.000, loss_scale=8.000, wall=965, train_wall=826 | |
| epoch 001: 3080 / 8862 loss=9.020, nll_loss=8.029, ppl=261.20, wps=48276, ups=3, wpb=15192.701, bsz=557.384, num_updates=3077, lr=0.000384648, gnorm=1.253, clip=0.000, oom=0.000, loss_scale=8.000, wall=968, train_wall=828 | |
| epoch 001: 3090 / 8862 loss=9.013, nll_loss=8.021, ppl=259.72, wps=48282, ups=3, wpb=15191.744, bsz=557.325, num_updates=3087, lr=0.000385898, gnorm=1.252, clip=0.000, oom=0.000, loss_scale=8.000, wall=971, train_wall=831 | |
| epoch 001: 3100 / 8862 loss=9.005, nll_loss=8.012, ppl=258.21, wps=48293, ups=3, wpb=15191.747, bsz=557.607, num_updates=3097, lr=0.000387148, gnorm=1.252, clip=0.000, oom=0.000, loss_scale=8.000, wall=974, train_wall=834 | |
| epoch 001: 3110 / 8862 loss=8.999, nll_loss=8.005, ppl=256.84, wps=48300, ups=3, wpb=15190.496, bsz=557.347, num_updates=3107, lr=0.000388397, gnorm=1.251, clip=0.000, oom=0.000, loss_scale=8.000, wall=977, train_wall=837 | |
| epoch 001: 3120 / 8862 loss=8.991, nll_loss=7.996, ppl=255.27, wps=48313, ups=3, wpb=15191.002, bsz=557.435, num_updates=3117, lr=0.000389647, gnorm=1.250, clip=0.000, oom=0.000, loss_scale=8.000, wall=980, train_wall=839 | |
| epoch 001: 3130 / 8862 loss=8.984, nll_loss=7.988, ppl=253.84, wps=48327, ups=3, wpb=15192.021, bsz=557.285, num_updates=3127, lr=0.000390897, gnorm=1.249, clip=0.000, oom=0.000, loss_scale=8.000, wall=983, train_wall=842 | |
| epoch 001: 3140 / 8862 loss=8.977, nll_loss=7.980, ppl=252.48, wps=48337, ups=3, wpb=15192.055, bsz=557.074, num_updates=3137, lr=0.000392147, gnorm=1.248, clip=0.000, oom=0.000, loss_scale=8.000, wall=986, train_wall=845 | |
| epoch 001: 3150 / 8862 loss=8.971, nll_loss=7.972, ppl=251.11, wps=48346, ups=3, wpb=15191.181, bsz=556.895, num_updates=3147, lr=0.000393396, gnorm=1.247, clip=0.000, oom=0.000, loss_scale=8.000, wall=989, train_wall=847 | |
| epoch 001: 3160 / 8862 loss=8.963, nll_loss=7.964, ppl=249.62, wps=48360, ups=3, wpb=15192.176, bsz=556.880, num_updates=3157, lr=0.000394646, gnorm=1.247, clip=0.000, oom=0.000, loss_scale=8.000, wall=992, train_wall=850 | |
| epoch 001: 3170 / 8862 loss=8.957, nll_loss=7.956, ppl=248.31, wps=48369, ups=3, wpb=15191.379, bsz=556.771, num_updates=3167, lr=0.000395896, gnorm=1.247, clip=0.000, oom=0.000, loss_scale=8.000, wall=995, train_wall=853 | |
| epoch 001: 3180 / 8862 loss=8.950, nll_loss=7.948, ppl=246.99, wps=48382, ups=3, wpb=15191.209, bsz=556.597, num_updates=3177, lr=0.000397146, gnorm=1.246, clip=0.000, oom=0.000, loss_scale=8.000, wall=998, train_wall=855 | |
| epoch 001: 3190 / 8862 loss=8.942, nll_loss=7.940, ppl=245.51, wps=48393, ups=3, wpb=15191.094, bsz=556.729, num_updates=3187, lr=0.000398395, gnorm=1.244, clip=0.000, oom=0.000, loss_scale=8.000, wall=1000, train_wall=858 | |
| epoch 001: 3200 / 8862 loss=8.936, nll_loss=7.932, ppl=244.27, wps=48403, ups=3, wpb=15189.987, bsz=556.459, num_updates=3197, lr=0.000399645, gnorm=1.244, clip=0.000, oom=0.000, loss_scale=8.000, wall=1003, train_wall=861 | |
| epoch 001: 3210 / 8862 loss=8.929, nll_loss=7.924, ppl=242.93, wps=48415, ups=3, wpb=15190.086, bsz=556.355, num_updates=3207, lr=0.000400895, gnorm=1.243, clip=0.000, oom=0.000, loss_scale=8.000, wall=1006, train_wall=863 | |
| epoch 001: 3220 / 8862 loss=8.922, nll_loss=7.916, ppl=241.54, wps=48429, ups=3, wpb=15190.490, bsz=556.379, num_updates=3217, lr=0.000402145, gnorm=1.243, clip=0.000, oom=0.000, loss_scale=8.000, wall=1009, train_wall=866 | |
| epoch 001: 3230 / 8862 loss=8.915, nll_loss=7.908, ppl=240.14, wps=48440, ups=3, wpb=15190.535, bsz=556.653, num_updates=3227, lr=0.000403394, gnorm=1.242, clip=0.000, oom=0.000, loss_scale=8.000, wall=1012, train_wall=869 | |
| epoch 001: 3240 / 8862 loss=8.908, nll_loss=7.900, ppl=238.89, wps=48452, ups=3, wpb=15190.567, bsz=556.804, num_updates=3237, lr=0.000404644, gnorm=1.242, clip=0.000, oom=0.000, loss_scale=8.000, wall=1015, train_wall=871 | |
| epoch 001: 3250 / 8862 loss=8.902, nll_loss=7.893, ppl=237.70, wps=48462, ups=3, wpb=15190.228, bsz=556.703, num_updates=3247, lr=0.000405894, gnorm=1.242, clip=0.000, oom=0.000, loss_scale=8.000, wall=1018, train_wall=874 | |
| epoch 001: 3260 / 8862 loss=8.895, nll_loss=7.885, ppl=236.38, wps=48473, ups=3, wpb=15190.105, bsz=556.811, num_updates=3257, lr=0.000407144, gnorm=1.241, clip=0.000, oom=0.000, loss_scale=8.000, wall=1021, train_wall=877 | |
| epoch 001: 3270 / 8862 loss=8.888, nll_loss=7.877, ppl=235.01, wps=48484, ups=3, wpb=15189.793, bsz=556.794, num_updates=3267, lr=0.000408393, gnorm=1.240, clip=0.000, oom=0.000, loss_scale=8.000, wall=1024, train_wall=879 | |
| epoch 001: 3280 / 8862 loss=8.881, nll_loss=7.868, ppl=233.62, wps=48497, ups=3, wpb=15190.400, bsz=556.843, num_updates=3277, lr=0.000409643, gnorm=1.239, clip=0.000, oom=0.000, loss_scale=8.000, wall=1026, train_wall=882 | |
| epoch 001: 3290 / 8862 loss=8.873, nll_loss=7.860, ppl=232.25, wps=48507, ups=3, wpb=15190.052, bsz=556.996, num_updates=3287, lr=0.000410893, gnorm=1.238, clip=0.000, oom=0.000, loss_scale=8.000, wall=1029, train_wall=885 | |
| epoch 001: 3300 / 8862 loss=8.866, nll_loss=7.851, ppl=230.91, wps=48519, ups=3, wpb=15190.292, bsz=557.139, num_updates=3297, lr=0.000412143, gnorm=1.238, clip=0.000, oom=0.000, loss_scale=8.000, wall=1032, train_wall=887 | |
| epoch 001: 3310 / 8862 loss=8.859, nll_loss=7.843, ppl=229.55, wps=48529, ups=3, wpb=15190.481, bsz=557.621, num_updates=3307, lr=0.000413392, gnorm=1.238, clip=0.000, oom=0.000, loss_scale=8.000, wall=1035, train_wall=890 | |
| epoch 001: 3320 / 8862 loss=8.852, nll_loss=7.835, ppl=228.27, wps=48538, ups=3, wpb=15189.703, bsz=557.624, num_updates=3317, lr=0.000414642, gnorm=1.237, clip=0.000, oom=0.000, loss_scale=8.000, wall=1038, train_wall=893 | |
| epoch 001: 3330 / 8862 loss=8.845, nll_loss=7.827, ppl=227.02, wps=48548, ups=3, wpb=15189.196, bsz=557.439, num_updates=3327, lr=0.000415892, gnorm=1.236, clip=0.000, oom=0.000, loss_scale=8.000, wall=1041, train_wall=896 | |
| epoch 001: 3340 / 8862 loss=8.838, nll_loss=7.819, ppl=225.78, wps=48561, ups=3, wpb=15189.412, bsz=557.228, num_updates=3337, lr=0.000417142, gnorm=1.235, clip=0.000, oom=0.000, loss_scale=8.000, wall=1044, train_wall=898 | |
| epoch 001: 3350 / 8862 loss=8.831, nll_loss=7.810, ppl=224.44, wps=48573, ups=3, wpb=15190.002, bsz=557.220, num_updates=3347, lr=0.000418391, gnorm=1.234, clip=0.000, oom=0.000, loss_scale=8.000, wall=1047, train_wall=901 | |
| epoch 001: 3360 / 8862 loss=8.824, nll_loss=7.802, ppl=223.18, wps=48583, ups=3, wpb=15189.503, bsz=557.404, num_updates=3357, lr=0.000419641, gnorm=1.233, clip=0.000, oom=0.000, loss_scale=8.000, wall=1050, train_wall=904 | |
| epoch 001: 3370 / 8862 loss=8.817, nll_loss=7.794, ppl=221.92, wps=48593, ups=3, wpb=15189.593, bsz=557.412, num_updates=3367, lr=0.000420891, gnorm=1.233, clip=0.000, oom=0.000, loss_scale=8.000, wall=1052, train_wall=906 | |
| epoch 001: 3380 / 8862 loss=8.810, nll_loss=7.786, ppl=220.70, wps=48603, ups=3, wpb=15188.998, bsz=557.405, num_updates=3377, lr=0.000422141, gnorm=1.232, clip=0.000, oom=0.000, loss_scale=8.000, wall=1055, train_wall=909 | |
| epoch 001: 3390 / 8862 loss=8.803, nll_loss=7.778, ppl=219.51, wps=48612, ups=3, wpb=15188.245, bsz=557.347, num_updates=3387, lr=0.00042339, gnorm=1.231, clip=0.000, oom=0.000, loss_scale=8.000, wall=1058, train_wall=912 | |
| epoch 001: 3400 / 8862 loss=8.796, nll_loss=7.770, ppl=218.24, wps=48624, ups=3, wpb=15188.428, bsz=557.312, num_updates=3397, lr=0.00042464, gnorm=1.230, clip=0.000, oom=0.000, loss_scale=8.000, wall=1061, train_wall=914 | |
| epoch 001: 3410 / 8862 loss=8.789, nll_loss=7.762, ppl=217.02, wps=48635, ups=3, wpb=15188.444, bsz=557.186, num_updates=3407, lr=0.00042589, gnorm=1.229, clip=0.000, oom=0.000, loss_scale=8.000, wall=1064, train_wall=917 | |
| epoch 001: 3420 / 8862 loss=8.782, nll_loss=7.754, ppl=215.86, wps=48645, ups=3, wpb=15187.974, bsz=556.970, num_updates=3417, lr=0.00042714, gnorm=1.228, clip=0.000, oom=0.000, loss_scale=8.000, wall=1067, train_wall=919 | |
| epoch 001: 3430 / 8862 loss=8.775, nll_loss=7.746, ppl=214.66, wps=48653, ups=3, wpb=15186.886, bsz=557.016, num_updates=3427, lr=0.000428389, gnorm=1.228, clip=0.000, oom=0.000, loss_scale=8.000, wall=1070, train_wall=922 | |
| epoch 001: 3440 / 8862 loss=8.768, nll_loss=7.737, ppl=213.40, wps=48665, ups=3, wpb=15187.518, bsz=557.129, num_updates=3437, lr=0.000429639, gnorm=1.227, clip=0.000, oom=0.000, loss_scale=8.000, wall=1073, train_wall=925 | |
| epoch 001: 3450 / 8862 loss=8.760, nll_loss=7.729, ppl=212.16, wps=48675, ups=3, wpb=15187.479, bsz=557.405, num_updates=3447, lr=0.000430889, gnorm=1.226, clip=0.000, oom=0.000, loss_scale=8.000, wall=1076, train_wall=928 | |
| epoch 001: 3460 / 8862 loss=8.753, nll_loss=7.721, ppl=210.93, wps=48686, ups=3, wpb=15187.633, bsz=557.415, num_updates=3457, lr=0.000432139, gnorm=1.225, clip=0.000, oom=0.000, loss_scale=8.000, wall=1078, train_wall=930 | |
| epoch 001: 3470 / 8862 loss=8.746, nll_loss=7.712, ppl=209.70, wps=48697, ups=3, wpb=15188.057, bsz=557.417, num_updates=3467, lr=0.000433388, gnorm=1.224, clip=0.000, oom=0.000, loss_scale=8.000, wall=1081, train_wall=933 | |
| epoch 001: 3480 / 8862 loss=8.739, nll_loss=7.704, ppl=208.58, wps=48708, ups=3, wpb=15188.022, bsz=557.114, num_updates=3477, lr=0.000434638, gnorm=1.223, clip=0.000, oom=0.000, loss_scale=8.000, wall=1084, train_wall=936 | |
| epoch 001: 3490 / 8862 loss=8.732, nll_loss=7.696, ppl=207.41, wps=48721, ups=3, wpb=15188.472, bsz=556.898, num_updates=3487, lr=0.000435888, gnorm=1.223, clip=0.000, oom=0.000, loss_scale=8.000, wall=1087, train_wall=938 | |
| epoch 001: 3500 / 8862 loss=8.725, nll_loss=7.688, ppl=206.21, wps=48730, ups=3, wpb=15188.140, bsz=556.890, num_updates=3497, lr=0.000437138, gnorm=1.222, clip=0.000, oom=0.000, loss_scale=8.000, wall=1090, train_wall=941 | |
| epoch 001: 3510 / 8862 loss=8.718, nll_loss=7.680, ppl=205.13, wps=48743, ups=3, wpb=15188.817, bsz=556.676, num_updates=3507, lr=0.000438387, gnorm=1.221, clip=0.000, oom=0.000, loss_scale=8.000, wall=1093, train_wall=943 | |
| epoch 001: 3520 / 8862 loss=8.711, nll_loss=7.672, ppl=203.99, wps=48751, ups=3, wpb=15188.196, bsz=556.840, num_updates=3517, lr=0.000439637, gnorm=1.220, clip=0.000, oom=0.000, loss_scale=8.000, wall=1096, train_wall=946 | |
| epoch 001: 3530 / 8862 loss=8.704, nll_loss=7.664, ppl=202.84, wps=48762, ups=3, wpb=15188.527, bsz=556.638, num_updates=3527, lr=0.000440887, gnorm=1.219, clip=0.000, oom=0.000, loss_scale=8.000, wall=1099, train_wall=949 | |
| epoch 001: 3540 / 8862 loss=8.698, nll_loss=7.657, ppl=201.78, wps=48768, ups=3, wpb=15187.677, bsz=556.797, num_updates=3537, lr=0.000442137, gnorm=1.219, clip=0.000, oom=0.000, loss_scale=8.000, wall=1102, train_wall=952 | |
| epoch 001: 3550 / 8862 loss=8.691, nll_loss=7.649, ppl=200.73, wps=48778, ups=3, wpb=15187.609, bsz=556.803, num_updates=3547, lr=0.000443386, gnorm=1.219, clip=0.000, oom=0.000, loss_scale=8.000, wall=1104, train_wall=954 | |
| epoch 001: 3560 / 8862 loss=8.684, nll_loss=7.641, ppl=199.58, wps=48789, ups=3, wpb=15188.039, bsz=556.846, num_updates=3557, lr=0.000444636, gnorm=1.218, clip=0.000, oom=0.000, loss_scale=8.000, wall=1107, train_wall=957 | |
| epoch 001: 3570 / 8862 loss=8.677, nll_loss=7.633, ppl=198.48, wps=48797, ups=3, wpb=15187.396, bsz=556.886, num_updates=3567, lr=0.000445886, gnorm=1.218, clip=0.000, oom=0.000, loss_scale=8.000, wall=1110, train_wall=960 | |
| epoch 001: 3580 / 8862 loss=8.670, nll_loss=7.625, ppl=197.36, wps=48810, ups=3, wpb=15188.301, bsz=556.830, num_updates=3577, lr=0.000447136, gnorm=1.217, clip=0.000, oom=0.000, loss_scale=8.000, wall=1113, train_wall=962 | |
| epoch 001: 3590 / 8862 loss=8.663, nll_loss=7.617, ppl=196.28, wps=48820, ups=3, wpb=15188.480, bsz=557.064, num_updates=3587, lr=0.000448385, gnorm=1.216, clip=0.000, oom=0.000, loss_scale=8.000, wall=1116, train_wall=965 | |
| epoch 001: 3600 / 8862 loss=8.657, nll_loss=7.609, ppl=195.26, wps=48827, ups=3, wpb=15187.372, bsz=557.015, num_updates=3597, lr=0.000449635, gnorm=1.216, clip=0.000, oom=0.000, loss_scale=8.000, wall=1119, train_wall=968 | |
| epoch 001: 3610 / 8862 loss=8.650, nll_loss=7.601, ppl=194.18, wps=48834, ups=3, wpb=15186.314, bsz=557.090, num_updates=3607, lr=0.000450885, gnorm=1.215, clip=0.000, oom=0.000, loss_scale=8.000, wall=1122, train_wall=970 | |
| epoch 001: 3620 / 8862 loss=8.643, nll_loss=7.593, ppl=193.12, wps=48845, ups=3, wpb=15186.679, bsz=557.095, num_updates=3617, lr=0.000452135, gnorm=1.215, clip=0.000, oom=0.000, loss_scale=8.000, wall=1125, train_wall=973 | |
| epoch 001: 3630 / 8862 loss=8.637, nll_loss=7.586, ppl=192.13, wps=48853, ups=3, wpb=15185.834, bsz=556.845, num_updates=3627, lr=0.000453384, gnorm=1.214, clip=0.000, oom=0.000, loss_scale=8.000, wall=1127, train_wall=975 | |
| epoch 001: 3640 / 8862 loss=8.630, nll_loss=7.578, ppl=191.08, wps=48865, ups=3, wpb=15186.570, bsz=556.685, num_updates=3637, lr=0.000454634, gnorm=1.213, clip=0.000, oom=0.000, loss_scale=8.000, wall=1130, train_wall=978 | |
| epoch 001: 3650 / 8862 loss=8.623, nll_loss=7.570, ppl=190.00, wps=48873, ups=3, wpb=15186.978, bsz=556.999, num_updates=3647, lr=0.000455884, gnorm=1.213, clip=0.000, oom=0.000, loss_scale=8.000, wall=1133, train_wall=981 | |
| epoch 001: 3660 / 8862 loss=8.616, nll_loss=7.562, ppl=189.00, wps=48883, ups=3, wpb=15187.013, bsz=557.035, num_updates=3657, lr=0.000457134, gnorm=1.212, clip=0.000, oom=0.000, loss_scale=8.000, wall=1136, train_wall=984 | |
| epoch 001: 3670 / 8862 loss=8.609, nll_loss=7.554, ppl=187.94, wps=48892, ups=3, wpb=15186.643, bsz=556.956, num_updates=3667, lr=0.000458383, gnorm=1.212, clip=0.000, oom=0.000, loss_scale=8.000, wall=1139, train_wall=986 | |
| epoch 001: 3680 / 8862 loss=8.602, nll_loss=7.546, ppl=186.87, wps=48905, ups=3, wpb=15187.832, bsz=556.914, num_updates=3677, lr=0.000459633, gnorm=1.211, clip=0.000, oom=0.000, loss_scale=8.000, wall=1142, train_wall=989 | |
| epoch 001: 3690 / 8862 loss=8.595, nll_loss=7.538, ppl=185.80, wps=48915, ups=3, wpb=15188.076, bsz=557.064, num_updates=3687, lr=0.000460883, gnorm=1.210, clip=0.000, oom=0.000, loss_scale=8.000, wall=1145, train_wall=992 | |
| epoch 001: 3700 / 8862 loss=8.589, nll_loss=7.530, ppl=184.87, wps=48922, ups=3, wpb=15187.687, bsz=557.061, num_updates=3697, lr=0.000462133, gnorm=1.210, clip=0.000, oom=0.000, loss_scale=8.000, wall=1148, train_wall=994 | |
| epoch 001: 3710 / 8862 loss=8.581, nll_loss=7.522, ppl=183.80, wps=48934, ups=3, wpb=15188.790, bsz=557.371, num_updates=3707, lr=0.000463382, gnorm=1.209, clip=0.000, oom=0.000, loss_scale=8.000, wall=1151, train_wall=997 | |
| epoch 001: 3720 / 8862 loss=8.575, nll_loss=7.514, ppl=182.82, wps=48942, ups=3, wpb=15188.376, bsz=557.443, num_updates=3717, lr=0.000464632, gnorm=1.208, clip=0.000, oom=0.000, loss_scale=8.000, wall=1154, train_wall=1000 | |
| epoch 001: 3730 / 8862 loss=8.568, nll_loss=7.507, ppl=181.91, wps=48951, ups=3, wpb=15187.838, bsz=557.228, num_updates=3727, lr=0.000465882, gnorm=1.208, clip=0.000, oom=0.000, loss_scale=8.000, wall=1156, train_wall=1002 | |
| epoch 001: 3740 / 8862 loss=8.562, nll_loss=7.499, ppl=180.93, wps=48959, ups=3, wpb=15187.657, bsz=557.394, num_updates=3737, lr=0.000467132, gnorm=1.207, clip=0.000, oom=0.000, loss_scale=8.000, wall=1159, train_wall=1005 | |
| epoch 001: 3750 / 8862 loss=8.555, nll_loss=7.492, ppl=179.98, wps=48964, ups=3, wpb=15186.943, bsz=557.574, num_updates=3747, lr=0.000468381, gnorm=1.207, clip=0.000, oom=0.000, loss_scale=8.000, wall=1162, train_wall=1008 | |
| epoch 001: 3760 / 8862 loss=8.548, nll_loss=7.484, ppl=179.02, wps=48971, ups=3, wpb=15186.628, bsz=558.119, num_updates=3757, lr=0.000469631, gnorm=1.207, clip=0.000, oom=0.000, loss_scale=8.000, wall=1165, train_wall=1010 | |
| epoch 001: 3770 / 8862 loss=8.542, nll_loss=7.476, ppl=178.09, wps=48977, ups=3, wpb=15185.610, bsz=557.986, num_updates=3767, lr=0.000470881, gnorm=1.206, clip=0.000, oom=0.000, loss_scale=8.000, wall=1168, train_wall=1013 | |
| epoch 001: 3780 / 8862 loss=8.536, nll_loss=7.469, ppl=177.21, wps=48983, ups=3, wpb=15184.688, bsz=557.797, num_updates=3777, lr=0.000472131, gnorm=1.206, clip=0.000, oom=0.000, loss_scale=8.000, wall=1171, train_wall=1016 | |
| epoch 001: 3790 / 8862 loss=8.529, nll_loss=7.462, ppl=176.31, wps=48991, ups=3, wpb=15184.052, bsz=557.610, num_updates=3787, lr=0.00047338, gnorm=1.205, clip=0.000, oom=0.000, loss_scale=8.000, wall=1174, train_wall=1018 | |
| epoch 001: 3800 / 8862 loss=8.523, nll_loss=7.455, ppl=175.41, wps=49000, ups=3, wpb=15183.882, bsz=557.341, num_updates=3797, lr=0.00047463, gnorm=1.204, clip=0.000, oom=0.000, loss_scale=8.000, wall=1177, train_wall=1021 | |
| epoch 001: 3810 / 8862 loss=8.516, nll_loss=7.447, ppl=174.49, wps=49010, ups=3, wpb=15184.351, bsz=557.167, num_updates=3807, lr=0.00047588, gnorm=1.204, clip=0.000, oom=0.000, loss_scale=8.000, wall=1179, train_wall=1024 | |
| epoch 001: 3820 / 8862 loss=8.510, nll_loss=7.439, ppl=173.57, wps=49018, ups=3, wpb=15184.228, bsz=557.225, num_updates=3817, lr=0.00047713, gnorm=1.203, clip=0.000, oom=0.000, loss_scale=8.000, wall=1182, train_wall=1026 | |
| epoch 001: 3830 / 8862 loss=8.503, nll_loss=7.432, ppl=172.68, wps=49025, ups=3, wpb=15183.796, bsz=557.253, num_updates=3827, lr=0.000478379, gnorm=1.203, clip=0.000, oom=0.000, loss_scale=8.000, wall=1185, train_wall=1029 | |
| epoch 001: 3840 / 8862 loss=8.497, nll_loss=7.424, ppl=171.76, wps=49035, ups=3, wpb=15184.193, bsz=557.274, num_updates=3837, lr=0.000479629, gnorm=1.202, clip=0.000, oom=0.000, loss_scale=8.000, wall=1188, train_wall=1032 | |
| epoch 001: 3850 / 8862 loss=8.490, nll_loss=7.417, ppl=170.88, wps=49043, ups=3, wpb=15183.964, bsz=557.400, num_updates=3847, lr=0.000480879, gnorm=1.201, clip=0.000, oom=0.000, loss_scale=8.000, wall=1191, train_wall=1034 | |
| epoch 001: 3860 / 8862 loss=8.484, nll_loss=7.409, ppl=170.01, wps=49051, ups=3, wpb=15183.635, bsz=557.241, num_updates=3857, lr=0.000482129, gnorm=1.201, clip=0.000, oom=0.000, loss_scale=8.000, wall=1194, train_wall=1037 | |
| epoch 001: 3870 / 8862 loss=8.477, nll_loss=7.402, ppl=169.10, wps=49058, ups=3, wpb=15183.242, bsz=557.209, num_updates=3867, lr=0.000483378, gnorm=1.200, clip=0.000, oom=0.000, loss_scale=8.000, wall=1197, train_wall=1040 | |
| epoch 001: 3880 / 8862 loss=8.470, nll_loss=7.394, ppl=168.19, wps=49067, ups=3, wpb=15183.448, bsz=557.251, num_updates=3877, lr=0.000484628, gnorm=1.199, clip=0.000, oom=0.000, loss_scale=8.000, wall=1200, train_wall=1042 | |
| epoch 001: 3890 / 8862 loss=8.464, nll_loss=7.386, ppl=167.32, wps=49074, ups=3, wpb=15183.365, bsz=557.176, num_updates=3887, lr=0.000485878, gnorm=1.199, clip=0.000, oom=0.000, loss_scale=8.000, wall=1203, train_wall=1045 | |
| epoch 001: 3900 / 8862 loss=8.457, nll_loss=7.379, ppl=166.45, wps=49081, ups=3, wpb=15182.927, bsz=557.177, num_updates=3897, lr=0.000487128, gnorm=1.198, clip=0.000, oom=0.000, loss_scale=8.000, wall=1206, train_wall=1048 | |
| epoch 001: 3910 / 8862 loss=8.451, nll_loss=7.372, ppl=165.60, wps=49089, ups=3, wpb=15182.965, bsz=557.143, num_updates=3907, lr=0.000488377, gnorm=1.197, clip=0.000, oom=0.000, loss_scale=8.000, wall=1208, train_wall=1050 | |
| epoch 001: 3920 / 8862 loss=8.445, nll_loss=7.364, ppl=164.78, wps=49096, ups=3, wpb=15182.542, bsz=557.177, num_updates=3917, lr=0.000489627, gnorm=1.197, clip=0.000, oom=0.000, loss_scale=8.000, wall=1211, train_wall=1053 | |
| epoch 001: 3930 / 8862 loss=8.438, nll_loss=7.357, ppl=163.89, wps=49102, ups=3, wpb=15182.032, bsz=557.516, num_updates=3927, lr=0.000490877, gnorm=1.196, clip=0.000, oom=0.000, loss_scale=8.000, wall=1214, train_wall=1056 | |
| epoch 001: 3940 / 8862 loss=8.431, nll_loss=7.349, ppl=162.97, wps=49110, ups=3, wpb=15182.592, bsz=557.892, num_updates=3937, lr=0.000492127, gnorm=1.195, clip=0.000, oom=0.000, loss_scale=8.000, wall=1217, train_wall=1058 | |
| epoch 001: 3950 / 8862 loss=8.424, nll_loss=7.341, ppl=162.10, wps=49121, ups=3, wpb=15183.429, bsz=558.009, num_updates=3947, lr=0.000493376, gnorm=1.195, clip=0.000, oom=0.000, loss_scale=8.000, wall=1220, train_wall=1061 | |
| epoch 001: 3960 / 8862 loss=8.418, nll_loss=7.334, ppl=161.31, wps=49129, ups=3, wpb=15183.268, bsz=557.786, num_updates=3957, lr=0.000494626, gnorm=1.194, clip=0.000, oom=0.000, loss_scale=8.000, wall=1223, train_wall=1064 | |
| epoch 001: 3970 / 8862 loss=8.412, nll_loss=7.327, ppl=160.53, wps=49135, ups=3, wpb=15182.899, bsz=557.729, num_updates=3967, lr=0.000495876, gnorm=1.194, clip=0.000, oom=0.000, loss_scale=8.000, wall=1226, train_wall=1066 | |
| epoch 001: 3980 / 8862 loss=8.406, nll_loss=7.319, ppl=159.72, wps=49142, ups=3, wpb=15182.882, bsz=557.726, num_updates=3977, lr=0.000497126, gnorm=1.193, clip=0.000, oom=0.000, loss_scale=8.000, wall=1229, train_wall=1069 | |
| epoch 001: 3990 / 8862 loss=8.399, nll_loss=7.312, ppl=158.90, wps=49152, ups=3, wpb=15183.450, bsz=557.670, num_updates=3987, lr=0.000498375, gnorm=1.192, clip=0.000, oom=0.000, loss_scale=8.000, wall=1232, train_wall=1072 | |
| epoch 001: 4000 / 8862 loss=8.393, nll_loss=7.304, ppl=158.03, wps=49162, ups=3, wpb=15184.162, bsz=557.662, num_updates=3997, lr=0.000499625, gnorm=1.191, clip=0.000, oom=0.000, loss_scale=8.000, wall=1235, train_wall=1074 | |
| epoch 001: 4010 / 8862 loss=8.386, nll_loss=7.297, ppl=157.23, wps=49169, ups=3, wpb=15183.922, bsz=557.739, num_updates=4007, lr=0.000499563, gnorm=1.191, clip=0.000, oom=0.000, loss_scale=8.000, wall=1237, train_wall=1077 | |
| epoch 001: 4020 / 8862 loss=8.380, nll_loss=7.289, ppl=156.43, wps=49178, ups=3, wpb=15184.552, bsz=557.767, num_updates=4017, lr=0.000498941, gnorm=1.190, clip=0.000, oom=0.000, loss_scale=8.000, wall=1240, train_wall=1080 | |
| epoch 001: 4030 / 8862 loss=8.373, nll_loss=7.282, ppl=155.64, wps=49187, ups=3, wpb=15184.664, bsz=557.798, num_updates=4027, lr=0.000498321, gnorm=1.190, clip=0.000, oom=0.000, loss_scale=8.000, wall=1243, train_wall=1082 | |
| epoch 001: 4040 / 8862 loss=8.367, nll_loss=7.275, ppl=154.86, wps=49194, ups=3, wpb=15184.386, bsz=557.736, num_updates=4037, lr=0.000497703, gnorm=1.190, clip=0.000, oom=0.000, loss_scale=8.000, wall=1246, train_wall=1085 | |
| epoch 001: 4050 / 8862 loss=8.360, nll_loss=7.267, ppl=154.01, wps=49203, ups=3, wpb=15185.052, bsz=557.892, num_updates=4047, lr=0.000497088, gnorm=1.189, clip=0.000, oom=0.000, loss_scale=8.000, wall=1249, train_wall=1088 | |
| epoch 001: 4060 / 8862 loss=8.354, nll_loss=7.260, ppl=153.23, wps=49211, ups=3, wpb=15185.086, bsz=557.907, num_updates=4057, lr=0.000496475, gnorm=1.188, clip=0.000, oom=0.000, loss_scale=8.000, wall=1252, train_wall=1091 | |
| epoch 001: 4070 / 8862 loss=8.347, nll_loss=7.252, ppl=152.42, wps=49217, ups=3, wpb=15185.382, bsz=558.276, num_updates=4067, lr=0.000495864, gnorm=1.188, clip=0.000, oom=0.000, loss_scale=8.000, wall=1255, train_wall=1093 | |
| epoch 001: 4080 / 8862 loss=8.342, nll_loss=7.245, ppl=151.71, wps=49223, ups=3, wpb=15184.924, bsz=558.232, num_updates=4077, lr=0.000495256, gnorm=1.187, clip=0.000, oom=0.000, loss_scale=8.000, wall=1258, train_wall=1096 | |
| epoch 001: 4090 / 8862 loss=8.335, nll_loss=7.238, ppl=150.98, wps=49230, ups=3, wpb=15184.694, bsz=558.077, num_updates=4087, lr=0.00049465, gnorm=1.186, clip=0.000, oom=0.000, loss_scale=8.000, wall=1261, train_wall=1099 | |
| epoch 001: 4100 / 8862 loss=8.329, nll_loss=7.231, ppl=150.24, wps=49238, ups=3, wpb=15184.907, bsz=558.031, num_updates=4097, lr=0.000494046, gnorm=1.186, clip=0.000, oom=0.000, loss_scale=8.000, wall=1264, train_wall=1101 | |
| epoch 001: 4110 / 8862 loss=8.323, nll_loss=7.224, ppl=149.50, wps=49248, ups=3, wpb=15185.674, bsz=557.980, num_updates=4107, lr=0.000493444, gnorm=1.185, clip=0.000, oom=0.000, loss_scale=8.000, wall=1266, train_wall=1104 | |
| epoch 001: 4120 / 8862 loss=8.317, nll_loss=7.217, ppl=148.79, wps=49255, ups=3, wpb=15185.328, bsz=557.887, num_updates=4117, lr=0.000492844, gnorm=1.184, clip=0.000, oom=0.000, loss_scale=8.000, wall=1269, train_wall=1107 | |
| epoch 001: 4130 / 8862 loss=8.312, nll_loss=7.211, ppl=148.11, wps=49262, ups=3, wpb=15184.726, bsz=557.536, num_updates=4127, lr=0.000492247, gnorm=1.183, clip=0.000, oom=0.000, loss_scale=8.000, wall=1272, train_wall=1109 | |
| epoch 001: 4140 / 8862 loss=8.305, nll_loss=7.203, ppl=147.38, wps=49270, ups=3, wpb=15185.137, bsz=557.414, num_updates=4137, lr=0.000491651, gnorm=1.183, clip=0.000, oom=0.000, loss_scale=8.000, wall=1275, train_wall=1112 | |
| epoch 001: 4150 / 8862 loss=8.299, nll_loss=7.196, ppl=146.63, wps=49275, ups=3, wpb=15185.288, bsz=557.957, num_updates=4147, lr=0.000491058, gnorm=1.182, clip=0.000, oom=0.000, loss_scale=8.000, wall=1278, train_wall=1115 | |
| epoch 001: 4160 / 8862 loss=8.293, nll_loss=7.189, ppl=145.91, wps=49282, ups=3, wpb=15184.970, bsz=557.896, num_updates=4157, lr=0.000490467, gnorm=1.181, clip=0.000, oom=0.000, loss_scale=8.000, wall=1281, train_wall=1117 | |
| epoch 001: 4170 / 8862 loss=8.286, nll_loss=7.182, ppl=145.18, wps=49288, ups=3, wpb=15184.741, bsz=557.868, num_updates=4167, lr=0.000489878, gnorm=1.181, clip=0.000, oom=0.000, loss_scale=8.000, wall=1284, train_wall=1120 | |
| epoch 001: 4180 / 8862 loss=8.280, nll_loss=7.174, ppl=144.44, wps=49297, ups=3, wpb=15185.114, bsz=557.981, num_updates=4177, lr=0.000489292, gnorm=1.180, clip=0.000, oom=0.000, loss_scale=8.000, wall=1287, train_wall=1123 | |
| epoch 001: 4190 / 8862 loss=8.274, nll_loss=7.168, ppl=143.77, wps=49302, ups=3, wpb=15184.553, bsz=557.955, num_updates=4187, lr=0.000488707, gnorm=1.179, clip=0.000, oom=0.000, loss_scale=8.000, wall=1290, train_wall=1125 | |
| epoch 001: 4200 / 8862 loss=8.268, nll_loss=7.160, ppl=143.04, wps=49310, ups=3, wpb=15184.876, bsz=558.297, num_updates=4197, lr=0.000488124, gnorm=1.179, clip=0.000, oom=0.000, loss_scale=8.000, wall=1292, train_wall=1128 | |
| epoch 001: 4210 / 8862 loss=8.262, nll_loss=7.153, ppl=142.35, wps=49317, ups=3, wpb=15184.616, bsz=558.185, num_updates=4207, lr=0.000487544, gnorm=1.178, clip=0.000, oom=0.000, loss_scale=8.000, wall=1295, train_wall=1131 | |
| epoch 001: 4220 / 8862 loss=8.256, nll_loss=7.147, ppl=141.69, wps=49323, ups=3, wpb=15184.173, bsz=558.004, num_updates=4217, lr=0.000486966, gnorm=1.178, clip=0.000, oom=0.000, loss_scale=8.000, wall=1298, train_wall=1133 | |
| epoch 001: 4230 / 8862 loss=8.250, nll_loss=7.140, ppl=141.02, wps=49331, ups=3, wpb=15184.211, bsz=557.937, num_updates=4227, lr=0.000486389, gnorm=1.177, clip=0.000, oom=0.000, loss_scale=8.000, wall=1301, train_wall=1136 | |
| epoch 001: 4240 / 8862 loss=8.244, nll_loss=7.133, ppl=140.33, wps=49338, ups=3, wpb=15184.012, bsz=558.096, num_updates=4237, lr=0.000485815, gnorm=1.176, clip=0.000, oom=0.000, loss_scale=8.000, wall=1304, train_wall=1139 | |
| epoch 001: 4250 / 8862 loss=8.238, nll_loss=7.126, ppl=139.66, wps=49344, ups=3, wpb=15183.912, bsz=557.997, num_updates=4247, lr=0.000485243, gnorm=1.175, clip=0.000, oom=0.000, loss_scale=8.000, wall=1307, train_wall=1141 | |
| epoch 001: 4260 / 8862 loss=8.232, nll_loss=7.119, ppl=139.00, wps=49349, ups=3, wpb=15184.049, bsz=558.368, num_updates=4257, lr=0.000484672, gnorm=1.175, clip=0.000, oom=0.000, loss_scale=8.000, wall=1310, train_wall=1144 | |
| epoch 001: 4270 / 8862 loss=8.226, nll_loss=7.112, ppl=138.34, wps=49356, ups=3, wpb=15184.007, bsz=558.293, num_updates=4267, lr=0.000484104, gnorm=1.175, clip=0.000, oom=0.000, loss_scale=8.000, wall=1313, train_wall=1147 | |
| epoch 001: 4280 / 8862 loss=8.220, nll_loss=7.105, ppl=137.68, wps=49364, ups=3, wpb=15184.341, bsz=558.254, num_updates=4277, lr=0.000483538, gnorm=1.174, clip=0.000, oom=0.000, loss_scale=8.000, wall=1316, train_wall=1149 | |
| epoch 001: 4290 / 8862 loss=8.214, nll_loss=7.098, ppl=136.99, wps=49371, ups=3, wpb=15184.539, bsz=558.481, num_updates=4287, lr=0.000482973, gnorm=1.173, clip=0.000, oom=0.000, loss_scale=8.000, wall=1319, train_wall=1152 | |
| epoch 001: 4300 / 8862 loss=8.208, nll_loss=7.091, ppl=136.37, wps=49378, ups=3, wpb=15184.764, bsz=558.631, num_updates=4297, lr=0.000482411, gnorm=1.173, clip=0.000, oom=0.000, loss_scale=8.000, wall=1321, train_wall=1155 | |
| epoch 001: 4310 / 8862 loss=8.202, nll_loss=7.085, ppl=135.74, wps=49384, ups=3, wpb=15184.685, bsz=558.647, num_updates=4307, lr=0.000481851, gnorm=1.172, clip=0.000, oom=0.000, loss_scale=8.000, wall=1324, train_wall=1157 | |
| epoch 001: 4320 / 8862 loss=8.196, nll_loss=7.078, ppl=135.10, wps=49392, ups=3, wpb=15184.814, bsz=558.578, num_updates=4317, lr=0.000481292, gnorm=1.171, clip=0.000, oom=0.000, loss_scale=8.000, wall=1327, train_wall=1160 | |
| epoch 001: 4330 / 8862 loss=8.191, nll_loss=7.071, ppl=134.47, wps=49396, ups=3, wpb=15184.071, bsz=558.604, num_updates=4327, lr=0.000480736, gnorm=1.170, clip=0.000, oom=0.000, loss_scale=8.000, wall=1330, train_wall=1163 | |
| epoch 001: 4340 / 8862 loss=8.185, nll_loss=7.064, ppl=133.85, wps=49402, ups=3, wpb=15183.612, bsz=558.847, num_updates=4337, lr=0.000480181, gnorm=1.170, clip=0.000, oom=0.000, loss_scale=8.000, wall=1333, train_wall=1165 | |
| epoch 001: 4350 / 8862 loss=8.179, nll_loss=7.058, ppl=133.26, wps=49409, ups=3, wpb=15183.084, bsz=558.658, num_updates=4347, lr=0.000479629, gnorm=1.169, clip=0.000, oom=0.000, loss_scale=8.000, wall=1336, train_wall=1168 | |
| epoch 001: 4360 / 8862 loss=8.174, nll_loss=7.052, ppl=132.68, wps=49413, ups=3, wpb=15182.018, bsz=558.626, num_updates=4357, lr=0.000479078, gnorm=1.168, clip=0.000, oom=0.000, loss_scale=8.000, wall=1339, train_wall=1171 | |
| epoch 001: 4370 / 8862 loss=8.168, nll_loss=7.045, ppl=132.07, wps=49417, ups=3, wpb=15181.123, bsz=558.800, num_updates=4367, lr=0.000478529, gnorm=1.168, clip=0.000, oom=0.000, loss_scale=8.000, wall=1342, train_wall=1173 | |
| epoch 001: 4380 / 8862 loss=8.162, nll_loss=7.038, ppl=131.46, wps=49426, ups=3, wpb=15181.503, bsz=558.804, num_updates=4377, lr=0.000477982, gnorm=1.167, clip=0.000, oom=0.000, loss_scale=8.000, wall=1344, train_wall=1176 | |
| epoch 001: 4390 / 8862 loss=8.157, nll_loss=7.032, ppl=130.88, wps=49431, ups=3, wpb=15180.910, bsz=558.687, num_updates=4387, lr=0.000477437, gnorm=1.166, clip=0.000, oom=0.000, loss_scale=8.000, wall=1347, train_wall=1179 | |
| epoch 001: 4400 / 8862 loss=8.151, nll_loss=7.026, ppl=130.30, wps=49438, ups=3, wpb=15181.154, bsz=558.628, num_updates=4397, lr=0.000476894, gnorm=1.166, clip=0.000, oom=0.000, loss_scale=8.000, wall=1350, train_wall=1181 | |
| epoch 001: 4410 / 8862 loss=8.145, nll_loss=7.019, ppl=129.66, wps=49445, ups=3, wpb=15181.424, bsz=558.711, num_updates=4407, lr=0.000476353, gnorm=1.165, clip=0.000, oom=0.000, loss_scale=8.000, wall=1353, train_wall=1184 | |
| epoch 001: 4420 / 8862 loss=8.139, nll_loss=7.012, ppl=129.10, wps=49451, ups=3, wpb=15181.108, bsz=558.509, num_updates=4417, lr=0.000475813, gnorm=1.164, clip=0.000, oom=0.000, loss_scale=8.000, wall=1356, train_wall=1187 | |
| epoch 001: 4430 / 8862 loss=8.134, nll_loss=7.006, ppl=128.53, wps=49458, ups=3, wpb=15181.068, bsz=558.359, num_updates=4427, lr=0.000475275, gnorm=1.164, clip=0.000, oom=0.000, loss_scale=8.000, wall=1359, train_wall=1189 | |
| epoch 001: 4440 / 8862 loss=8.128, nll_loss=6.999, ppl=127.93, wps=49465, ups=3, wpb=15180.900, bsz=558.278, num_updates=4437, lr=0.000474739, gnorm=1.163, clip=0.000, oom=0.000, loss_scale=8.000, wall=1362, train_wall=1192 | |
| epoch 001: 4450 / 8862 loss=8.122, nll_loss=6.993, ppl=127.34, wps=49473, ups=3, wpb=15181.493, bsz=558.247, num_updates=4447, lr=0.000474205, gnorm=1.162, clip=0.000, oom=0.000, loss_scale=8.000, wall=1365, train_wall=1195 | |
| epoch 001: 4460 / 8862 loss=8.117, nll_loss=6.986, ppl=126.76, wps=49478, ups=3, wpb=15181.291, bsz=558.334, num_updates=4457, lr=0.000473673, gnorm=1.162, clip=0.000, oom=0.000, loss_scale=8.000, wall=1368, train_wall=1197 | |
| epoch 001: 4470 / 8862 loss=8.111, nll_loss=6.979, ppl=126.17, wps=49486, ups=3, wpb=15181.761, bsz=558.443, num_updates=4467, lr=0.000473143, gnorm=1.161, clip=0.000, oom=0.000, loss_scale=8.000, wall=1370, train_wall=1200 | |
| epoch 001: 4480 / 8862 loss=8.105, nll_loss=6.973, ppl=125.65, wps=49492, ups=3, wpb=15181.690, bsz=558.223, num_updates=4477, lr=0.000472614, gnorm=1.160, clip=0.000, oom=0.000, loss_scale=8.000, wall=1373, train_wall=1203 | |
| epoch 001: 4490 / 8862 loss=8.100, nll_loss=6.967, ppl=125.09, wps=49496, ups=3, wpb=15180.983, bsz=558.300, num_updates=4487, lr=0.000472087, gnorm=1.159, clip=0.000, oom=0.000, loss_scale=8.000, wall=1376, train_wall=1205 | |
| epoch 001: 4500 / 8862 loss=8.094, nll_loss=6.960, ppl=124.54, wps=49502, ups=3, wpb=15180.560, bsz=558.279, num_updates=4497, lr=0.000471562, gnorm=1.159, clip=0.000, oom=0.000, loss_scale=8.000, wall=1379, train_wall=1208 | |
| epoch 001: 4510 / 8862 loss=8.089, nll_loss=6.954, ppl=123.98, wps=49509, ups=3, wpb=15180.679, bsz=558.299, num_updates=4507, lr=0.000471038, gnorm=1.158, clip=0.000, oom=0.000, loss_scale=8.000, wall=1382, train_wall=1211 | |
| epoch 001: 4520 / 8862 loss=8.083, nll_loss=6.947, ppl=123.42, wps=49517, ups=3, wpb=15181.363, bsz=558.315, num_updates=4517, lr=0.000470517, gnorm=1.157, clip=0.000, oom=0.000, loss_scale=8.000, wall=1385, train_wall=1213 | |
| epoch 001: 4530 / 8862 loss=8.078, nll_loss=6.941, ppl=122.88, wps=49523, ups=3, wpb=15181.195, bsz=558.096, num_updates=4527, lr=0.000469997, gnorm=1.157, clip=0.000, oom=0.000, loss_scale=8.000, wall=1388, train_wall=1216 | |
| epoch 001: 4540 / 8862 loss=8.072, nll_loss=6.935, ppl=122.36, wps=49529, ups=3, wpb=15181.012, bsz=558.000, num_updates=4537, lr=0.000469478, gnorm=1.156, clip=0.000, oom=0.000, loss_scale=8.000, wall=1391, train_wall=1219 | |
| epoch 001: 4550 / 8862 loss=8.067, nll_loss=6.929, ppl=121.82, wps=49534, ups=3, wpb=15180.755, bsz=557.996, num_updates=4547, lr=0.000468962, gnorm=1.155, clip=0.000, oom=0.000, loss_scale=8.000, wall=1394, train_wall=1221 | |
| epoch 001: 4560 / 8862 loss=8.061, nll_loss=6.922, ppl=121.28, wps=49542, ups=3, wpb=15181.049, bsz=557.963, num_updates=4557, lr=0.000468447, gnorm=1.154, clip=0.000, oom=0.000, loss_scale=8.000, wall=1396, train_wall=1224 | |
| epoch 001: 4570 / 8862 loss=8.055, nll_loss=6.915, ppl=120.71, wps=49549, ups=3, wpb=15181.559, bsz=558.141, num_updates=4567, lr=0.000467934, gnorm=1.153, clip=0.000, oom=0.000, loss_scale=8.000, wall=1399, train_wall=1227 | |
| epoch 001: 4580 / 8862 loss=8.050, nll_loss=6.909, ppl=120.21, wps=49554, ups=3, wpb=15181.159, bsz=558.026, num_updates=4577, lr=0.000467422, gnorm=1.153, clip=0.000, oom=0.000, loss_scale=8.000, wall=1402, train_wall=1229 | |
| epoch 001: 4590 / 8862 loss=8.044, nll_loss=6.903, ppl=119.68, wps=49559, ups=3, wpb=15181.171, bsz=558.142, num_updates=4587, lr=0.000466913, gnorm=1.152, clip=0.000, oom=0.000, loss_scale=8.000, wall=1405, train_wall=1232 | |
| epoch 001: 4600 / 8862 loss=8.039, nll_loss=6.897, ppl=119.17, wps=49566, ups=3, wpb=15181.126, bsz=558.155, num_updates=4597, lr=0.000466405, gnorm=1.152, clip=0.000, oom=0.000, loss_scale=8.000, wall=1408, train_wall=1235 | |
| epoch 001: 4610 / 8862 loss=8.034, nll_loss=6.891, ppl=118.67, wps=49568, ups=3, wpb=15180.111, bsz=558.267, num_updates=4607, lr=0.000465898, gnorm=1.151, clip=0.000, oom=0.000, loss_scale=8.000, wall=1411, train_wall=1237 | |
| epoch 001: 4620 / 8862 loss=8.028, nll_loss=6.884, ppl=118.15, wps=49574, ups=3, wpb=15179.963, bsz=558.284, num_updates=4617, lr=0.000465393, gnorm=1.150, clip=0.000, oom=0.000, loss_scale=8.000, wall=1414, train_wall=1240 | |
| epoch 001: 4630 / 8862 loss=8.023, nll_loss=6.878, ppl=117.63, wps=49581, ups=3, wpb=15180.416, bsz=558.274, num_updates=4627, lr=0.00046489, gnorm=1.149, clip=0.000, oom=0.000, loss_scale=8.000, wall=1417, train_wall=1243 | |
| epoch 001: 4640 / 8862 loss=8.017, nll_loss=6.872, ppl=117.13, wps=49586, ups=3, wpb=15179.882, bsz=558.176, num_updates=4637, lr=0.000464388, gnorm=1.149, clip=0.000, oom=0.000, loss_scale=8.000, wall=1420, train_wall=1245 | |
| epoch 001: 4650 / 8862 loss=8.012, nll_loss=6.866, ppl=116.61, wps=49591, ups=3, wpb=15179.834, bsz=558.211, num_updates=4647, lr=0.000463889, gnorm=1.148, clip=0.000, oom=0.000, loss_scale=8.000, wall=1422, train_wall=1248 | |
| epoch 001: 4660 / 8862 loss=8.006, nll_loss=6.859, ppl=116.10, wps=49597, ups=3, wpb=15179.747, bsz=558.177, num_updates=4657, lr=0.00046339, gnorm=1.148, clip=0.000, oom=0.000, loss_scale=8.000, wall=1425, train_wall=1251 | |
| epoch 001: 4670 / 8862 loss=8.001, nll_loss=6.853, ppl=115.64, wps=49602, ups=3, wpb=15179.156, bsz=558.160, num_updates=4667, lr=0.000462894, gnorm=1.147, clip=0.000, oom=0.000, loss_scale=8.000, wall=1428, train_wall=1253 | |
| epoch 001: 4680 / 8862 loss=7.996, nll_loss=6.847, ppl=115.15, wps=49607, ups=3, wpb=15179.073, bsz=558.364, num_updates=4677, lr=0.000462398, gnorm=1.146, clip=0.000, oom=0.000, loss_scale=8.000, wall=1431, train_wall=1256 | |
| epoch 001: 4690 / 8862 loss=7.991, nll_loss=6.841, ppl=114.67, wps=49610, ups=3, wpb=15178.403, bsz=558.282, num_updates=4687, lr=0.000461905, gnorm=1.146, clip=0.000, oom=0.000, loss_scale=16.000, wall=1434, train_wall=1259 | |
| epoch 001: 4700 / 8862 loss=7.985, nll_loss=6.835, ppl=114.18, wps=49618, ups=3, wpb=15178.605, bsz=558.192, num_updates=4697, lr=0.000461413, gnorm=1.145, clip=0.000, oom=0.000, loss_scale=16.000, wall=1437, train_wall=1261 | |
| epoch 001: 4710 / 8862 loss=7.980, nll_loss=6.829, ppl=113.73, wps=49622, ups=3, wpb=15177.892, bsz=558.156, num_updates=4707, lr=0.000460922, gnorm=1.144, clip=0.000, oom=0.000, loss_scale=16.000, wall=1440, train_wall=1264 | |
| epoch 001: 4720 / 8862 loss=7.975, nll_loss=6.823, ppl=113.26, wps=49627, ups=3, wpb=15177.752, bsz=557.971, num_updates=4717, lr=0.000460434, gnorm=1.144, clip=0.000, oom=0.000, loss_scale=16.000, wall=1443, train_wall=1267 | |
| epoch 001: 4730 / 8862 loss=7.970, nll_loss=6.817, ppl=112.79, wps=49633, ups=3, wpb=15177.562, bsz=557.884, num_updates=4727, lr=0.000459946, gnorm=1.143, clip=0.000, oom=0.000, loss_scale=16.000, wall=1446, train_wall=1269 | |
| epoch 001: 4740 / 8862 loss=7.965, nll_loss=6.812, ppl=112.35, wps=49638, ups=3, wpb=15177.334, bsz=557.789, num_updates=4737, lr=0.000459461, gnorm=1.142, clip=0.000, oom=0.000, loss_scale=16.000, wall=1448, train_wall=1272 | |
| epoch 001: 4750 / 8862 loss=7.960, nll_loss=6.806, ppl=111.89, wps=49639, ups=3, wpb=15176.544, bsz=558.060, num_updates=4747, lr=0.000458976, gnorm=1.142, clip=0.000, oom=0.000, loss_scale=16.000, wall=1451, train_wall=1275 | |
| epoch 001: 4760 / 8862 loss=7.954, nll_loss=6.800, ppl=111.41, wps=49646, ups=3, wpb=15176.736, bsz=558.035, num_updates=4757, lr=0.000458494, gnorm=1.141, clip=0.000, oom=0.000, loss_scale=16.000, wall=1454, train_wall=1277 | |
| epoch 001: 4770 / 8862 loss=7.949, nll_loss=6.794, ppl=110.94, wps=49650, ups=3, wpb=15176.110, bsz=557.972, num_updates=4767, lr=0.000458013, gnorm=1.140, clip=0.000, oom=0.000, loss_scale=16.000, wall=1457, train_wall=1280 | |
| epoch 001: 4780 / 8862 loss=7.944, nll_loss=6.788, ppl=110.47, wps=49656, ups=3, wpb=15176.076, bsz=557.956, num_updates=4777, lr=0.000457533, gnorm=1.139, clip=0.000, oom=0.000, loss_scale=16.000, wall=1460, train_wall=1283 | |
| epoch 001: 4790 / 8862 loss=7.939, nll_loss=6.781, ppl=110.01, wps=49661, ups=3, wpb=15175.550, bsz=557.859, num_updates=4787, lr=0.000457055, gnorm=1.139, clip=0.000, oom=0.000, loss_scale=16.000, wall=1463, train_wall=1285 | |
| epoch 001: 4800 / 8862 loss=7.934, nll_loss=6.776, ppl=109.57, wps=49663, ups=3, wpb=15174.541, bsz=557.877, num_updates=4797, lr=0.000456578, gnorm=1.138, clip=0.000, oom=0.000, loss_scale=16.000, wall=1466, train_wall=1288 | |
| epoch 001: 4810 / 8862 loss=7.929, nll_loss=6.770, ppl=109.18, wps=49667, ups=3, wpb=15173.839, bsz=557.683, num_updates=4807, lr=0.000456103, gnorm=1.138, clip=0.000, oom=0.000, loss_scale=16.000, wall=1469, train_wall=1291 | |
| epoch 001: 4820 / 8862 loss=7.924, nll_loss=6.764, ppl=108.70, wps=49674, ups=3, wpb=15174.357, bsz=557.719, num_updates=4817, lr=0.000455629, gnorm=1.137, clip=0.000, oom=0.000, loss_scale=16.000, wall=1471, train_wall=1293 | |
| epoch 001: 4830 / 8862 loss=7.918, nll_loss=6.758, ppl=108.25, wps=49680, ups=3, wpb=15174.237, bsz=557.640, num_updates=4827, lr=0.000455157, gnorm=1.136, clip=0.000, oom=0.000, loss_scale=16.000, wall=1474, train_wall=1296 | |
| epoch 001: 4840 / 8862 loss=7.913, nll_loss=6.752, ppl=107.78, wps=49684, ups=3, wpb=15174.158, bsz=557.841, num_updates=4837, lr=0.000454686, gnorm=1.135, clip=0.000, oom=0.000, loss_scale=16.000, wall=1477, train_wall=1299 | |
| epoch 001: 4850 / 8862 loss=7.908, nll_loss=6.746, ppl=107.34, wps=49691, ups=3, wpb=15174.614, bsz=557.676, num_updates=4847, lr=0.000454217, gnorm=1.135, clip=0.000, oom=0.000, loss_scale=16.000, wall=1480, train_wall=1301 | |
| epoch 001: 4860 / 8862 loss=7.903, nll_loss=6.740, ppl=106.90, wps=49697, ups=3, wpb=15174.746, bsz=557.674, num_updates=4857, lr=0.000453749, gnorm=1.134, clip=0.000, oom=0.000, loss_scale=16.000, wall=1483, train_wall=1304 | |
| epoch 001: 4870 / 8862 loss=7.897, nll_loss=6.734, ppl=106.45, wps=49702, ups=3, wpb=15174.771, bsz=557.825, num_updates=4867, lr=0.000453283, gnorm=1.133, clip=0.000, oom=0.000, loss_scale=16.000, wall=1486, train_wall=1307 | |
| epoch 001: 4880 / 8862 loss=7.892, nll_loss=6.728, ppl=105.99, wps=49708, ups=3, wpb=15174.933, bsz=557.885, num_updates=4877, lr=0.000452818, gnorm=1.132, clip=0.000, oom=0.000, loss_scale=16.000, wall=1489, train_wall=1309 | |
| epoch 001: 4890 / 8862 loss=7.886, nll_loss=6.721, ppl=105.52, wps=49715, ups=3, wpb=15175.735, bsz=557.929, num_updates=4887, lr=0.000452354, gnorm=1.132, clip=0.000, oom=0.000, loss_scale=16.000, wall=1492, train_wall=1312 | |
| epoch 001: 4900 / 8862 loss=7.881, nll_loss=6.716, ppl=105.10, wps=49720, ups=3, wpb=15175.536, bsz=557.740, num_updates=4897, lr=0.000451892, gnorm=1.131, clip=0.000, oom=0.000, loss_scale=16.000, wall=1495, train_wall=1315 | |
| epoch 001: 4910 / 8862 loss=7.876, nll_loss=6.710, ppl=104.69, wps=49725, ups=3, wpb=15175.544, bsz=557.719, num_updates=4907, lr=0.000451432, gnorm=1.131, clip=0.000, oom=0.000, loss_scale=16.000, wall=1498, train_wall=1318 | |
| epoch 001: 4920 / 8862 loss=7.871, nll_loss=6.704, ppl=104.25, wps=49727, ups=3, wpb=15175.485, bsz=558.107, num_updates=4917, lr=0.000450972, gnorm=1.130, clip=0.000, oom=0.000, loss_scale=16.000, wall=1501, train_wall=1320 | |
| epoch 001: 4930 / 8862 loss=7.866, nll_loss=6.698, ppl=103.80, wps=49734, ups=3, wpb=15176.020, bsz=558.114, num_updates=4927, lr=0.000450514, gnorm=1.129, clip=0.000, oom=0.000, loss_scale=16.000, wall=1503, train_wall=1323 | |
| epoch 001: 4940 / 8862 loss=7.860, nll_loss=6.692, ppl=103.38, wps=49739, ups=3, wpb=15175.810, bsz=558.063, num_updates=4937, lr=0.000450058, gnorm=1.129, clip=0.000, oom=0.000, loss_scale=16.000, wall=1506, train_wall=1326 | |
| epoch 001: 4950 / 8862 loss=7.856, nll_loss=6.686, ppl=102.97, wps=49744, ups=3, wpb=15175.566, bsz=557.998, num_updates=4947, lr=0.000449603, gnorm=1.128, clip=0.000, oom=0.000, loss_scale=16.000, wall=1509, train_wall=1328 | |
| epoch 001: 4960 / 8862 loss=7.851, nll_loss=6.681, ppl=102.58, wps=49748, ups=3, wpb=15175.150, bsz=557.824, num_updates=4957, lr=0.000449149, gnorm=1.127, clip=0.000, oom=0.000, loss_scale=16.000, wall=1512, train_wall=1331 | |
| epoch 001: 4970 / 8862 loss=7.845, nll_loss=6.675, ppl=102.15, wps=49753, ups=3, wpb=15175.262, bsz=558.002, num_updates=4967, lr=0.000448697, gnorm=1.127, clip=0.000, oom=0.000, loss_scale=16.000, wall=1515, train_wall=1334 | |
| epoch 001: 4980 / 8862 loss=7.840, nll_loss=6.669, ppl=101.74, wps=49758, ups=3, wpb=15175.150, bsz=557.979, num_updates=4977, lr=0.000448246, gnorm=1.126, clip=0.000, oom=0.000, loss_scale=16.000, wall=1518, train_wall=1336 | |
| epoch 001: 4990 / 8862 loss=7.835, nll_loss=6.663, ppl=101.32, wps=49765, ups=3, wpb=15175.698, bsz=557.852, num_updates=4987, lr=0.000447796, gnorm=1.125, clip=0.000, oom=0.000, loss_scale=16.000, wall=1521, train_wall=1339 | |
| epoch 001: 5000 / 8862 loss=7.830, nll_loss=6.657, ppl=100.90, wps=49772, ups=3, wpb=15176.277, bsz=557.998, num_updates=4997, lr=0.000447348, gnorm=1.125, clip=0.000, oom=0.000, loss_scale=16.000, wall=1524, train_wall=1342 | |
| epoch 001: 5010 / 8862 loss=7.825, nll_loss=6.651, ppl=100.48, wps=49777, ups=3, wpb=15176.153, bsz=558.141, num_updates=5007, lr=0.000446901, gnorm=1.124, clip=0.000, oom=0.000, loss_scale=16.000, wall=1527, train_wall=1344 | |
| epoch 001: 5020 / 8862 loss=7.820, nll_loss=6.645, ppl=100.09, wps=49783, ups=3, wpb=15176.450, bsz=558.067, num_updates=5017, lr=0.000446455, gnorm=1.123, clip=0.000, oom=0.000, loss_scale=16.000, wall=1529, train_wall=1347 | |
| epoch 001: 5030 / 8862 loss=7.815, nll_loss=6.639, ppl=99.69, wps=49787, ups=3, wpb=15176.548, bsz=558.378, num_updates=5027, lr=0.000446011, gnorm=1.123, clip=0.000, oom=0.000, loss_scale=16.000, wall=1532, train_wall=1350 | |
| epoch 001: 5040 / 8862 loss=7.810, nll_loss=6.634, ppl=99.31, wps=49791, ups=3, wpb=15176.155, bsz=558.424, num_updates=5037, lr=0.000445568, gnorm=1.122, clip=0.000, oom=0.000, loss_scale=16.000, wall=1535, train_wall=1352 | |
| epoch 001: 5050 / 8862 loss=7.805, nll_loss=6.628, ppl=98.93, wps=49795, ups=3, wpb=15175.917, bsz=558.294, num_updates=5047, lr=0.000445126, gnorm=1.121, clip=0.000, oom=0.000, loss_scale=16.000, wall=1538, train_wall=1355 | |
| epoch 001: 5060 / 8862 loss=7.800, nll_loss=6.623, ppl=98.53, wps=49800, ups=3, wpb=15175.702, bsz=558.405, num_updates=5057, lr=0.000444686, gnorm=1.121, clip=0.000, oom=0.000, loss_scale=16.000, wall=1541, train_wall=1358 | |
| epoch 001: 5070 / 8862 loss=7.795, nll_loss=6.617, ppl=98.15, wps=49804, ups=3, wpb=15175.635, bsz=558.495, num_updates=5067, lr=0.000444247, gnorm=1.120, clip=0.000, oom=0.000, loss_scale=16.000, wall=1544, train_wall=1360 | |
| epoch 001: 5080 / 8862 loss=7.790, nll_loss=6.611, ppl=97.77, wps=49809, ups=3, wpb=15175.538, bsz=558.477, num_updates=5077, lr=0.000443809, gnorm=1.119, clip=0.000, oom=0.000, loss_scale=16.000, wall=1547, train_wall=1363 | |
| epoch 001: 5090 / 8862 loss=7.786, nll_loss=6.606, ppl=97.40, wps=49814, ups=3, wpb=15175.307, bsz=558.455, num_updates=5087, lr=0.000443373, gnorm=1.118, clip=0.000, oom=0.000, loss_scale=16.000, wall=1550, train_wall=1366 | |
| epoch 001: 5100 / 8862 loss=7.781, nll_loss=6.600, ppl=97.03, wps=49819, ups=3, wpb=15175.173, bsz=558.256, num_updates=5097, lr=0.000442938, gnorm=1.118, clip=0.000, oom=0.000, loss_scale=16.000, wall=1553, train_wall=1368 | |
| epoch 001: 5110 / 8862 loss=7.776, nll_loss=6.595, ppl=96.67, wps=49823, ups=3, wpb=15175.211, bsz=558.375, num_updates=5107, lr=0.000442504, gnorm=1.117, clip=0.000, oom=0.000, loss_scale=16.000, wall=1555, train_wall=1371 | |
| epoch 001: 5120 / 8862 loss=7.771, nll_loss=6.589, ppl=96.30, wps=49826, ups=3, wpb=15174.572, bsz=558.349, num_updates=5117, lr=0.000442071, gnorm=1.116, clip=0.000, oom=0.000, loss_scale=16.000, wall=1558, train_wall=1374 | |
| epoch 001: 5130 / 8862 loss=7.766, nll_loss=6.584, ppl=95.93, wps=49833, ups=3, wpb=15175.218, bsz=558.436, num_updates=5127, lr=0.00044164, gnorm=1.116, clip=0.000, oom=0.000, loss_scale=16.000, wall=1561, train_wall=1376 | |
| epoch 001: 5140 / 8862 loss=7.762, nll_loss=6.579, ppl=95.57, wps=49838, ups=3, wpb=15175.149, bsz=558.259, num_updates=5137, lr=0.00044121, gnorm=1.115, clip=0.000, oom=0.000, loss_scale=16.000, wall=1564, train_wall=1379 | |
| epoch 001: 5150 / 8862 loss=7.757, nll_loss=6.573, ppl=95.23, wps=49841, ups=3, wpb=15174.638, bsz=558.214, num_updates=5147, lr=0.000440781, gnorm=1.114, clip=0.000, oom=0.000, loss_scale=16.000, wall=1567, train_wall=1382 | |
| epoch 001: 5160 / 8862 loss=7.752, nll_loss=6.568, ppl=94.87, wps=49846, ups=3, wpb=15174.557, bsz=558.168, num_updates=5157, lr=0.000440353, gnorm=1.114, clip=0.000, oom=0.000, loss_scale=16.000, wall=1570, train_wall=1384 | |
| epoch 001: 5170 / 8862 loss=7.748, nll_loss=6.562, ppl=94.50, wps=49853, ups=3, wpb=15175.279, bsz=558.153, num_updates=5167, lr=0.000439927, gnorm=1.113, clip=0.000, oom=0.000, loss_scale=16.000, wall=1573, train_wall=1387 | |
| epoch 001: 5180 / 8862 loss=7.743, nll_loss=6.557, ppl=94.13, wps=49858, ups=3, wpb=15175.595, bsz=558.114, num_updates=5177, lr=0.000439502, gnorm=1.112, clip=0.000, oom=0.000, loss_scale=16.000, wall=1576, train_wall=1390 | |
| epoch 001: 5190 / 8862 loss=7.738, nll_loss=6.552, ppl=93.80, wps=49862, ups=3, wpb=15175.248, bsz=557.925, num_updates=5187, lr=0.000439078, gnorm=1.112, clip=0.000, oom=0.000, loss_scale=16.000, wall=1579, train_wall=1393 | |
| epoch 001: 5200 / 8862 loss=7.733, nll_loss=6.546, ppl=93.46, wps=49866, ups=3, wpb=15175.195, bsz=557.946, num_updates=5197, lr=0.000438656, gnorm=1.111, clip=0.000, oom=0.000, loss_scale=16.000, wall=1582, train_wall=1395 | |
| epoch 001: 5210 / 8862 loss=7.729, nll_loss=6.541, ppl=93.12, wps=49870, ups=3, wpb=15174.922, bsz=557.982, num_updates=5207, lr=0.000438234, gnorm=1.111, clip=0.000, oom=0.000, loss_scale=16.000, wall=1584, train_wall=1398 | |
| epoch 001: 5220 / 8862 loss=7.724, nll_loss=6.536, ppl=92.78, wps=49875, ups=3, wpb=15174.832, bsz=557.968, num_updates=5217, lr=0.000437814, gnorm=1.110, clip=0.000, oom=0.000, loss_scale=16.000, wall=1587, train_wall=1401 | |
| epoch 001: 5230 / 8862 loss=7.719, nll_loss=6.530, ppl=92.42, wps=49880, ups=3, wpb=15174.996, bsz=558.054, num_updates=5227, lr=0.000437395, gnorm=1.109, clip=0.000, oom=0.000, loss_scale=16.000, wall=1590, train_wall=1403 | |
| epoch 001: 5240 / 8862 loss=7.715, nll_loss=6.525, ppl=92.09, wps=49885, ups=3, wpb=15174.850, bsz=557.963, num_updates=5237, lr=0.000436977, gnorm=1.109, clip=0.000, oom=0.000, loss_scale=16.000, wall=1593, train_wall=1406 | |
| epoch 001: 5250 / 8862 loss=7.710, nll_loss=6.519, ppl=91.74, wps=49889, ups=3, wpb=15174.845, bsz=557.963, num_updates=5247, lr=0.000436561, gnorm=1.108, clip=0.000, oom=0.000, loss_scale=16.000, wall=1596, train_wall=1409 | |
| epoch 001: 5260 / 8862 loss=7.706, nll_loss=6.514, ppl=91.41, wps=49896, ups=3, wpb=15175.216, bsz=557.819, num_updates=5257, lr=0.000436145, gnorm=1.108, clip=0.000, oom=0.000, loss_scale=16.000, wall=1599, train_wall=1411 | |
| epoch 001: 5270 / 8862 loss=7.701, nll_loss=6.509, ppl=91.05, wps=49900, ups=3, wpb=15175.448, bsz=558.055, num_updates=5267, lr=0.000435731, gnorm=1.107, clip=0.000, oom=0.000, loss_scale=16.000, wall=1602, train_wall=1414 | |
| epoch 001: 5280 / 8862 loss=7.696, nll_loss=6.504, ppl=90.73, wps=49905, ups=3, wpb=15175.675, bsz=557.911, num_updates=5277, lr=0.000435318, gnorm=1.106, clip=0.000, oom=0.000, loss_scale=16.000, wall=1605, train_wall=1417 | |
| epoch 001: 5290 / 8862 loss=7.692, nll_loss=6.498, ppl=90.41, wps=49909, ups=3, wpb=15175.296, bsz=557.899, num_updates=5287, lr=0.000434906, gnorm=1.105, clip=0.000, oom=0.000, loss_scale=16.000, wall=1608, train_wall=1419 | |
| epoch 001: 5300 / 8862 loss=7.687, nll_loss=6.493, ppl=90.06, wps=49915, ups=3, wpb=15175.941, bsz=557.887, num_updates=5297, lr=0.000434495, gnorm=1.105, clip=0.000, oom=0.000, loss_scale=16.000, wall=1610, train_wall=1422 | |
| epoch 001: 5310 / 8862 loss=7.682, nll_loss=6.487, ppl=89.73, wps=49921, ups=3, wpb=15176.387, bsz=557.775, num_updates=5307, lr=0.000434086, gnorm=1.104, clip=0.000, oom=0.000, loss_scale=16.000, wall=1613, train_wall=1425 | |
| epoch 001: 5320 / 8862 loss=7.678, nll_loss=6.482, ppl=89.41, wps=49925, ups=3, wpb=15176.115, bsz=557.684, num_updates=5317, lr=0.000433677, gnorm=1.103, clip=0.000, oom=0.000, loss_scale=16.000, wall=1616, train_wall=1427 | |
| epoch 001: 5330 / 8862 loss=7.673, nll_loss=6.477, ppl=89.10, wps=49928, ups=3, wpb=15175.682, bsz=557.792, num_updates=5327, lr=0.00043327, gnorm=1.103, clip=0.000, oom=0.000, loss_scale=16.000, wall=1619, train_wall=1430 | |
| epoch 001: 5340 / 8862 loss=7.669, nll_loss=6.472, ppl=88.78, wps=49933, ups=3, wpb=15175.797, bsz=557.675, num_updates=5337, lr=0.000432864, gnorm=1.102, clip=0.000, oom=0.000, loss_scale=16.000, wall=1622, train_wall=1433 | |
| epoch 001: 5350 / 8862 loss=7.664, nll_loss=6.467, ppl=88.45, wps=49938, ups=3, wpb=15175.965, bsz=557.661, num_updates=5347, lr=0.000432459, gnorm=1.102, clip=0.000, oom=0.000, loss_scale=16.000, wall=1625, train_wall=1435 | |
| epoch 001: 5360 / 8862 loss=7.660, nll_loss=6.462, ppl=88.13, wps=49942, ups=3, wpb=15175.740, bsz=557.595, num_updates=5357, lr=0.000432055, gnorm=1.101, clip=0.000, oom=0.000, loss_scale=16.000, wall=1628, train_wall=1438 | |
| epoch 001: 5370 / 8862 loss=7.655, nll_loss=6.456, ppl=87.82, wps=49945, ups=3, wpb=15175.261, bsz=557.540, num_updates=5367, lr=0.000431652, gnorm=1.100, clip=0.000, oom=0.000, loss_scale=16.000, wall=1631, train_wall=1441 | |
| epoch 001: 5380 / 8862 loss=7.651, nll_loss=6.451, ppl=87.50, wps=49949, ups=3, wpb=15175.126, bsz=557.656, num_updates=5377, lr=0.000431251, gnorm=1.100, clip=0.000, oom=0.000, loss_scale=16.000, wall=1634, train_wall=1443 | |
| epoch 001: 5390 / 8862 loss=7.646, nll_loss=6.446, ppl=87.20, wps=49955, ups=3, wpb=15175.406, bsz=557.511, num_updates=5387, lr=0.00043085, gnorm=1.099, clip=0.000, oom=0.000, loss_scale=16.000, wall=1636, train_wall=1446 | |
| epoch 001: 5400 / 8862 loss=7.642, nll_loss=6.441, ppl=86.91, wps=49957, ups=3, wpb=15174.577, bsz=557.343, num_updates=5397, lr=0.000430451, gnorm=1.099, clip=0.000, oom=0.000, loss_scale=16.000, wall=1639, train_wall=1449 | |
| epoch 001: 5410 / 8862 loss=7.637, nll_loss=6.436, ppl=86.57, wps=49962, ups=3, wpb=15175.053, bsz=557.631, num_updates=5407, lr=0.000430053, gnorm=1.098, clip=0.000, oom=0.000, loss_scale=16.000, wall=1642, train_wall=1451 | |
| epoch 001: 5420 / 8862 loss=7.632, nll_loss=6.431, ppl=86.26, wps=49966, ups=3, wpb=15174.990, bsz=557.765, num_updates=5417, lr=0.000429656, gnorm=1.097, clip=0.000, oom=0.000, loss_scale=16.000, wall=1645, train_wall=1454 | |
| epoch 001: 5430 / 8862 loss=7.628, nll_loss=6.425, ppl=85.95, wps=49972, ups=3, wpb=15175.383, bsz=557.684, num_updates=5427, lr=0.00042926, gnorm=1.097, clip=0.000, oom=0.000, loss_scale=16.000, wall=1648, train_wall=1457 | |
| epoch 001: 5440 / 8862 loss=7.623, nll_loss=6.420, ppl=85.64, wps=49978, ups=3, wpb=15175.836, bsz=557.625, num_updates=5437, lr=0.000428865, gnorm=1.096, clip=0.000, oom=0.000, loss_scale=16.000, wall=1651, train_wall=1459 | |
| epoch 001: 5450 / 8862 loss=7.619, nll_loss=6.415, ppl=85.32, wps=49984, ups=3, wpb=15176.544, bsz=557.613, num_updates=5447, lr=0.000428471, gnorm=1.095, clip=0.000, oom=0.000, loss_scale=16.000, wall=1654, train_wall=1462 | |
| epoch 001: 5460 / 8862 loss=7.614, nll_loss=6.410, ppl=85.02, wps=49989, ups=3, wpb=15176.632, bsz=557.481, num_updates=5457, lr=0.000428078, gnorm=1.094, clip=0.000, oom=0.000, loss_scale=16.000, wall=1657, train_wall=1465 | |
| epoch 001: 5470 / 8862 loss=7.610, nll_loss=6.405, ppl=84.72, wps=49995, ups=3, wpb=15177.259, bsz=557.391, num_updates=5467, lr=0.000427686, gnorm=1.094, clip=0.000, oom=0.000, loss_scale=16.000, wall=1660, train_wall=1467 | |
| epoch 001: 5480 / 8862 loss=7.606, nll_loss=6.400, ppl=84.45, wps=50001, ups=3, wpb=15177.466, bsz=557.169, num_updates=5477, lr=0.000427296, gnorm=1.093, clip=0.000, oom=0.000, loss_scale=16.000, wall=1663, train_wall=1470 | |
| epoch 001: 5490 / 8862 loss=7.601, nll_loss=6.395, ppl=84.16, wps=50007, ups=3, wpb=15177.947, bsz=557.133, num_updates=5487, lr=0.000426906, gnorm=1.092, clip=0.000, oom=0.000, loss_scale=16.000, wall=1665, train_wall=1473 | |
| epoch 001: 5500 / 8862 loss=7.597, nll_loss=6.390, ppl=83.89, wps=50010, ups=3, wpb=15177.335, bsz=557.028, num_updates=5497, lr=0.000426518, gnorm=1.092, clip=0.000, oom=0.000, loss_scale=16.000, wall=1668, train_wall=1475 | |
| epoch 001: 5510 / 8862 loss=7.593, nll_loss=6.385, ppl=83.59, wps=50012, ups=3, wpb=15177.060, bsz=557.420, num_updates=5507, lr=0.00042613, gnorm=1.091, clip=0.000, oom=0.000, loss_scale=16.000, wall=1671, train_wall=1478 | |
| epoch 001: 5520 / 8862 loss=7.589, nll_loss=6.380, ppl=83.31, wps=50014, ups=3, wpb=15176.566, bsz=557.533, num_updates=5517, lr=0.000425744, gnorm=1.090, clip=0.000, oom=0.000, loss_scale=16.000, wall=1674, train_wall=1481 | |
| epoch 001: 5530 / 8862 loss=7.584, nll_loss=6.375, ppl=83.02, wps=50018, ups=3, wpb=15176.404, bsz=557.436, num_updates=5527, lr=0.000425359, gnorm=1.090, clip=0.000, oom=0.000, loss_scale=16.000, wall=1677, train_wall=1483 | |
| epoch 001: 5540 / 8862 loss=7.580, nll_loss=6.371, ppl=82.75, wps=50023, ups=3, wpb=15176.211, bsz=557.276, num_updates=5537, lr=0.000424974, gnorm=1.089, clip=0.000, oom=0.000, loss_scale=16.000, wall=1680, train_wall=1486 | |
| epoch 001: 5550 / 8862 loss=7.576, nll_loss=6.366, ppl=82.46, wps=50026, ups=3, wpb=15175.946, bsz=557.310, num_updates=5547, lr=0.000424591, gnorm=1.088, clip=0.000, oom=0.000, loss_scale=16.000, wall=1683, train_wall=1489 | |
| epoch 001: 5560 / 8862 loss=7.571, nll_loss=6.361, ppl=82.18, wps=50029, ups=3, wpb=15175.992, bsz=557.475, num_updates=5557, lr=0.000424209, gnorm=1.088, clip=0.000, oom=0.000, loss_scale=16.000, wall=1686, train_wall=1491 | |
| epoch 001: 5570 / 8862 loss=7.567, nll_loss=6.356, ppl=81.91, wps=50033, ups=3, wpb=15175.787, bsz=557.449, num_updates=5567, lr=0.000423828, gnorm=1.087, clip=0.000, oom=0.000, loss_scale=16.000, wall=1689, train_wall=1494 | |
| epoch 001: 5580 / 8862 loss=7.563, nll_loss=6.351, ppl=81.61, wps=50036, ups=3, wpb=15176.066, bsz=557.683, num_updates=5577, lr=0.000423448, gnorm=1.087, clip=0.000, oom=0.000, loss_scale=16.000, wall=1692, train_wall=1497 | |
| epoch 001: 5590 / 8862 loss=7.558, nll_loss=6.346, ppl=81.32, wps=50041, ups=3, wpb=15176.188, bsz=557.634, num_updates=5587, lr=0.000423068, gnorm=1.086, clip=0.000, oom=0.000, loss_scale=16.000, wall=1694, train_wall=1500 | |
| epoch 001: 5600 / 8862 loss=7.554, nll_loss=6.341, ppl=81.05, wps=50045, ups=3, wpb=15176.010, bsz=557.628, num_updates=5597, lr=0.00042269, gnorm=1.085, clip=0.000, oom=0.000, loss_scale=16.000, wall=1697, train_wall=1502 | |
| epoch 001: 5610 / 8862 loss=7.550, nll_loss=6.336, ppl=80.81, wps=50047, ups=3, wpb=15175.374, bsz=557.520, num_updates=5607, lr=0.000422313, gnorm=1.085, clip=0.000, oom=0.000, loss_scale=16.000, wall=1700, train_wall=1505 | |
| epoch 001: 5620 / 8862 loss=7.546, nll_loss=6.331, ppl=80.53, wps=50051, ups=3, wpb=15175.199, bsz=557.568, num_updates=5617, lr=0.000421937, gnorm=1.084, clip=0.000, oom=0.000, loss_scale=16.000, wall=1703, train_wall=1508 | |
| epoch 001: 5630 / 8862 loss=7.541, nll_loss=6.326, ppl=80.24, wps=50053, ups=3, wpb=15175.059, bsz=557.783, num_updates=5627, lr=0.000421562, gnorm=1.083, clip=0.000, oom=0.000, loss_scale=16.000, wall=1706, train_wall=1510 | |
| epoch 001: 5640 / 8862 loss=7.537, nll_loss=6.321, ppl=79.98, wps=50056, ups=3, wpb=15174.937, bsz=557.749, num_updates=5637, lr=0.000421188, gnorm=1.083, clip=0.000, oom=0.000, loss_scale=16.000, wall=1709, train_wall=1513 | |
| epoch 001: 5650 / 8862 loss=7.533, nll_loss=6.316, ppl=79.69, wps=50061, ups=3, wpb=15175.430, bsz=557.974, num_updates=5647, lr=0.000420815, gnorm=1.082, clip=0.000, oom=0.000, loss_scale=16.000, wall=1712, train_wall=1516 | |
| epoch 001: 5660 / 8862 loss=7.528, nll_loss=6.311, ppl=79.41, wps=50066, ups=3, wpb=15175.616, bsz=557.965, num_updates=5657, lr=0.000420443, gnorm=1.082, clip=0.000, oom=0.000, loss_scale=16.000, wall=1715, train_wall=1518 | |
| epoch 001: 5670 / 8862 loss=7.525, nll_loss=6.307, ppl=79.18, wps=50069, ups=3, wpb=15175.029, bsz=557.826, num_updates=5667, lr=0.000420072, gnorm=1.081, clip=0.000, oom=0.000, loss_scale=16.000, wall=1718, train_wall=1521 | |
| epoch 001: 5680 / 8862 loss=7.520, nll_loss=6.302, ppl=78.91, wps=50074, ups=3, wpb=15175.172, bsz=557.783, num_updates=5677, lr=0.000419702, gnorm=1.081, clip=0.000, oom=0.000, loss_scale=16.000, wall=1720, train_wall=1524 | |
| epoch 001: 5690 / 8862 loss=7.516, nll_loss=6.297, ppl=78.63, wps=50079, ups=3, wpb=15175.641, bsz=557.783, num_updates=5687, lr=0.000419332, gnorm=1.080, clip=0.000, oom=0.000, loss_scale=16.000, wall=1723, train_wall=1526 | |
| epoch 001: 5700 / 8862 loss=7.512, nll_loss=6.292, ppl=78.37, wps=50083, ups=3, wpb=15175.665, bsz=557.798, num_updates=5697, lr=0.000418964, gnorm=1.079, clip=0.000, oom=0.000, loss_scale=16.000, wall=1726, train_wall=1529 | |
| epoch 001: 5710 / 8862 loss=7.508, nll_loss=6.288, ppl=78.12, wps=50087, ups=3, wpb=15175.867, bsz=557.740, num_updates=5707, lr=0.000418597, gnorm=1.079, clip=0.000, oom=0.000, loss_scale=16.000, wall=1729, train_wall=1532 | |
| epoch 001: 5720 / 8862 loss=7.503, nll_loss=6.283, ppl=77.86, wps=50092, ups=3, wpb=15175.896, bsz=557.696, num_updates=5717, lr=0.000418231, gnorm=1.078, clip=0.000, oom=0.000, loss_scale=16.000, wall=1732, train_wall=1534 | |
| epoch 001: 5730 / 8862 loss=7.499, nll_loss=6.278, ppl=77.61, wps=50095, ups=3, wpb=15175.603, bsz=557.752, num_updates=5727, lr=0.000417865, gnorm=1.077, clip=0.000, oom=0.000, loss_scale=16.000, wall=1735, train_wall=1537 | |
| epoch 001: 5740 / 8862 loss=7.495, nll_loss=6.273, ppl=77.34, wps=50100, ups=3, wpb=15176.061, bsz=557.808, num_updates=5737, lr=0.000417501, gnorm=1.077, clip=0.000, oom=0.000, loss_scale=16.000, wall=1738, train_wall=1540 | |
| epoch 001: 5750 / 8862 loss=7.491, nll_loss=6.268, ppl=77.08, wps=50104, ups=3, wpb=15176.327, bsz=557.965, num_updates=5747, lr=0.000417138, gnorm=1.076, clip=0.000, oom=0.000, loss_scale=16.000, wall=1741, train_wall=1542 | |
| epoch 001: 5760 / 8862 loss=7.487, nll_loss=6.264, ppl=76.84, wps=50107, ups=3, wpb=15176.253, bsz=557.903, num_updates=5757, lr=0.000416775, gnorm=1.075, clip=0.000, oom=0.000, loss_scale=16.000, wall=1744, train_wall=1545 | |
| epoch 001: 5770 / 8862 loss=7.483, nll_loss=6.259, ppl=76.60, wps=50111, ups=3, wpb=15175.938, bsz=557.870, num_updates=5767, lr=0.000416414, gnorm=1.075, clip=0.000, oom=0.000, loss_scale=16.000, wall=1747, train_wall=1548 | |
| epoch 001: 5780 / 8862 loss=7.479, nll_loss=6.255, ppl=76.36, wps=50114, ups=3, wpb=15175.762, bsz=558.004, num_updates=5777, lr=0.000416053, gnorm=1.074, clip=0.000, oom=0.000, loss_scale=16.000, wall=1749, train_wall=1550 | |
| epoch 001: 5790 / 8862 loss=7.475, nll_loss=6.250, ppl=76.11, wps=50118, ups=3, wpb=15175.925, bsz=557.901, num_updates=5787, lr=0.000415694, gnorm=1.073, clip=0.000, oom=0.000, loss_scale=16.000, wall=1752, train_wall=1553 | |
| epoch 001: 5800 / 8862 loss=7.470, nll_loss=6.245, ppl=75.86, wps=50123, ups=3, wpb=15176.226, bsz=557.943, num_updates=5797, lr=0.000415335, gnorm=1.073, clip=0.000, oom=0.000, loss_scale=16.000, wall=1755, train_wall=1556 | |
| epoch 001: 5810 / 8862 loss=7.466, nll_loss=6.241, ppl=75.61, wps=50128, ups=3, wpb=15176.378, bsz=557.929, num_updates=5807, lr=0.000414977, gnorm=1.072, clip=0.000, oom=0.000, loss_scale=16.000, wall=1758, train_wall=1558 | |
| epoch 001: 5820 / 8862 loss=7.462, nll_loss=6.236, ppl=75.37, wps=50132, ups=3, wpb=15176.467, bsz=557.981, num_updates=5817, lr=0.00041462, gnorm=1.072, clip=0.000, oom=0.000, loss_scale=16.000, wall=1761, train_wall=1561 | |
| epoch 001: 5830 / 8862 loss=7.458, nll_loss=6.231, ppl=75.13, wps=50136, ups=3, wpb=15176.461, bsz=558.101, num_updates=5827, lr=0.000414264, gnorm=1.071, clip=0.000, oom=0.000, loss_scale=16.000, wall=1764, train_wall=1564 | |
| epoch 001: 5840 / 8862 loss=7.455, nll_loss=6.227, ppl=74.91, wps=50139, ups=3, wpb=15176.250, bsz=558.066, num_updates=5837, lr=0.000413909, gnorm=1.070, clip=0.000, oom=0.000, loss_scale=16.000, wall=1767, train_wall=1566 | |
| epoch 001: 5850 / 8862 loss=7.450, nll_loss=6.222, ppl=74.67, wps=50142, ups=3, wpb=15175.935, bsz=558.185, num_updates=5847, lr=0.000413555, gnorm=1.070, clip=0.000, oom=0.000, loss_scale=16.000, wall=1770, train_wall=1569 | |
| epoch 001: 5860 / 8862 loss=7.446, nll_loss=6.218, ppl=74.42, wps=50147, ups=3, wpb=15176.550, bsz=558.187, num_updates=5857, lr=0.000413202, gnorm=1.069, clip=0.000, oom=0.000, loss_scale=16.000, wall=1773, train_wall=1572 | |
| epoch 001: 5870 / 8862 loss=7.442, nll_loss=6.213, ppl=74.17, wps=50152, ups=3, wpb=15176.824, bsz=558.267, num_updates=5867, lr=0.00041285, gnorm=1.068, clip=0.000, oom=0.000, loss_scale=16.000, wall=1775, train_wall=1575 | |
| epoch 001: 5880 / 8862 loss=7.438, nll_loss=6.208, ppl=73.95, wps=50153, ups=3, wpb=15176.057, bsz=558.116, num_updates=5877, lr=0.000412498, gnorm=1.068, clip=0.000, oom=0.000, loss_scale=16.000, wall=1778, train_wall=1577 | |
| epoch 001: 5890 / 8862 loss=7.434, nll_loss=6.204, ppl=73.71, wps=50157, ups=3, wpb=15176.295, bsz=558.112, num_updates=5887, lr=0.000412148, gnorm=1.067, clip=0.000, oom=0.000, loss_scale=16.000, wall=1781, train_wall=1580 | |
| epoch 001: 5900 / 8862 loss=7.430, nll_loss=6.200, ppl=73.50, wps=50162, ups=3, wpb=15176.440, bsz=558.004, num_updates=5897, lr=0.000411798, gnorm=1.067, clip=0.000, oom=0.000, loss_scale=16.000, wall=1784, train_wall=1583 | |
| epoch 001: 5910 / 8862 loss=7.426, nll_loss=6.195, ppl=73.27, wps=50165, ups=3, wpb=15176.468, bsz=558.047, num_updates=5907, lr=0.000411449, gnorm=1.066, clip=0.000, oom=0.000, loss_scale=16.000, wall=1787, train_wall=1585 | |
| epoch 001: 5920 / 8862 loss=7.423, nll_loss=6.191, ppl=73.04, wps=50168, ups=3, wpb=15176.264, bsz=557.988, num_updates=5917, lr=0.000411102, gnorm=1.066, clip=0.000, oom=0.000, loss_scale=16.000, wall=1790, train_wall=1588 | |
| epoch 001: 5930 / 8862 loss=7.419, nll_loss=6.186, ppl=72.81, wps=50174, ups=3, wpb=15176.679, bsz=557.979, num_updates=5927, lr=0.000410755, gnorm=1.065, clip=0.000, oom=0.000, loss_scale=16.000, wall=1793, train_wall=1591 | |
| epoch 001: 5940 / 8862 loss=7.415, nll_loss=6.182, ppl=72.60, wps=50176, ups=3, wpb=15176.353, bsz=557.880, num_updates=5937, lr=0.000410409, gnorm=1.064, clip=0.000, oom=0.000, loss_scale=16.000, wall=1796, train_wall=1593 | |
| epoch 001: 5950 / 8862 loss=7.411, nll_loss=6.178, ppl=72.38, wps=50179, ups=3, wpb=15176.154, bsz=557.967, num_updates=5947, lr=0.000410063, gnorm=1.064, clip=0.000, oom=0.000, loss_scale=16.000, wall=1799, train_wall=1596 | |
| epoch 001: 5960 / 8862 loss=7.407, nll_loss=6.173, ppl=72.16, wps=50182, ups=3, wpb=15176.328, bsz=558.070, num_updates=5957, lr=0.000409719, gnorm=1.063, clip=0.000, oom=0.000, loss_scale=16.000, wall=1802, train_wall=1599 | |
| epoch 001: 5970 / 8862 loss=7.403, nll_loss=6.169, ppl=71.94, wps=50186, ups=3, wpb=15176.533, bsz=558.168, num_updates=5967, lr=0.000409376, gnorm=1.063, clip=0.000, oom=0.000, loss_scale=16.000, wall=1804, train_wall=1601 | |
| epoch 001: 5980 / 8862 loss=7.400, nll_loss=6.165, ppl=71.74, wps=50188, ups=3, wpb=15176.008, bsz=558.007, num_updates=5977, lr=0.000409033, gnorm=1.062, clip=0.000, oom=0.000, loss_scale=16.000, wall=1807, train_wall=1604 | |
| epoch 001: 5990 / 8862 loss=7.396, nll_loss=6.161, ppl=71.54, wps=50190, ups=3, wpb=15175.422, bsz=557.852, num_updates=5987, lr=0.000408691, gnorm=1.062, clip=0.000, oom=0.000, loss_scale=16.000, wall=1810, train_wall=1607 | |
| epoch 001: 6000 / 8862 loss=7.392, nll_loss=6.156, ppl=71.31, wps=50195, ups=3, wpb=15176.233, bsz=557.989, num_updates=5997, lr=0.00040835, gnorm=1.061, clip=0.000, oom=0.000, loss_scale=16.000, wall=1813, train_wall=1609 | |
| epoch 001: 6010 / 8862 loss=7.388, nll_loss=6.151, ppl=71.08, wps=50200, ups=3, wpb=15176.616, bsz=557.934, num_updates=6007, lr=0.00040801, gnorm=1.061, clip=0.000, oom=0.000, loss_scale=16.000, wall=1816, train_wall=1612 | |
| epoch 001: 6020 / 8862 loss=7.385, nll_loss=6.147, ppl=70.88, wps=50202, ups=3, wpb=15176.402, bsz=557.884, num_updates=6017, lr=0.000407671, gnorm=1.060, clip=0.000, oom=0.000, loss_scale=16.000, wall=1819, train_wall=1615 | |
| epoch 001: 6030 / 8862 loss=7.381, nll_loss=6.143, ppl=70.67, wps=50205, ups=3, wpb=15176.041, bsz=557.851, num_updates=6027, lr=0.000407333, gnorm=1.059, clip=0.000, oom=0.000, loss_scale=16.000, wall=1822, train_wall=1617 | |
| epoch 001: 6040 / 8862 loss=7.377, nll_loss=6.139, ppl=70.45, wps=50208, ups=3, wpb=15176.047, bsz=557.860, num_updates=6037, lr=0.000406995, gnorm=1.059, clip=0.000, oom=0.000, loss_scale=16.000, wall=1825, train_wall=1620 | |
| epoch 001: 6050 / 8862 loss=7.373, nll_loss=6.134, ppl=70.23, wps=50212, ups=3, wpb=15176.332, bsz=558.042, num_updates=6047, lr=0.000406659, gnorm=1.058, clip=0.000, oom=0.000, loss_scale=16.000, wall=1828, train_wall=1623 | |
| epoch 001: 6060 / 8862 loss=7.369, nll_loss=6.130, ppl=70.02, wps=50215, ups=3, wpb=15176.239, bsz=557.982, num_updates=6057, lr=0.000406323, gnorm=1.058, clip=0.000, oom=0.000, loss_scale=16.000, wall=1831, train_wall=1625 | |
| epoch 001: 6070 / 8862 loss=7.365, nll_loss=6.125, ppl=69.80, wps=50219, ups=3, wpb=15176.221, bsz=557.970, num_updates=6067, lr=0.000405988, gnorm=1.057, clip=0.000, oom=0.000, loss_scale=16.000, wall=1833, train_wall=1628 | |
| epoch 001: 6080 / 8862 loss=7.361, nll_loss=6.121, ppl=69.59, wps=50222, ups=3, wpb=15176.188, bsz=558.032, num_updates=6077, lr=0.000405654, gnorm=1.056, clip=0.000, oom=0.000, loss_scale=16.000, wall=1836, train_wall=1631 | |
| epoch 001: 6090 / 8862 loss=7.358, nll_loss=6.117, ppl=69.39, wps=50226, ups=3, wpb=15176.173, bsz=557.960, num_updates=6087, lr=0.00040532, gnorm=1.056, clip=0.000, oom=0.000, loss_scale=16.000, wall=1839, train_wall=1633 | |
| epoch 001: 6100 / 8862 loss=7.354, nll_loss=6.113, ppl=69.20, wps=50228, ups=3, wpb=15175.476, bsz=557.660, num_updates=6097, lr=0.000404988, gnorm=1.055, clip=0.000, oom=0.000, loss_scale=16.000, wall=1842, train_wall=1636 | |
| epoch 001: 6110 / 8862 loss=7.351, nll_loss=6.108, ppl=69.00, wps=50232, ups=3, wpb=15175.470, bsz=557.642, num_updates=6107, lr=0.000404656, gnorm=1.055, clip=0.000, oom=0.000, loss_scale=16.000, wall=1845, train_wall=1639 | |
| epoch 001: 6120 / 8862 loss=7.347, nll_loss=6.104, ppl=68.80, wps=50234, ups=3, wpb=15175.285, bsz=557.689, num_updates=6117, lr=0.000404325, gnorm=1.054, clip=0.000, oom=0.000, loss_scale=16.000, wall=1848, train_wall=1641 | |
| epoch 001: 6130 / 8862 loss=7.343, nll_loss=6.100, ppl=68.59, wps=50238, ups=3, wpb=15175.411, bsz=557.614, num_updates=6127, lr=0.000403995, gnorm=1.053, clip=0.000, oom=0.000, loss_scale=16.000, wall=1851, train_wall=1644 | |
| epoch 001: 6140 / 8862 loss=7.339, nll_loss=6.096, ppl=68.38, wps=50242, ups=3, wpb=15175.565, bsz=557.664, num_updates=6137, lr=0.000403666, gnorm=1.053, clip=0.000, oom=0.000, loss_scale=16.000, wall=1854, train_wall=1647 | |
| epoch 001: 6150 / 8862 loss=7.336, nll_loss=6.092, ppl=68.20, wps=50244, ups=3, wpb=15174.864, bsz=557.653, num_updates=6147, lr=0.000403337, gnorm=1.052, clip=0.000, oom=0.000, loss_scale=16.000, wall=1857, train_wall=1649 | |
| epoch 001: 6160 / 8862 loss=7.332, nll_loss=6.087, ppl=67.99, wps=50249, ups=3, wpb=15175.344, bsz=557.630, num_updates=6157, lr=0.00040301, gnorm=1.052, clip=0.000, oom=0.000, loss_scale=16.000, wall=1859, train_wall=1652 | |
| epoch 001: 6170 / 8862 loss=7.328, nll_loss=6.083, ppl=67.77, wps=50251, ups=3, wpb=15175.609, bsz=558.104, num_updates=6167, lr=0.000402683, gnorm=1.051, clip=0.000, oom=0.000, loss_scale=16.000, wall=1862, train_wall=1655 | |
| epoch 001: 6180 / 8862 loss=7.324, nll_loss=6.079, ppl=67.58, wps=50254, ups=3, wpb=15175.502, bsz=557.987, num_updates=6177, lr=0.000402357, gnorm=1.051, clip=0.000, oom=0.000, loss_scale=16.000, wall=1865, train_wall=1658 | |
| epoch 001: 6190 / 8862 loss=7.321, nll_loss=6.074, ppl=67.39, wps=50257, ups=3, wpb=15175.171, bsz=557.900, num_updates=6187, lr=0.000402031, gnorm=1.050, clip=0.000, oom=0.000, loss_scale=16.000, wall=1868, train_wall=1660 | |
| epoch 001: 6200 / 8862 loss=7.317, nll_loss=6.070, ppl=67.20, wps=50260, ups=3, wpb=15174.937, bsz=557.853, num_updates=6197, lr=0.000401707, gnorm=1.049, clip=0.000, oom=0.000, loss_scale=16.000, wall=1871, train_wall=1663 | |
| epoch 001: 6210 / 8862 loss=7.314, nll_loss=6.066, ppl=67.00, wps=50262, ups=3, wpb=15174.876, bsz=557.966, num_updates=6207, lr=0.000401383, gnorm=1.049, clip=0.000, oom=0.000, loss_scale=16.000, wall=1874, train_wall=1666 | |
| epoch 001: 6220 / 8862 loss=7.310, nll_loss=6.062, ppl=66.81, wps=50267, ups=3, wpb=15175.121, bsz=558.040, num_updates=6217, lr=0.00040106, gnorm=1.048, clip=0.000, oom=0.000, loss_scale=16.000, wall=1877, train_wall=1668 | |
| epoch 001: 6230 / 8862 loss=7.306, nll_loss=6.058, ppl=66.62, wps=50270, ups=3, wpb=15175.253, bsz=558.061, num_updates=6227, lr=0.000400738, gnorm=1.048, clip=0.000, oom=0.000, loss_scale=16.000, wall=1880, train_wall=1671 | |
| epoch 001: 6240 / 8862 loss=7.303, nll_loss=6.054, ppl=66.43, wps=50273, ups=3, wpb=15175.165, bsz=557.981, num_updates=6237, lr=0.000400417, gnorm=1.047, clip=0.000, oom=0.000, loss_scale=16.000, wall=1883, train_wall=1674 | |
| epoch 001: 6250 / 8862 loss=7.299, nll_loss=6.050, ppl=66.24, wps=50277, ups=3, wpb=15175.356, bsz=557.898, num_updates=6247, lr=0.000400096, gnorm=1.047, clip=0.000, oom=0.000, loss_scale=16.000, wall=1886, train_wall=1676 | |
| epoch 001: 6260 / 8862 loss=7.295, nll_loss=6.046, ppl=66.05, wps=50280, ups=3, wpb=15175.259, bsz=558.098, num_updates=6257, lr=0.000399776, gnorm=1.047, clip=0.000, oom=0.000, loss_scale=16.000, wall=1888, train_wall=1679 | |
| epoch 001: 6270 / 8862 loss=7.292, nll_loss=6.041, ppl=65.86, wps=50281, ups=3, wpb=15175.019, bsz=558.108, num_updates=6267, lr=0.000399457, gnorm=1.046, clip=0.000, oom=0.000, loss_scale=16.000, wall=1891, train_wall=1682 | |
| epoch 001: 6280 / 8862 loss=7.288, nll_loss=6.037, ppl=65.68, wps=50286, ups=3, wpb=15175.495, bsz=557.999, num_updates=6277, lr=0.000399139, gnorm=1.045, clip=0.000, oom=0.000, loss_scale=16.000, wall=1894, train_wall=1684 | |
| epoch 001: 6290 / 8862 loss=7.285, nll_loss=6.034, ppl=65.51, wps=50288, ups=3, wpb=15175.029, bsz=558.024, num_updates=6287, lr=0.000398821, gnorm=1.045, clip=0.000, oom=0.000, loss_scale=16.000, wall=1897, train_wall=1687 | |
| epoch 001: 6300 / 8862 loss=7.281, nll_loss=6.029, ppl=65.32, wps=50291, ups=3, wpb=15175.373, bsz=558.052, num_updates=6297, lr=0.000398504, gnorm=1.044, clip=0.000, oom=0.000, loss_scale=16.000, wall=1900, train_wall=1690 | |
| epoch 001: 6310 / 8862 loss=7.278, nll_loss=6.025, ppl=65.13, wps=50293, ups=3, wpb=15175.072, bsz=558.166, num_updates=6307, lr=0.000398188, gnorm=1.044, clip=0.000, oom=0.000, loss_scale=16.000, wall=1903, train_wall=1692 | |
| epoch 001: 6320 / 8862 loss=7.274, nll_loss=6.021, ppl=64.95, wps=50295, ups=3, wpb=15174.886, bsz=558.167, num_updates=6317, lr=0.000397873, gnorm=1.043, clip=0.000, oom=0.000, loss_scale=16.000, wall=1906, train_wall=1695 | |
| epoch 001: 6330 / 8862 loss=7.270, nll_loss=6.017, ppl=64.75, wps=50298, ups=3, wpb=15175.324, bsz=558.155, num_updates=6327, lr=0.000397559, gnorm=1.043, clip=0.000, oom=0.000, loss_scale=16.000, wall=1909, train_wall=1698 | |
| epoch 001: 6340 / 8862 loss=7.267, nll_loss=6.013, ppl=64.58, wps=50300, ups=3, wpb=15175.181, bsz=558.072, num_updates=6337, lr=0.000397245, gnorm=1.042, clip=0.000, oom=0.000, loss_scale=16.000, wall=1912, train_wall=1701 | |
| epoch 001: 6350 / 8862 loss=7.263, nll_loss=6.009, ppl=64.39, wps=50301, ups=3, wpb=15175.008, bsz=558.333, num_updates=6347, lr=0.000396932, gnorm=1.041, clip=0.000, oom=0.000, loss_scale=16.000, wall=1915, train_wall=1703 | |
| epoch 001: 6360 / 8862 loss=7.260, nll_loss=6.005, ppl=64.21, wps=50304, ups=3, wpb=15174.776, bsz=558.318, num_updates=6357, lr=0.000396619, gnorm=1.041, clip=0.000, oom=0.000, loss_scale=16.000, wall=1918, train_wall=1706 | |
| epoch 001: 6370 / 8862 loss=7.256, nll_loss=6.001, ppl=64.03, wps=50308, ups=3, wpb=15174.952, bsz=558.264, num_updates=6367, lr=0.000396308, gnorm=1.040, clip=0.000, oom=0.000, loss_scale=16.000, wall=1921, train_wall=1709 | |
| epoch 001: 6380 / 8862 loss=7.253, nll_loss=5.997, ppl=63.86, wps=50309, ups=3, wpb=15174.466, bsz=558.268, num_updates=6377, lr=0.000395997, gnorm=1.040, clip=0.000, oom=0.000, loss_scale=16.000, wall=1923, train_wall=1711 | |
| epoch 001: 6390 / 8862 loss=7.249, nll_loss=5.993, ppl=63.68, wps=50313, ups=3, wpb=15174.710, bsz=558.221, num_updates=6387, lr=0.000395687, gnorm=1.039, clip=0.000, oom=0.000, loss_scale=16.000, wall=1926, train_wall=1714 | |
| epoch 001: 6400 / 8862 loss=7.246, nll_loss=5.989, ppl=63.51, wps=50315, ups=3, wpb=15174.537, bsz=558.203, num_updates=6397, lr=0.000395377, gnorm=1.038, clip=0.000, oom=0.000, loss_scale=16.000, wall=1929, train_wall=1717 | |
| epoch 001: 6410 / 8862 loss=7.242, nll_loss=5.985, ppl=63.33, wps=50318, ups=3, wpb=15174.595, bsz=558.264, num_updates=6407, lr=0.000395069, gnorm=1.038, clip=0.000, oom=0.000, loss_scale=16.000, wall=1932, train_wall=1719 | |
| epoch 001: 6420 / 8862 loss=7.239, nll_loss=5.981, ppl=63.17, wps=50320, ups=3, wpb=15174.507, bsz=558.276, num_updates=6417, lr=0.000394761, gnorm=1.037, clip=0.000, oom=0.000, loss_scale=16.000, wall=1935, train_wall=1722 | |
| epoch 001: 6430 / 8862 loss=7.236, nll_loss=5.977, ppl=63.00, wps=50321, ups=3, wpb=15174.163, bsz=558.201, num_updates=6427, lr=0.000394454, gnorm=1.037, clip=0.000, oom=0.000, loss_scale=16.000, wall=1938, train_wall=1725 | |
| epoch 001: 6440 / 8862 loss=7.232, nll_loss=5.973, ppl=62.83, wps=50323, ups=3, wpb=15174.212, bsz=558.199, num_updates=6437, lr=0.000394147, gnorm=1.036, clip=0.000, oom=0.000, loss_scale=16.000, wall=1941, train_wall=1728 | |
| epoch 001: 6450 / 8862 loss=7.229, nll_loss=5.970, ppl=62.66, wps=50326, ups=3, wpb=15174.243, bsz=558.064, num_updates=6447, lr=0.000393841, gnorm=1.036, clip=0.000, oom=0.000, loss_scale=16.000, wall=1944, train_wall=1730 | |
| epoch 001: 6460 / 8862 loss=7.226, nll_loss=5.966, ppl=62.50, wps=50327, ups=3, wpb=15174.097, bsz=558.076, num_updates=6457, lr=0.000393536, gnorm=1.035, clip=0.000, oom=0.000, loss_scale=16.000, wall=1947, train_wall=1733 | |
| epoch 001: 6470 / 8862 loss=7.222, nll_loss=5.962, ppl=62.32, wps=50327, ups=3, wpb=15174.164, bsz=558.107, num_updates=6467, lr=0.000393232, gnorm=1.035, clip=0.000, oom=0.000, loss_scale=16.000, wall=1950, train_wall=1736 | |
| epoch 001: 6480 / 8862 loss=7.218, nll_loss=5.957, ppl=62.14, wps=50328, ups=3, wpb=15174.382, bsz=558.151, num_updates=6477, lr=0.000392928, gnorm=1.034, clip=0.000, oom=0.000, loss_scale=16.000, wall=1953, train_wall=1739 | |
| epoch 001: 6490 / 8862 loss=7.215, nll_loss=5.953, ppl=61.96, wps=50331, ups=3, wpb=15174.520, bsz=558.112, num_updates=6487, lr=0.000392625, gnorm=1.034, clip=0.000, oom=0.000, loss_scale=16.000, wall=1956, train_wall=1741 | |
| epoch 001: 6500 / 8862 loss=7.211, nll_loss=5.950, ppl=61.80, wps=50333, ups=3, wpb=15174.416, bsz=558.132, num_updates=6497, lr=0.000392323, gnorm=1.033, clip=0.000, oom=0.000, loss_scale=16.000, wall=1959, train_wall=1744 | |
| epoch 001: 6510 / 8862 loss=7.208, nll_loss=5.946, ppl=61.64, wps=50334, ups=3, wpb=15174.306, bsz=558.097, num_updates=6507, lr=0.000392021, gnorm=1.033, clip=0.000, oom=0.000, loss_scale=16.000, wall=1962, train_wall=1747 | |
| epoch 001: 6520 / 8862 loss=7.204, nll_loss=5.942, ppl=61.47, wps=50337, ups=3, wpb=15174.583, bsz=558.157, num_updates=6517, lr=0.00039172, gnorm=1.032, clip=0.000, oom=0.000, loss_scale=16.000, wall=1965, train_wall=1749 | |
| epoch 001: 6530 / 8862 loss=7.201, nll_loss=5.938, ppl=61.31, wps=50338, ups=3, wpb=15174.075, bsz=558.235, num_updates=6527, lr=0.00039142, gnorm=1.031, clip=0.000, oom=0.000, loss_scale=16.000, wall=1968, train_wall=1752 | |
| epoch 001: 6540 / 8862 loss=7.198, nll_loss=5.934, ppl=61.14, wps=50340, ups=3, wpb=15174.274, bsz=558.140, num_updates=6537, lr=0.000391121, gnorm=1.031, clip=0.000, oom=0.000, loss_scale=16.000, wall=1970, train_wall=1755 | |
| epoch 001: 6550 / 8862 loss=7.194, nll_loss=5.930, ppl=60.97, wps=50343, ups=3, wpb=15174.802, bsz=558.279, num_updates=6547, lr=0.000390822, gnorm=1.030, clip=0.000, oom=0.000, loss_scale=16.000, wall=1973, train_wall=1758 | |
| epoch 001: 6560 / 8862 loss=7.191, nll_loss=5.926, ppl=60.80, wps=50344, ups=3, wpb=15174.638, bsz=558.316, num_updates=6557, lr=0.000390524, gnorm=1.030, clip=0.000, oom=0.000, loss_scale=16.000, wall=1976, train_wall=1760 | |
| epoch 001: 6570 / 8862 loss=7.187, nll_loss=5.922, ppl=60.63, wps=50347, ups=3, wpb=15174.872, bsz=558.343, num_updates=6567, lr=0.000390226, gnorm=1.029, clip=0.000, oom=0.000, loss_scale=16.000, wall=1979, train_wall=1763 | |
| epoch 001: 6580 / 8862 loss=7.184, nll_loss=5.918, ppl=60.47, wps=50349, ups=3, wpb=15175.071, bsz=558.275, num_updates=6577, lr=0.000389929, gnorm=1.029, clip=0.000, oom=0.000, loss_scale=16.000, wall=1982, train_wall=1766 | |
| epoch 001: 6590 / 8862 loss=7.181, nll_loss=5.914, ppl=60.31, wps=50351, ups=3, wpb=15175.230, bsz=558.188, num_updates=6587, lr=0.000389633, gnorm=1.028, clip=0.000, oom=0.000, loss_scale=16.000, wall=1985, train_wall=1769 | |
| epoch 001: 6600 / 8862 loss=7.177, nll_loss=5.911, ppl=60.16, wps=50353, ups=3, wpb=15175.191, bsz=558.189, num_updates=6597, lr=0.000389338, gnorm=1.028, clip=0.000, oom=0.000, loss_scale=16.000, wall=1988, train_wall=1771 | |
| epoch 001: 6610 / 8862 loss=7.174, nll_loss=5.907, ppl=60.00, wps=50353, ups=3, wpb=15174.784, bsz=558.279, num_updates=6607, lr=0.000389043, gnorm=1.027, clip=0.000, oom=0.000, loss_scale=16.000, wall=1991, train_wall=1774 | |
| epoch 001: 6620 / 8862 loss=7.170, nll_loss=5.903, ppl=59.84, wps=50354, ups=3, wpb=15174.330, bsz=558.348, num_updates=6617, lr=0.000388749, gnorm=1.027, clip=0.000, oom=0.000, loss_scale=16.000, wall=1994, train_wall=1777 | |
| epoch 001: 6630 / 8862 loss=7.167, nll_loss=5.899, ppl=59.69, wps=50356, ups=3, wpb=15174.448, bsz=558.299, num_updates=6627, lr=0.000388456, gnorm=1.026, clip=0.000, oom=0.000, loss_scale=16.000, wall=1997, train_wall=1779 | |
| epoch 001: 6640 / 8862 loss=7.164, nll_loss=5.895, ppl=59.52, wps=50359, ups=3, wpb=15174.766, bsz=558.289, num_updates=6637, lr=0.000388163, gnorm=1.025, clip=0.000, oom=0.000, loss_scale=16.000, wall=2000, train_wall=1782 | |
| epoch 001: 6650 / 8862 loss=7.160, nll_loss=5.892, ppl=59.37, wps=50362, ups=3, wpb=15174.955, bsz=558.295, num_updates=6647, lr=0.000387871, gnorm=1.025, clip=0.000, oom=0.000, loss_scale=16.000, wall=2003, train_wall=1785 | |
| epoch 001: 6660 / 8862 loss=7.157, nll_loss=5.888, ppl=59.21, wps=50364, ups=3, wpb=15175.121, bsz=558.208, num_updates=6657, lr=0.000387579, gnorm=1.024, clip=0.000, oom=0.000, loss_scale=16.000, wall=2006, train_wall=1788 | |
| epoch 001: 6670 / 8862 loss=7.154, nll_loss=5.884, ppl=59.05, wps=50366, ups=3, wpb=15175.032, bsz=558.200, num_updates=6667, lr=0.000387289, gnorm=1.024, clip=0.000, oom=0.000, loss_scale=16.000, wall=2009, train_wall=1790 | |
| epoch 001: 6680 / 8862 loss=7.150, nll_loss=5.880, ppl=58.90, wps=50366, ups=3, wpb=15175.004, bsz=558.307, num_updates=6677, lr=0.000386999, gnorm=1.023, clip=0.000, oom=0.000, loss_scale=16.000, wall=2012, train_wall=1793 | |
| epoch 001: 6690 / 8862 loss=7.147, nll_loss=5.876, ppl=58.75, wps=50368, ups=3, wpb=15174.823, bsz=558.342, num_updates=6687, lr=0.000386709, gnorm=1.023, clip=0.000, oom=0.000, loss_scale=16.000, wall=2015, train_wall=1796 | |
| epoch 001: 6700 / 8862 loss=7.144, nll_loss=5.873, ppl=58.59, wps=50372, ups=3, wpb=15175.022, bsz=558.307, num_updates=6697, lr=0.00038642, gnorm=1.023, clip=0.000, oom=0.000, loss_scale=16.000, wall=2018, train_wall=1798 | |
| epoch 001: 6710 / 8862 loss=7.141, nll_loss=5.869, ppl=58.45, wps=50374, ups=3, wpb=15174.891, bsz=558.205, num_updates=6707, lr=0.000386132, gnorm=1.022, clip=0.000, oom=0.000, loss_scale=16.000, wall=2020, train_wall=1801 | |
| epoch 001: 6720 / 8862 loss=7.138, nll_loss=5.865, ppl=58.30, wps=50377, ups=3, wpb=15174.914, bsz=558.256, num_updates=6717, lr=0.000385845, gnorm=1.022, clip=0.000, oom=0.000, loss_scale=16.000, wall=2023, train_wall=1804 | |
| epoch 001: 6730 / 8862 loss=7.134, nll_loss=5.862, ppl=58.15, wps=50380, ups=3, wpb=15175.085, bsz=558.279, num_updates=6727, lr=0.000385558, gnorm=1.021, clip=0.000, oom=0.000, loss_scale=16.000, wall=2026, train_wall=1807 | |
| epoch 001: 6740 / 8862 loss=7.131, nll_loss=5.858, ppl=58.00, wps=50383, ups=3, wpb=15174.691, bsz=558.180, num_updates=6737, lr=0.000385271, gnorm=1.021, clip=0.000, oom=0.000, loss_scale=16.000, wall=2029, train_wall=1809 | |
| epoch 001: 6750 / 8862 loss=7.128, nll_loss=5.854, ppl=57.85, wps=50385, ups=3, wpb=15174.584, bsz=558.090, num_updates=6747, lr=0.000384986, gnorm=1.020, clip=0.000, oom=0.000, loss_scale=16.000, wall=2032, train_wall=1812 | |
| epoch 001: 6760 / 8862 loss=7.124, nll_loss=5.851, ppl=57.70, wps=50386, ups=3, wpb=15174.404, bsz=558.117, num_updates=6757, lr=0.000384701, gnorm=1.019, clip=0.000, oom=0.000, loss_scale=16.000, wall=2035, train_wall=1815 | |
| epoch 001: 6770 / 8862 loss=7.121, nll_loss=5.847, ppl=57.55, wps=50389, ups=3, wpb=15174.649, bsz=558.114, num_updates=6767, lr=0.000384416, gnorm=1.019, clip=0.000, oom=0.000, loss_scale=16.000, wall=2038, train_wall=1817 | |
| epoch 001: 6780 / 8862 loss=7.118, nll_loss=5.843, ppl=57.41, wps=50393, ups=3, wpb=15174.813, bsz=558.091, num_updates=6777, lr=0.000384133, gnorm=1.018, clip=0.000, oom=0.000, loss_scale=16.000, wall=2041, train_wall=1820 | |
| epoch 001: 6790 / 8862 loss=7.115, nll_loss=5.840, ppl=57.28, wps=50394, ups=3, wpb=15174.396, bsz=558.023, num_updates=6787, lr=0.00038385, gnorm=1.018, clip=0.000, oom=0.000, loss_scale=16.000, wall=2044, train_wall=1823 | |
| epoch 001: 6800 / 8862 loss=7.112, nll_loss=5.836, ppl=57.13, wps=50397, ups=3, wpb=15174.327, bsz=558.028, num_updates=6797, lr=0.000383567, gnorm=1.017, clip=0.000, oom=0.000, loss_scale=16.000, wall=2047, train_wall=1825 | |
| epoch 001: 6810 / 8862 loss=7.109, nll_loss=5.833, ppl=56.99, wps=50400, ups=3, wpb=15174.287, bsz=558.032, num_updates=6807, lr=0.000383285, gnorm=1.017, clip=0.000, oom=0.000, loss_scale=16.000, wall=2049, train_wall=1828 | |
| epoch 001: 6820 / 8862 loss=7.105, nll_loss=5.829, ppl=56.83, wps=50403, ups=3, wpb=15174.620, bsz=558.128, num_updates=6817, lr=0.000383004, gnorm=1.016, clip=0.000, oom=0.000, loss_scale=16.000, wall=2052, train_wall=1831 | |
| epoch 001: 6830 / 8862 loss=7.102, nll_loss=5.825, ppl=56.68, wps=50406, ups=3, wpb=15174.672, bsz=558.188, num_updates=6827, lr=0.000382723, gnorm=1.016, clip=0.000, oom=0.000, loss_scale=16.000, wall=2055, train_wall=1833 | |
| epoch 001: 6840 / 8862 loss=7.099, nll_loss=5.821, ppl=56.53, wps=50411, ups=3, wpb=15175.184, bsz=558.122, num_updates=6837, lr=0.000382443, gnorm=1.015, clip=0.000, oom=0.000, loss_scale=16.000, wall=2058, train_wall=1836 | |
| epoch 001: 6850 / 8862 loss=7.095, nll_loss=5.817, ppl=56.39, wps=50414, ups=3, wpb=15175.323, bsz=558.114, num_updates=6847, lr=0.000382164, gnorm=1.015, clip=0.000, oom=0.000, loss_scale=16.000, wall=2061, train_wall=1839 | |
| epoch 001: 6860 / 8862 loss=7.092, nll_loss=5.814, ppl=56.26, wps=50416, ups=3, wpb=15175.087, bsz=558.057, num_updates=6857, lr=0.000381885, gnorm=1.014, clip=0.000, oom=0.000, loss_scale=16.000, wall=2064, train_wall=1841 | |
| epoch 001: 6870 / 8862 loss=7.089, nll_loss=5.810, ppl=56.12, wps=50419, ups=3, wpb=15174.937, bsz=558.019, num_updates=6867, lr=0.000381607, gnorm=1.014, clip=0.000, oom=0.000, loss_scale=16.000, wall=2067, train_wall=1844 | |
| epoch 001: 6880 / 8862 loss=7.086, nll_loss=5.807, ppl=55.98, wps=50421, ups=3, wpb=15174.770, bsz=557.984, num_updates=6877, lr=0.00038133, gnorm=1.013, clip=0.000, oom=0.000, loss_scale=16.000, wall=2070, train_wall=1847 | |
| epoch 001: 6890 / 8862 loss=7.083, nll_loss=5.803, ppl=55.84, wps=50423, ups=3, wpb=15174.532, bsz=557.927, num_updates=6887, lr=0.000381053, gnorm=1.013, clip=0.000, oom=0.000, loss_scale=16.000, wall=2073, train_wall=1849 | |
| epoch 001: 6900 / 8862 loss=7.080, nll_loss=5.800, ppl=55.71, wps=50427, ups=3, wpb=15174.671, bsz=557.816, num_updates=6897, lr=0.000380776, gnorm=1.012, clip=0.000, oom=0.000, loss_scale=16.000, wall=2075, train_wall=1852 | |
| epoch 001: 6910 / 8862 loss=7.077, nll_loss=5.796, ppl=55.57, wps=50430, ups=3, wpb=15174.835, bsz=557.887, num_updates=6907, lr=0.000380501, gnorm=1.012, clip=0.000, oom=0.000, loss_scale=16.000, wall=2078, train_wall=1855 | |
| epoch 001: 6920 / 8862 loss=7.074, nll_loss=5.793, ppl=55.43, wps=50433, ups=3, wpb=15175.097, bsz=557.907, num_updates=6917, lr=0.000380225, gnorm=1.011, clip=0.000, oom=0.000, loss_scale=16.000, wall=2081, train_wall=1857 | |
| epoch 001: 6930 / 8862 loss=7.071, nll_loss=5.789, ppl=55.30, wps=50436, ups=3, wpb=15175.207, bsz=557.886, num_updates=6927, lr=0.000379951, gnorm=1.011, clip=0.000, oom=0.000, loss_scale=16.000, wall=2084, train_wall=1860 | |
| epoch 001: 6940 / 8862 loss=7.067, nll_loss=5.786, ppl=55.16, wps=50439, ups=3, wpb=15175.184, bsz=557.871, num_updates=6937, lr=0.000379677, gnorm=1.010, clip=0.000, oom=0.000, loss_scale=16.000, wall=2087, train_wall=1863 | |
| epoch 001: 6950 / 8862 loss=7.064, nll_loss=5.782, ppl=55.03, wps=50443, ups=3, wpb=15175.461, bsz=557.783, num_updates=6947, lr=0.000379404, gnorm=1.010, clip=0.000, oom=0.000, loss_scale=16.000, wall=2090, train_wall=1865 | |
| epoch 001: 6960 / 8862 loss=7.061, nll_loss=5.778, ppl=54.89, wps=50446, ups=3, wpb=15175.606, bsz=557.782, num_updates=6957, lr=0.000379131, gnorm=1.009, clip=0.000, oom=0.000, loss_scale=16.000, wall=2093, train_wall=1868 | |
| epoch 001: 6970 / 8862 loss=7.058, nll_loss=5.775, ppl=54.76, wps=50448, ups=3, wpb=15175.514, bsz=557.835, num_updates=6967, lr=0.000378859, gnorm=1.009, clip=0.000, oom=0.000, loss_scale=16.000, wall=2096, train_wall=1871 | |
| epoch 001: 6980 / 8862 loss=7.055, nll_loss=5.772, ppl=54.63, wps=50450, ups=3, wpb=15175.384, bsz=557.798, num_updates=6977, lr=0.000378587, gnorm=1.008, clip=0.000, oom=0.000, loss_scale=16.000, wall=2099, train_wall=1873 | |
| epoch 001: 6990 / 8862 loss=7.052, nll_loss=5.768, ppl=54.49, wps=50453, ups=3, wpb=15175.523, bsz=557.955, num_updates=6987, lr=0.000378316, gnorm=1.008, clip=0.000, oom=0.000, loss_scale=16.000, wall=2102, train_wall=1876 | |
| epoch 001: 7000 / 8862 loss=7.049, nll_loss=5.764, ppl=54.35, wps=50456, ups=3, wpb=15175.504, bsz=558.105, num_updates=6997, lr=0.000378045, gnorm=1.007, clip=0.000, oom=0.000, loss_scale=16.000, wall=2104, train_wall=1879 | |
| epoch 001: 7010 / 8862 loss=7.046, nll_loss=5.761, ppl=54.22, wps=50458, ups=3, wpb=15175.335, bsz=558.058, num_updates=7007, lr=0.000377776, gnorm=1.007, clip=0.000, oom=0.000, loss_scale=16.000, wall=2107, train_wall=1882 | |
| epoch 001: 7020 / 8862 loss=7.042, nll_loss=5.757, ppl=54.09, wps=50461, ups=3, wpb=15175.298, bsz=558.070, num_updates=7017, lr=0.000377506, gnorm=1.006, clip=0.000, oom=0.000, loss_scale=16.000, wall=2110, train_wall=1884 | |
| epoch 001: 7030 / 8862 loss=7.040, nll_loss=5.754, ppl=53.96, wps=50463, ups=3, wpb=15175.372, bsz=558.178, num_updates=7027, lr=0.000377238, gnorm=1.006, clip=0.000, oom=0.000, loss_scale=16.000, wall=2113, train_wall=1887 | |
| epoch 001: 7040 / 8862 loss=7.037, nll_loss=5.751, ppl=53.84, wps=50466, ups=3, wpb=15175.327, bsz=558.085, num_updates=7037, lr=0.00037697, gnorm=1.005, clip=0.000, oom=0.000, loss_scale=16.000, wall=2116, train_wall=1890 | |
| epoch 001: 7050 / 8862 loss=7.034, nll_loss=5.747, ppl=53.71, wps=50470, ups=3, wpb=15175.694, bsz=558.086, num_updates=7047, lr=0.000376702, gnorm=1.005, clip=0.000, oom=0.000, loss_scale=16.000, wall=2119, train_wall=1892 | |
| epoch 001: 7060 / 8862 loss=7.031, nll_loss=5.744, ppl=53.59, wps=50473, ups=3, wpb=15175.561, bsz=558.007, num_updates=7057, lr=0.000376435, gnorm=1.004, clip=0.000, oom=0.000, loss_scale=16.000, wall=2122, train_wall=1895 | |
| epoch 001: 7070 / 8862 loss=7.028, nll_loss=5.740, ppl=53.46, wps=50476, ups=3, wpb=15175.713, bsz=558.041, num_updates=7067, lr=0.000376169, gnorm=1.004, clip=0.000, oom=0.000, loss_scale=16.000, wall=2125, train_wall=1898 | |
| epoch 001: 7080 / 8862 loss=7.025, nll_loss=5.737, ppl=53.33, wps=50479, ups=3, wpb=15176.045, bsz=558.227, num_updates=7077, lr=0.000375903, gnorm=1.003, clip=0.000, oom=0.000, loss_scale=16.000, wall=2128, train_wall=1900 | |
| epoch 001: 7090 / 8862 loss=7.021, nll_loss=5.733, ppl=53.20, wps=50483, ups=3, wpb=15176.410, bsz=558.163, num_updates=7087, lr=0.000375637, gnorm=1.003, clip=0.000, oom=0.000, loss_scale=16.000, wall=2131, train_wall=1903 | |
| epoch 001: 7100 / 8862 loss=7.019, nll_loss=5.730, ppl=53.07, wps=50485, ups=3, wpb=15176.349, bsz=558.139, num_updates=7097, lr=0.000375373, gnorm=1.002, clip=0.000, oom=0.000, loss_scale=16.000, wall=2133, train_wall=1906 | |
| epoch 001: 7110 / 8862 loss=7.016, nll_loss=5.727, ppl=52.96, wps=50488, ups=3, wpb=15176.081, bsz=558.031, num_updates=7107, lr=0.000375108, gnorm=1.002, clip=0.000, oom=0.000, loss_scale=16.000, wall=2136, train_wall=1908 | |
| epoch 001: 7120 / 8862 loss=7.013, nll_loss=5.723, ppl=52.84, wps=50490, ups=3, wpb=15176.066, bsz=558.036, num_updates=7117, lr=0.000374845, gnorm=1.001, clip=0.000, oom=0.000, loss_scale=16.000, wall=2139, train_wall=1911 | |
| epoch 001: 7130 / 8862 loss=7.010, nll_loss=5.720, ppl=52.72, wps=50493, ups=3, wpb=15175.896, bsz=557.985, num_updates=7127, lr=0.000374582, gnorm=1.001, clip=0.000, oom=0.000, loss_scale=16.000, wall=2142, train_wall=1914 | |
| epoch 001: 7140 / 8862 loss=7.007, nll_loss=5.717, ppl=52.59, wps=50496, ups=3, wpb=15176.137, bsz=557.960, num_updates=7137, lr=0.000374319, gnorm=1.000, clip=0.000, oom=0.000, loss_scale=16.000, wall=2145, train_wall=1916 | |
| epoch 001: 7150 / 8862 loss=7.004, nll_loss=5.714, ppl=52.47, wps=50499, ups=3, wpb=15176.032, bsz=557.917, num_updates=7147, lr=0.000374057, gnorm=1.000, clip=0.000, oom=0.000, loss_scale=16.000, wall=2148, train_wall=1919 | |
| epoch 001: 7160 / 8862 loss=7.001, nll_loss=5.710, ppl=52.34, wps=50500, ups=3, wpb=15176.131, bsz=558.130, num_updates=7157, lr=0.000373796, gnorm=0.999, clip=0.000, oom=0.000, loss_scale=16.000, wall=2151, train_wall=1922 | |
| epoch 001: 7170 / 8862 loss=6.998, nll_loss=5.707, ppl=52.23, wps=50503, ups=3, wpb=15176.074, bsz=558.140, num_updates=7167, lr=0.000373535, gnorm=0.999, clip=0.000, oom=0.000, loss_scale=16.000, wall=2154, train_wall=1924 | |
| epoch 001: 7180 / 8862 loss=6.995, nll_loss=5.703, ppl=52.10, wps=50505, ups=3, wpb=15176.163, bsz=558.181, num_updates=7177, lr=0.000373275, gnorm=0.999, clip=0.000, oom=0.000, loss_scale=16.000, wall=2157, train_wall=1927 | |
| epoch 001: 7190 / 8862 loss=6.992, nll_loss=5.700, ppl=51.98, wps=50507, ups=3, wpb=15175.740, bsz=558.166, num_updates=7187, lr=0.000373015, gnorm=0.998, clip=0.000, oom=0.000, loss_scale=16.000, wall=2159, train_wall=1930 | |
| epoch 001: 7200 / 8862 loss=6.989, nll_loss=5.696, ppl=51.86, wps=50510, ups=3, wpb=15175.998, bsz=558.200, num_updates=7197, lr=0.000372756, gnorm=0.998, clip=0.000, oom=0.000, loss_scale=16.000, wall=2162, train_wall=1932 | |
| epoch 001: 7210 / 8862 loss=6.986, nll_loss=5.693, ppl=51.75, wps=50511, ups=3, wpb=15175.458, bsz=558.119, num_updates=7207, lr=0.000372497, gnorm=0.997, clip=0.000, oom=0.000, loss_scale=16.000, wall=2165, train_wall=1935 | |
| epoch 001: 7220 / 8862 loss=6.984, nll_loss=5.690, ppl=51.64, wps=50511, ups=3, wpb=15174.807, bsz=558.314, num_updates=7217, lr=0.000372239, gnorm=0.997, clip=0.000, oom=0.000, loss_scale=16.000, wall=2168, train_wall=1938 | |
| epoch 001: 7230 / 8862 loss=6.981, nll_loss=5.687, ppl=51.52, wps=50514, ups=3, wpb=15175.026, bsz=558.291, num_updates=7227, lr=0.000371981, gnorm=0.996, clip=0.000, oom=0.000, loss_scale=16.000, wall=2171, train_wall=1940 | |
| epoch 001: 7240 / 8862 loss=6.978, nll_loss=5.684, ppl=51.41, wps=50517, ups=3, wpb=15174.951, bsz=558.234, num_updates=7237, lr=0.000371724, gnorm=0.996, clip=0.000, oom=0.000, loss_scale=16.000, wall=2174, train_wall=1943 | |
| epoch 001: 7250 / 8862 loss=6.975, nll_loss=5.681, ppl=51.29, wps=50520, ups=3, wpb=15175.147, bsz=558.196, num_updates=7247, lr=0.000371468, gnorm=0.995, clip=0.000, oom=0.000, loss_scale=16.000, wall=2177, train_wall=1946 | |
| epoch 001: 7260 / 8862 loss=6.972, nll_loss=5.677, ppl=51.17, wps=50523, ups=3, wpb=15175.152, bsz=558.226, num_updates=7257, lr=0.000371212, gnorm=0.995, clip=0.000, oom=0.000, loss_scale=16.000, wall=2180, train_wall=1948 | |
| epoch 001: 7270 / 8862 loss=6.969, nll_loss=5.674, ppl=51.06, wps=50525, ups=3, wpb=15175.052, bsz=558.300, num_updates=7267, lr=0.000370956, gnorm=0.994, clip=0.000, oom=0.000, loss_scale=16.000, wall=2183, train_wall=1951 | |
| epoch 001: 7280 / 8862 loss=6.966, nll_loss=5.671, ppl=50.94, wps=50527, ups=3, wpb=15175.058, bsz=558.259, num_updates=7277, lr=0.000370701, gnorm=0.994, clip=0.000, oom=0.000, loss_scale=16.000, wall=2186, train_wall=1954 | |
| epoch 001: 7290 / 8862 loss=6.963, nll_loss=5.667, ppl=50.82, wps=50530, ups=3, wpb=15175.193, bsz=558.263, num_updates=7287, lr=0.000370447, gnorm=0.993, clip=0.000, oom=0.000, loss_scale=16.000, wall=2188, train_wall=1956 | |
| epoch 001: 7300 / 8862 loss=6.960, nll_loss=5.664, ppl=50.69, wps=50533, ups=3, wpb=15175.354, bsz=558.332, num_updates=7297, lr=0.000370193, gnorm=0.993, clip=0.000, oom=0.000, loss_scale=16.000, wall=2191, train_wall=1959 | |
| epoch 001: 7310 / 8862 loss=6.958, nll_loss=5.661, ppl=50.59, wps=50535, ups=3, wpb=15174.946, bsz=558.200, num_updates=7307, lr=0.000369939, gnorm=0.992, clip=0.000, oom=0.000, loss_scale=16.000, wall=2194, train_wall=1962 | |
| epoch 001: 7320 / 8862 loss=6.955, nll_loss=5.658, ppl=50.48, wps=50536, ups=3, wpb=15174.399, bsz=558.159, num_updates=7317, lr=0.000369686, gnorm=0.992, clip=0.000, oom=0.000, loss_scale=16.000, wall=2197, train_wall=1964 | |
| epoch 001: 7330 / 8862 loss=6.952, nll_loss=5.655, ppl=50.37, wps=50539, ups=3, wpb=15174.300, bsz=558.064, num_updates=7327, lr=0.000369434, gnorm=0.991, clip=0.000, oom=0.000, loss_scale=16.000, wall=2200, train_wall=1967 | |
| epoch 001: 7340 / 8862 loss=6.949, nll_loss=5.651, ppl=50.25, wps=50543, ups=3, wpb=15174.871, bsz=558.074, num_updates=7337, lr=0.000369182, gnorm=0.991, clip=0.000, oom=0.000, loss_scale=16.000, wall=2203, train_wall=1970 | |
| epoch 001: 7350 / 8862 loss=6.947, nll_loss=5.648, ppl=50.15, wps=50544, ups=3, wpb=15174.521, bsz=558.128, num_updates=7347, lr=0.000368931, gnorm=0.990, clip=0.000, oom=0.000, loss_scale=16.000, wall=2206, train_wall=1972 | |
| epoch 001: 7360 / 8862 loss=6.944, nll_loss=5.645, ppl=50.04, wps=50546, ups=3, wpb=15174.379, bsz=558.083, num_updates=7357, lr=0.00036868, gnorm=0.990, clip=0.000, oom=0.000, loss_scale=16.000, wall=2209, train_wall=1975 | |
| epoch 001: 7370 / 8862 loss=6.941, nll_loss=5.642, ppl=49.93, wps=50549, ups=3, wpb=15174.399, bsz=558.149, num_updates=7367, lr=0.00036843, gnorm=0.990, clip=0.000, oom=0.000, loss_scale=16.000, wall=2212, train_wall=1978 | |
| epoch 001: 7380 / 8862 loss=6.938, nll_loss=5.639, ppl=49.82, wps=50551, ups=3, wpb=15174.628, bsz=558.253, num_updates=7377, lr=0.00036818, gnorm=0.989, clip=0.000, oom=0.000, loss_scale=16.000, wall=2214, train_wall=1981 | |
| epoch 001: 7390 / 8862 loss=6.936, nll_loss=5.636, ppl=49.71, wps=50553, ups=3, wpb=15174.275, bsz=558.118, num_updates=7387, lr=0.000367931, gnorm=0.989, clip=0.000, oom=0.000, loss_scale=16.000, wall=2217, train_wall=1983 | |
| epoch 001: 7400 / 8862 loss=6.933, nll_loss=5.632, ppl=49.60, wps=50556, ups=3, wpb=15174.552, bsz=558.157, num_updates=7397, lr=0.000367682, gnorm=0.988, clip=0.000, oom=0.000, loss_scale=16.000, wall=2220, train_wall=1986 | |
| epoch 001: 7410 / 8862 loss=6.930, nll_loss=5.629, ppl=49.49, wps=50560, ups=3, wpb=15174.859, bsz=558.131, num_updates=7407, lr=0.000367434, gnorm=0.988, clip=0.000, oom=0.000, loss_scale=16.000, wall=2223, train_wall=1989 | |
| epoch 001: 7420 / 8862 loss=6.927, nll_loss=5.626, ppl=49.38, wps=50563, ups=3, wpb=15174.953, bsz=558.000, num_updates=7417, lr=0.000367186, gnorm=0.987, clip=0.000, oom=0.000, loss_scale=16.000, wall=2226, train_wall=1991 | |
| epoch 001: 7430 / 8862 loss=6.924, nll_loss=5.623, ppl=49.28, wps=50565, ups=3, wpb=15174.909, bsz=558.040, num_updates=7427, lr=0.000366939, gnorm=0.987, clip=0.000, oom=0.000, loss_scale=16.000, wall=2229, train_wall=1994 | |
| epoch 001: 7440 / 8862 loss=6.921, nll_loss=5.620, ppl=49.17, wps=50569, ups=3, wpb=15175.270, bsz=558.058, num_updates=7437, lr=0.000366692, gnorm=0.986, clip=0.000, oom=0.000, loss_scale=16.000, wall=2232, train_wall=1997 | |
| epoch 001: 7450 / 8862 loss=6.918, nll_loss=5.616, ppl=49.05, wps=50572, ups=3, wpb=15175.628, bsz=558.228, num_updates=7447, lr=0.000366445, gnorm=0.986, clip=0.000, oom=0.000, loss_scale=16.000, wall=2235, train_wall=1999 | |
| epoch 001: 7460 / 8862 loss=6.916, nll_loss=5.613, ppl=48.95, wps=50576, ups=3, wpb=15175.956, bsz=558.146, num_updates=7457, lr=0.0003662, gnorm=0.985, clip=0.000, oom=0.000, loss_scale=16.000, wall=2238, train_wall=2002 | |
| epoch 001: 7470 / 8862 loss=6.913, nll_loss=5.610, ppl=48.84, wps=50577, ups=3, wpb=15175.870, bsz=558.323, num_updates=7467, lr=0.000365954, gnorm=0.985, clip=0.000, oom=0.000, loss_scale=16.000, wall=2240, train_wall=2005 | |
| epoch 001: 7480 / 8862 loss=6.910, nll_loss=5.607, ppl=48.73, wps=50579, ups=3, wpb=15175.617, bsz=558.333, num_updates=7477, lr=0.00036571, gnorm=0.984, clip=0.000, oom=0.000, loss_scale=16.000, wall=2243, train_wall=2007 | |
| epoch 001: 7490 / 8862 loss=6.907, nll_loss=5.604, ppl=48.62, wps=50581, ups=3, wpb=15175.697, bsz=558.479, num_updates=7487, lr=0.000365465, gnorm=0.984, clip=0.000, oom=0.000, loss_scale=16.000, wall=2246, train_wall=2010 | |
| epoch 001: 7500 / 8862 loss=6.905, nll_loss=5.601, ppl=48.52, wps=50585, ups=3, wpb=15176.063, bsz=558.436, num_updates=7497, lr=0.000365221, gnorm=0.983, clip=0.000, oom=0.000, loss_scale=16.000, wall=2249, train_wall=2013 | |
| epoch 001: 7510 / 8862 loss=6.902, nll_loss=5.597, ppl=48.41, wps=50588, ups=3, wpb=15176.178, bsz=558.482, num_updates=7507, lr=0.000364978, gnorm=0.983, clip=0.000, oom=0.000, loss_scale=16.000, wall=2252, train_wall=2015 | |
| epoch 001: 7520 / 8862 loss=6.899, nll_loss=5.594, ppl=48.30, wps=50590, ups=3, wpb=15176.389, bsz=558.657, num_updates=7517, lr=0.000364735, gnorm=0.983, clip=0.000, oom=0.000, loss_scale=16.000, wall=2255, train_wall=2018 | |
| epoch 001: 7530 / 8862 loss=6.896, nll_loss=5.591, ppl=48.20, wps=50592, ups=3, wpb=15176.322, bsz=558.612, num_updates=7527, lr=0.000364493, gnorm=0.982, clip=0.000, oom=0.000, loss_scale=16.000, wall=2258, train_wall=2021 | |
| epoch 001: 7540 / 8862 loss=6.894, nll_loss=5.588, ppl=48.10, wps=50596, ups=3, wpb=15176.622, bsz=558.571, num_updates=7537, lr=0.000364251, gnorm=0.982, clip=0.000, oom=0.000, loss_scale=16.000, wall=2261, train_wall=2023 | |
| epoch 001: 7550 / 8862 loss=6.891, nll_loss=5.585, ppl=48.00, wps=50598, ups=3, wpb=15176.563, bsz=558.418, num_updates=7547, lr=0.00036401, gnorm=0.981, clip=0.000, oom=0.000, loss_scale=16.000, wall=2264, train_wall=2026 | |
| epoch 001: 7560 / 8862 loss=6.889, nll_loss=5.582, ppl=47.91, wps=50601, ups=3, wpb=15176.575, bsz=558.447, num_updates=7557, lr=0.000363769, gnorm=0.981, clip=0.000, oom=0.000, loss_scale=16.000, wall=2267, train_wall=2029 | |
| epoch 001: 7570 / 8862 loss=6.886, nll_loss=5.579, ppl=47.81, wps=50603, ups=3, wpb=15176.622, bsz=558.408, num_updates=7567, lr=0.000363528, gnorm=0.980, clip=0.000, oom=0.000, loss_scale=16.000, wall=2269, train_wall=2031 | |
| epoch 001: 7580 / 8862 loss=6.883, nll_loss=5.576, ppl=47.71, wps=50607, ups=3, wpb=15177.083, bsz=558.389, num_updates=7577, lr=0.000363288, gnorm=0.980, clip=0.000, oom=0.000, loss_scale=16.000, wall=2272, train_wall=2034 | |
| epoch 001: 7590 / 8862 loss=6.881, nll_loss=5.573, ppl=47.61, wps=50609, ups=3, wpb=15177.023, bsz=558.305, num_updates=7587, lr=0.000363049, gnorm=0.980, clip=0.000, oom=0.000, loss_scale=16.000, wall=2275, train_wall=2037 | |
| epoch 001: 7600 / 8862 loss=6.878, nll_loss=5.570, ppl=47.51, wps=50611, ups=3, wpb=15176.869, bsz=558.346, num_updates=7597, lr=0.00036281, gnorm=0.979, clip=0.000, oom=0.000, loss_scale=16.000, wall=2278, train_wall=2039 | |
| epoch 001: 7610 / 8862 loss=6.875, nll_loss=5.567, ppl=47.41, wps=50612, ups=3, wpb=15176.631, bsz=558.282, num_updates=7607, lr=0.000362571, gnorm=0.979, clip=0.000, oom=0.000, loss_scale=16.000, wall=2281, train_wall=2042 | |
| epoch 001: 7620 / 8862 loss=6.873, nll_loss=5.564, ppl=47.31, wps=50614, ups=3, wpb=15176.864, bsz=558.332, num_updates=7617, lr=0.000362333, gnorm=0.978, clip=0.000, oom=0.000, loss_scale=16.000, wall=2284, train_wall=2045 | |
| epoch 001: 7630 / 8862 loss=6.870, nll_loss=5.561, ppl=47.21, wps=50616, ups=3, wpb=15177.096, bsz=558.357, num_updates=7627, lr=0.000362095, gnorm=0.978, clip=0.000, oom=0.000, loss_scale=16.000, wall=2287, train_wall=2048 | |
| epoch 001: 7640 / 8862 loss=6.867, nll_loss=5.558, ppl=47.11, wps=50618, ups=3, wpb=15176.974, bsz=558.337, num_updates=7637, lr=0.000361858, gnorm=0.977, clip=0.000, oom=0.000, loss_scale=16.000, wall=2290, train_wall=2050 | |
| epoch 001: 7650 / 8862 loss=6.865, nll_loss=5.555, ppl=47.02, wps=50618, ups=3, wpb=15176.701, bsz=558.246, num_updates=7647, lr=0.000361622, gnorm=0.977, clip=0.000, oom=0.000, loss_scale=16.000, wall=2293, train_wall=2053 | |
| epoch 001: 7660 / 8862 loss=6.862, nll_loss=5.552, ppl=46.92, wps=50619, ups=3, wpb=15176.873, bsz=558.416, num_updates=7657, lr=0.000361385, gnorm=0.976, clip=0.000, oom=0.000, loss_scale=16.000, wall=2296, train_wall=2056 | |
| epoch 001: 7670 / 8862 loss=6.859, nll_loss=5.549, ppl=46.82, wps=50622, ups=3, wpb=15177.375, bsz=558.391, num_updates=7667, lr=0.00036115, gnorm=0.976, clip=0.000, oom=0.000, loss_scale=16.000, wall=2299, train_wall=2058 | |
| epoch 001: 7680 / 8862 loss=6.857, nll_loss=5.546, ppl=46.72, wps=50624, ups=3, wpb=15177.611, bsz=558.371, num_updates=7677, lr=0.000360914, gnorm=0.975, clip=0.000, oom=0.000, loss_scale=16.000, wall=2302, train_wall=2061 | |
| epoch 001: 7690 / 8862 loss=6.854, nll_loss=5.543, ppl=46.61, wps=50627, ups=3, wpb=15178.025, bsz=558.420, num_updates=7687, lr=0.00036068, gnorm=0.975, clip=0.000, oom=0.000, loss_scale=16.000, wall=2305, train_wall=2064 | |
| epoch 001: 7700 / 8862 loss=6.851, nll_loss=5.540, ppl=46.52, wps=50630, ups=3, wpb=15178.585, bsz=558.478, num_updates=7697, lr=0.000360445, gnorm=0.975, clip=0.000, oom=0.000, loss_scale=16.000, wall=2308, train_wall=2067 | |
| epoch 001: 7710 / 8862 loss=6.849, nll_loss=5.537, ppl=46.42, wps=50632, ups=3, wpb=15178.638, bsz=558.386, num_updates=7707, lr=0.000360211, gnorm=0.974, clip=0.000, oom=0.000, loss_scale=16.000, wall=2310, train_wall=2069 | |
| epoch 001: 7720 / 8862 loss=6.846, nll_loss=5.534, ppl=46.33, wps=50633, ups=3, wpb=15178.599, bsz=558.336, num_updates=7717, lr=0.000359978, gnorm=0.974, clip=0.000, oom=0.000, loss_scale=16.000, wall=2313, train_wall=2072 | |
| epoch 001: 7730 / 8862 loss=6.843, nll_loss=5.531, ppl=46.23, wps=50635, ups=3, wpb=15178.615, bsz=558.273, num_updates=7727, lr=0.000359745, gnorm=0.973, clip=0.000, oom=0.000, loss_scale=16.000, wall=2316, train_wall=2075 | |
| epoch 001: 7740 / 8862 loss=6.841, nll_loss=5.528, ppl=46.14, wps=50636, ups=3, wpb=15178.494, bsz=558.164, num_updates=7737, lr=0.000359512, gnorm=0.973, clip=0.000, oom=0.000, loss_scale=16.000, wall=2319, train_wall=2077 | |
| epoch 001: 7750 / 8862 loss=6.838, nll_loss=5.525, ppl=46.05, wps=50639, ups=3, wpb=15178.656, bsz=558.166, num_updates=7747, lr=0.00035928, gnorm=0.973, clip=0.000, oom=0.000, loss_scale=16.000, wall=2322, train_wall=2080 | |
| epoch 001: 7760 / 8862 loss=6.835, nll_loss=5.522, ppl=45.94, wps=50642, ups=3, wpb=15179.115, bsz=558.279, num_updates=7757, lr=0.000359048, gnorm=0.972, clip=0.000, oom=0.000, loss_scale=16.000, wall=2325, train_wall=2083 | |
| epoch 001: 7770 / 8862 loss=6.833, nll_loss=5.519, ppl=45.85, wps=50643, ups=3, wpb=15178.766, bsz=558.207, num_updates=7767, lr=0.000358817, gnorm=0.972, clip=0.000, oom=0.000, loss_scale=16.000, wall=2328, train_wall=2085 | |
| epoch 001: 7780 / 8862 loss=6.830, nll_loss=5.516, ppl=45.76, wps=50642, ups=3, wpb=15178.397, bsz=558.324, num_updates=7777, lr=0.000358587, gnorm=0.971, clip=0.000, oom=0.000, loss_scale=16.000, wall=2331, train_wall=2088 | |
| epoch 001: 7790 / 8862 loss=6.828, nll_loss=5.513, ppl=45.67, wps=50644, ups=3, wpb=15178.579, bsz=558.347, num_updates=7787, lr=0.000358356, gnorm=0.971, clip=0.000, oom=0.000, loss_scale=16.000, wall=2334, train_wall=2091 | |
| epoch 001: 7800 / 8862 loss=6.825, nll_loss=5.510, ppl=45.57, wps=50645, ups=3, wpb=15178.258, bsz=558.451, num_updates=7797, lr=0.000358126, gnorm=0.970, clip=0.000, oom=0.000, loss_scale=16.000, wall=2337, train_wall=2094 | |
| epoch 001: 7810 / 8862 loss=6.823, nll_loss=5.507, ppl=45.48, wps=50647, ups=3, wpb=15178.407, bsz=558.390, num_updates=7807, lr=0.000357897, gnorm=0.970, clip=0.000, oom=0.000, loss_scale=16.000, wall=2340, train_wall=2096 | |
| epoch 001: 7820 / 8862 loss=6.820, nll_loss=5.505, ppl=45.40, wps=50648, ups=3, wpb=15178.018, bsz=558.346, num_updates=7817, lr=0.000357668, gnorm=0.970, clip=0.000, oom=0.000, loss_scale=16.000, wall=2343, train_wall=2099 | |
| epoch 001: 7830 / 8862 loss=6.818, nll_loss=5.502, ppl=45.31, wps=50650, ups=3, wpb=15178.170, bsz=558.320, num_updates=7827, lr=0.000357439, gnorm=0.969, clip=0.000, oom=0.000, loss_scale=16.000, wall=2346, train_wall=2102 | |
| epoch 001: 7840 / 8862 loss=6.815, nll_loss=5.499, ppl=45.22, wps=50650, ups=3, wpb=15177.437, bsz=558.332, num_updates=7837, lr=0.000357211, gnorm=0.969, clip=0.000, oom=0.000, loss_scale=16.000, wall=2348, train_wall=2104 | |
| epoch 001: 7850 / 8862 loss=6.813, nll_loss=5.496, ppl=45.13, wps=50649, ups=3, wpb=15177.145, bsz=558.359, num_updates=7847, lr=0.000356984, gnorm=0.968, clip=0.000, oom=0.000, loss_scale=16.000, wall=2351, train_wall=2107 | |
| epoch 001: 7860 / 8862 loss=6.810, nll_loss=5.493, ppl=45.04, wps=50650, ups=3, wpb=15176.987, bsz=558.351, num_updates=7857, lr=0.000356756, gnorm=0.968, clip=0.000, oom=0.000, loss_scale=16.000, wall=2354, train_wall=2110 | |
| epoch 001: 7870 / 8862 loss=6.808, nll_loss=5.490, ppl=44.95, wps=50653, ups=3, wpb=15177.206, bsz=558.279, num_updates=7867, lr=0.000356529, gnorm=0.967, clip=0.000, oom=0.000, loss_scale=16.000, wall=2357, train_wall=2113 | |
| epoch 001: 7880 / 8862 loss=6.805, nll_loss=5.487, ppl=44.86, wps=50655, ups=3, wpb=15177.353, bsz=558.194, num_updates=7877, lr=0.000356303, gnorm=0.967, clip=0.000, oom=0.000, loss_scale=16.000, wall=2360, train_wall=2115 | |
| epoch 001: 7890 / 8862 loss=6.803, nll_loss=5.485, ppl=44.77, wps=50657, ups=3, wpb=15177.452, bsz=558.185, num_updates=7887, lr=0.000356077, gnorm=0.966, clip=0.000, oom=0.000, loss_scale=16.000, wall=2363, train_wall=2118 | |
| epoch 001: 7900 / 8862 loss=6.800, nll_loss=5.482, ppl=44.68, wps=50657, ups=3, wpb=15177.084, bsz=558.242, num_updates=7897, lr=0.000355852, gnorm=0.966, clip=0.000, oom=0.000, loss_scale=16.000, wall=2366, train_wall=2121 | |
| epoch 001: 7910 / 8862 loss=6.798, nll_loss=5.479, ppl=44.60, wps=50659, ups=3, wpb=15177.072, bsz=558.173, num_updates=7907, lr=0.000355627, gnorm=0.966, clip=0.000, oom=0.000, loss_scale=16.000, wall=2369, train_wall=2123 | |
| epoch 001: 7920 / 8862 loss=6.795, nll_loss=5.476, ppl=44.51, wps=50660, ups=3, wpb=15177.357, bsz=558.154, num_updates=7917, lr=0.000355402, gnorm=0.965, clip=0.000, oom=0.000, loss_scale=16.000, wall=2372, train_wall=2126 | |
| epoch 001: 7930 / 8862 loss=6.793, nll_loss=5.473, ppl=44.42, wps=50662, ups=3, wpb=15177.500, bsz=558.093, num_updates=7927, lr=0.000355178, gnorm=0.965, clip=0.000, oom=0.000, loss_scale=16.000, wall=2375, train_wall=2129 | |
| epoch 001: 7940 / 8862 loss=6.790, nll_loss=5.470, ppl=44.33, wps=50664, ups=3, wpb=15177.641, bsz=558.099, num_updates=7937, lr=0.000354954, gnorm=0.964, clip=0.000, oom=0.000, loss_scale=16.000, wall=2378, train_wall=2132 | |
| epoch 001: 7950 / 8862 loss=6.788, nll_loss=5.468, ppl=44.25, wps=50666, ups=3, wpb=15177.798, bsz=557.991, num_updates=7947, lr=0.00035473, gnorm=0.964, clip=0.000, oom=0.000, loss_scale=16.000, wall=2381, train_wall=2134 | |
| epoch 001: 7960 / 8862 loss=6.785, nll_loss=5.465, ppl=44.16, wps=50667, ups=3, wpb=15177.670, bsz=558.023, num_updates=7957, lr=0.000354507, gnorm=0.964, clip=0.000, oom=0.000, loss_scale=16.000, wall=2384, train_wall=2137 | |
| epoch 001: 7970 / 8862 loss=6.783, nll_loss=5.462, ppl=44.08, wps=50667, ups=3, wpb=15177.421, bsz=557.931, num_updates=7967, lr=0.000354285, gnorm=0.963, clip=0.000, oom=0.000, loss_scale=16.000, wall=2387, train_wall=2140 | |
| epoch 001: 7980 / 8862 loss=6.780, nll_loss=5.459, ppl=44.00, wps=50666, ups=3, wpb=15176.755, bsz=557.919, num_updates=7977, lr=0.000354063, gnorm=0.963, clip=0.000, oom=0.000, loss_scale=16.000, wall=2389, train_wall=2142 | |
| epoch 001: 7990 / 8862 loss=6.778, nll_loss=5.457, ppl=43.91, wps=50667, ups=3, wpb=15176.484, bsz=557.870, num_updates=7987, lr=0.000353841, gnorm=0.962, clip=0.000, oom=0.000, loss_scale=16.000, wall=2392, train_wall=2145 | |
| epoch 001: 8000 / 8862 loss=6.776, nll_loss=5.454, ppl=43.84, wps=50668, ups=3, wpb=15176.285, bsz=557.764, num_updates=7997, lr=0.00035362, gnorm=0.962, clip=0.000, oom=0.000, loss_scale=16.000, wall=2395, train_wall=2148 | |
| epoch 001: 8010 / 8862 loss=6.773, nll_loss=5.451, ppl=43.75, wps=50670, ups=3, wpb=15176.591, bsz=557.768, num_updates=8007, lr=0.000353399, gnorm=0.962, clip=0.000, oom=0.000, loss_scale=16.000, wall=2398, train_wall=2151 | |
| epoch 001: 8020 / 8862 loss=6.771, nll_loss=5.448, ppl=43.66, wps=50672, ups=3, wpb=15176.914, bsz=557.830, num_updates=8017, lr=0.000353178, gnorm=0.961, clip=0.000, oom=0.000, loss_scale=16.000, wall=2401, train_wall=2153 | |
| epoch 001: 8030 / 8862 loss=6.768, nll_loss=5.445, ppl=43.57, wps=50676, ups=3, wpb=15177.431, bsz=557.786, num_updates=8027, lr=0.000352958, gnorm=0.961, clip=0.000, oom=0.000, loss_scale=16.000, wall=2404, train_wall=2156 | |
| epoch 001: 8040 / 8862 loss=6.766, nll_loss=5.442, ppl=43.49, wps=50680, ups=3, wpb=15177.860, bsz=557.776, num_updates=8037, lr=0.000352739, gnorm=0.960, clip=0.000, oom=0.000, loss_scale=16.000, wall=2407, train_wall=2159 | |
| epoch 001: 8050 / 8862 loss=6.763, nll_loss=5.440, ppl=43.40, wps=50681, ups=3, wpb=15177.626, bsz=557.746, num_updates=8047, lr=0.000352519, gnorm=0.960, clip=0.000, oom=0.000, loss_scale=16.000, wall=2410, train_wall=2161 | |
| epoch 001: 8060 / 8862 loss=6.761, nll_loss=5.437, ppl=43.32, wps=50684, ups=3, wpb=15178.059, bsz=557.900, num_updates=8057, lr=0.000352301, gnorm=0.960, clip=0.000, oom=0.000, loss_scale=16.000, wall=2413, train_wall=2164 | |
| epoch 001: 8070 / 8862 loss=6.758, nll_loss=5.434, ppl=43.24, wps=50686, ups=3, wpb=15177.850, bsz=557.971, num_updates=8067, lr=0.000352082, gnorm=0.959, clip=0.000, oom=0.000, loss_scale=16.000, wall=2416, train_wall=2167 | |
| epoch 001: 8080 / 8862 loss=6.756, nll_loss=5.431, ppl=43.15, wps=50688, ups=3, wpb=15177.903, bsz=557.912, num_updates=8077, lr=0.000351864, gnorm=0.959, clip=0.000, oom=0.000, loss_scale=16.000, wall=2419, train_wall=2169 | |
| epoch 001: 8090 / 8862 loss=6.753, nll_loss=5.429, ppl=43.07, wps=50691, ups=3, wpb=15177.923, bsz=557.826, num_updates=8087, lr=0.000351646, gnorm=0.959, clip=0.000, oom=0.000, loss_scale=16.000, wall=2421, train_wall=2172 | |
| epoch 001: 8100 / 8862 loss=6.751, nll_loss=5.426, ppl=42.98, wps=50693, ups=3, wpb=15178.218, bsz=557.891, num_updates=8097, lr=0.000351429, gnorm=0.958, clip=0.000, oom=0.000, loss_scale=16.000, wall=2424, train_wall=2175 | |
| epoch 001: 8110 / 8862 loss=6.748, nll_loss=5.423, ppl=42.90, wps=50695, ups=3, wpb=15178.191, bsz=557.888, num_updates=8107, lr=0.000351212, gnorm=0.958, clip=0.000, oom=0.000, loss_scale=16.000, wall=2427, train_wall=2177 | |
| epoch 001: 8120 / 8862 loss=6.746, nll_loss=5.420, ppl=42.81, wps=50699, ups=3, wpb=15178.784, bsz=557.859, num_updates=8117, lr=0.000350996, gnorm=0.957, clip=0.000, oom=0.000, loss_scale=16.000, wall=2430, train_wall=2180 | |
| epoch 001: 8130 / 8862 loss=6.743, nll_loss=5.417, ppl=42.74, wps=50700, ups=3, wpb=15178.390, bsz=557.913, num_updates=8127, lr=0.00035078, gnorm=0.957, clip=0.000, oom=0.000, loss_scale=16.000, wall=2433, train_wall=2183 | |
| epoch 001: 8140 / 8862 loss=6.741, nll_loss=5.415, ppl=42.66, wps=50701, ups=3, wpb=15178.120, bsz=557.890, num_updates=8137, lr=0.000350564, gnorm=0.957, clip=0.000, oom=0.000, loss_scale=16.000, wall=2436, train_wall=2185 | |
| epoch 001: 8150 / 8862 loss=6.739, nll_loss=5.412, ppl=42.58, wps=50704, ups=3, wpb=15178.322, bsz=557.846, num_updates=8147, lr=0.000350349, gnorm=0.956, clip=0.000, oom=0.000, loss_scale=16.000, wall=2439, train_wall=2188 | |
| epoch 001: 8160 / 8862 loss=6.737, nll_loss=5.409, ppl=42.50, wps=50706, ups=3, wpb=15178.535, bsz=557.946, num_updates=8157, lr=0.000350134, gnorm=0.956, clip=0.000, oom=0.000, loss_scale=16.000, wall=2442, train_wall=2191 | |
| epoch 001: 8170 / 8862 loss=6.734, nll_loss=5.407, ppl=42.42, wps=50708, ups=3, wpb=15178.644, bsz=557.917, num_updates=8167, lr=0.00034992, gnorm=0.955, clip=0.000, oom=0.000, loss_scale=16.000, wall=2445, train_wall=2193 | |
| epoch 001: 8180 / 8862 loss=6.732, nll_loss=5.404, ppl=42.34, wps=50711, ups=3, wpb=15178.872, bsz=557.907, num_updates=8177, lr=0.000349706, gnorm=0.955, clip=0.000, oom=0.000, loss_scale=16.000, wall=2448, train_wall=2196 | |
| epoch 001: 8190 / 8862 loss=6.729, nll_loss=5.401, ppl=42.25, wps=50715, ups=3, wpb=15179.364, bsz=557.969, num_updates=8187, lr=0.000349492, gnorm=0.955, clip=0.000, oom=0.000, loss_scale=16.000, wall=2450, train_wall=2199 | |
| epoch 001: 8200 / 8862 loss=6.727, nll_loss=5.398, ppl=42.17, wps=50716, ups=3, wpb=15179.263, bsz=558.010, num_updates=8197, lr=0.000349279, gnorm=0.954, clip=0.000, oom=0.000, loss_scale=16.000, wall=2453, train_wall=2202 | |
| epoch 001: 8210 / 8862 loss=6.724, nll_loss=5.396, ppl=42.09, wps=50718, ups=3, wpb=15179.060, bsz=558.064, num_updates=8207, lr=0.000349066, gnorm=0.954, clip=0.000, oom=0.000, loss_scale=16.000, wall=2456, train_wall=2204 | |
| epoch 001: 8220 / 8862 loss=6.722, nll_loss=5.393, ppl=42.02, wps=50720, ups=3, wpb=15179.175, bsz=557.967, num_updates=8217, lr=0.000348854, gnorm=0.953, clip=0.000, oom=0.000, loss_scale=16.000, wall=2459, train_wall=2207 | |
| epoch 001: 8230 / 8862 loss=6.720, nll_loss=5.390, ppl=41.94, wps=50721, ups=3, wpb=15178.566, bsz=557.972, num_updates=8227, lr=0.000348642, gnorm=0.953, clip=0.000, oom=0.000, loss_scale=16.000, wall=2462, train_wall=2210 | |
| epoch 001: 8240 / 8862 loss=6.718, nll_loss=5.388, ppl=41.87, wps=50723, ups=3, wpb=15178.544, bsz=557.823, num_updates=8237, lr=0.00034843, gnorm=0.953, clip=0.000, oom=0.000, loss_scale=16.000, wall=2465, train_wall=2212 | |
| epoch 001: 8250 / 8862 loss=6.715, nll_loss=5.385, ppl=41.79, wps=50725, ups=3, wpb=15178.523, bsz=557.793, num_updates=8247, lr=0.000348219, gnorm=0.952, clip=0.000, oom=0.000, loss_scale=16.000, wall=2468, train_wall=2215 | |
| epoch 001: 8260 / 8862 loss=6.713, nll_loss=5.382, ppl=41.71, wps=50727, ups=3, wpb=15178.312, bsz=557.816, num_updates=8257, lr=0.000348008, gnorm=0.952, clip=0.000, oom=0.000, loss_scale=16.000, wall=2471, train_wall=2218 | |
| epoch 001: 8270 / 8862 loss=6.710, nll_loss=5.380, ppl=41.64, wps=50729, ups=3, wpb=15178.310, bsz=557.784, num_updates=8267, lr=0.000347797, gnorm=0.951, clip=0.000, oom=0.000, loss_scale=16.000, wall=2474, train_wall=2220 | |
| epoch 001: 8280 / 8862 loss=6.708, nll_loss=5.377, ppl=41.56, wps=50731, ups=3, wpb=15178.226, bsz=557.746, num_updates=8277, lr=0.000347587, gnorm=0.951, clip=0.000, oom=0.000, loss_scale=16.000, wall=2476, train_wall=2223 | |
| epoch 001: 8290 / 8862 loss=6.706, nll_loss=5.375, ppl=41.49, wps=50733, ups=3, wpb=15178.113, bsz=557.758, num_updates=8287, lr=0.000347377, gnorm=0.951, clip=0.000, oom=0.000, loss_scale=16.000, wall=2479, train_wall=2226 | |
| epoch 001: 8300 / 8862 loss=6.703, nll_loss=5.372, ppl=41.41, wps=50735, ups=3, wpb=15178.255, bsz=557.776, num_updates=8297, lr=0.000347168, gnorm=0.950, clip=0.000, oom=0.000, loss_scale=16.000, wall=2482, train_wall=2228 | |
| epoch 001: 8310 / 8862 loss=6.701, nll_loss=5.369, ppl=41.33, wps=50736, ups=3, wpb=15178.355, bsz=558.031, num_updates=8307, lr=0.000346959, gnorm=0.950, clip=0.000, oom=0.000, loss_scale=16.000, wall=2485, train_wall=2231 | |
| WARNING: overflow detected, setting loss scale to: 8.0 | |
| epoch 001: 8320 / 8862 loss=6.699, nll_loss=5.367, ppl=41.26, wps=50731, ups=3, wpb=15178.330, bsz=558.064, num_updates=8316, lr=0.000346771, gnorm=0.950, clip=0.000, oom=0.000, loss_scale=8.000, wall=2488, train_wall=2234 | |
| epoch 001: 8330 / 8862 loss=6.696, nll_loss=5.364, ppl=41.18, wps=50733, ups=3, wpb=15178.213, bsz=558.087, num_updates=8326, lr=0.000346563, gnorm=0.949, clip=0.000, oom=0.000, loss_scale=8.000, wall=2491, train_wall=2236 | |
| epoch 001: 8340 / 8862 loss=6.694, nll_loss=5.361, ppl=41.11, wps=50736, ups=3, wpb=15178.611, bsz=558.074, num_updates=8336, lr=0.000346355, gnorm=0.949, clip=0.000, oom=0.000, loss_scale=8.000, wall=2494, train_wall=2239 | |
| epoch 001: 8350 / 8862 loss=6.692, nll_loss=5.359, ppl=41.03, wps=50737, ups=3, wpb=15178.517, bsz=558.146, num_updates=8346, lr=0.000346147, gnorm=0.948, clip=0.000, oom=0.000, loss_scale=8.000, wall=2497, train_wall=2242 | |
| epoch 001: 8360 / 8862 loss=6.690, nll_loss=5.356, ppl=40.96, wps=50738, ups=3, wpb=15178.131, bsz=558.135, num_updates=8356, lr=0.00034594, gnorm=0.948, clip=0.000, oom=0.000, loss_scale=8.000, wall=2500, train_wall=2244 | |
| epoch 001: 8370 / 8862 loss=6.687, nll_loss=5.354, ppl=40.89, wps=50741, ups=3, wpb=15178.352, bsz=558.015, num_updates=8366, lr=0.000345733, gnorm=0.948, clip=0.000, oom=0.000, loss_scale=8.000, wall=2503, train_wall=2247 | |
| epoch 001: 8380 / 8862 loss=6.685, nll_loss=5.351, ppl=40.82, wps=50744, ups=3, wpb=15178.495, bsz=557.929, num_updates=8376, lr=0.000345527, gnorm=0.947, clip=0.000, oom=0.000, loss_scale=8.000, wall=2505, train_wall=2250 | |
| epoch 001: 8390 / 8862 loss=6.683, nll_loss=5.349, ppl=40.75, wps=50747, ups=3, wpb=15178.782, bsz=557.848, num_updates=8386, lr=0.000345321, gnorm=0.947, clip=0.000, oom=0.000, loss_scale=8.000, wall=2508, train_wall=2252 | |
| epoch 001: 8400 / 8862 loss=6.681, nll_loss=5.346, ppl=40.67, wps=50750, ups=3, wpb=15179.116, bsz=557.884, num_updates=8396, lr=0.000345115, gnorm=0.947, clip=0.000, oom=0.000, loss_scale=8.000, wall=2511, train_wall=2255 | |
| epoch 001: 8410 / 8862 loss=6.678, nll_loss=5.343, ppl=40.60, wps=50751, ups=3, wpb=15178.849, bsz=557.885, num_updates=8406, lr=0.00034491, gnorm=0.946, clip=0.000, oom=0.000, loss_scale=8.000, wall=2514, train_wall=2258 | |
| epoch 001: 8420 / 8862 loss=6.676, nll_loss=5.341, ppl=40.53, wps=50754, ups=3, wpb=15178.981, bsz=557.760, num_updates=8416, lr=0.000344705, gnorm=0.946, clip=0.000, oom=0.000, loss_scale=8.000, wall=2517, train_wall=2260 | |
| epoch 001: 8430 / 8862 loss=6.674, nll_loss=5.338, ppl=40.46, wps=50756, ups=3, wpb=15179.003, bsz=557.732, num_updates=8426, lr=0.0003445, gnorm=0.946, clip=0.000, oom=0.000, loss_scale=8.000, wall=2520, train_wall=2263 | |
| epoch 001: 8440 / 8862 loss=6.672, nll_loss=5.336, ppl=40.39, wps=50758, ups=3, wpb=15178.728, bsz=557.656, num_updates=8436, lr=0.000344296, gnorm=0.945, clip=0.000, oom=0.000, loss_scale=8.000, wall=2523, train_wall=2266 | |
| epoch 001: 8450 / 8862 loss=6.669, nll_loss=5.333, ppl=40.32, wps=50759, ups=3, wpb=15178.695, bsz=557.629, num_updates=8446, lr=0.000344092, gnorm=0.945, clip=0.000, oom=0.000, loss_scale=8.000, wall=2526, train_wall=2268 | |
| epoch 001: 8460 / 8862 loss=6.667, nll_loss=5.331, ppl=40.25, wps=50762, ups=3, wpb=15178.833, bsz=557.564, num_updates=8456, lr=0.000343888, gnorm=0.944, clip=0.000, oom=0.000, loss_scale=8.000, wall=2529, train_wall=2271 | |
| epoch 001: 8470 / 8862 loss=6.665, nll_loss=5.328, ppl=40.18, wps=50765, ups=3, wpb=15179.090, bsz=557.496, num_updates=8466, lr=0.000343685, gnorm=0.944, clip=0.000, oom=0.000, loss_scale=8.000, wall=2531, train_wall=2274 | |
| epoch 001: 8480 / 8862 loss=6.663, nll_loss=5.326, ppl=40.11, wps=50767, ups=3, wpb=15179.078, bsz=557.521, num_updates=8476, lr=0.000343482, gnorm=0.944, clip=0.000, oom=0.000, loss_scale=8.000, wall=2534, train_wall=2276 | |
| epoch 001: 8490 / 8862 loss=6.661, nll_loss=5.323, ppl=40.04, wps=50768, ups=3, wpb=15178.958, bsz=557.536, num_updates=8486, lr=0.00034328, gnorm=0.943, clip=0.000, oom=0.000, loss_scale=8.000, wall=2537, train_wall=2279 | |
| epoch 001: 8500 / 8862 loss=6.658, nll_loss=5.321, ppl=39.97, wps=50771, ups=3, wpb=15179.229, bsz=557.550, num_updates=8496, lr=0.000343078, gnorm=0.943, clip=0.000, oom=0.000, loss_scale=8.000, wall=2540, train_wall=2282 | |
| epoch 001: 8510 / 8862 loss=6.656, nll_loss=5.318, ppl=39.89, wps=50772, ups=3, wpb=15179.128, bsz=557.563, num_updates=8506, lr=0.000342876, gnorm=0.943, clip=0.000, oom=0.000, loss_scale=8.000, wall=2543, train_wall=2284 | |
| epoch 001: 8520 / 8862 loss=6.654, nll_loss=5.315, ppl=39.82, wps=50775, ups=3, wpb=15179.370, bsz=557.563, num_updates=8516, lr=0.000342675, gnorm=0.942, clip=0.000, oom=0.000, loss_scale=8.000, wall=2546, train_wall=2287 | |
| epoch 001: 8530 / 8862 loss=6.651, nll_loss=5.313, ppl=39.75, wps=50777, ups=3, wpb=15179.158, bsz=557.542, num_updates=8526, lr=0.000342474, gnorm=0.942, clip=0.000, oom=0.000, loss_scale=8.000, wall=2549, train_wall=2290 | |
| epoch 001: 8540 / 8862 loss=6.649, nll_loss=5.310, ppl=39.68, wps=50779, ups=3, wpb=15179.390, bsz=557.631, num_updates=8536, lr=0.000342273, gnorm=0.941, clip=0.000, oom=0.000, loss_scale=8.000, wall=2552, train_wall=2292 | |
| epoch 001: 8550 / 8862 loss=6.647, nll_loss=5.308, ppl=39.61, wps=50780, ups=3, wpb=15179.154, bsz=557.620, num_updates=8546, lr=0.000342073, gnorm=0.941, clip=0.000, oom=0.000, loss_scale=8.000, wall=2555, train_wall=2295 | |
| epoch 001: 8560 / 8862 loss=6.645, nll_loss=5.306, ppl=39.55, wps=50780, ups=3, wpb=15178.527, bsz=557.569, num_updates=8556, lr=0.000341873, gnorm=0.941, clip=0.000, oom=0.000, loss_scale=8.000, wall=2557, train_wall=2298 | |
| epoch 001: 8570 / 8862 loss=6.643, nll_loss=5.303, ppl=39.48, wps=50782, ups=3, wpb=15178.342, bsz=557.615, num_updates=8566, lr=0.000341673, gnorm=0.940, clip=0.000, oom=0.000, loss_scale=8.000, wall=2560, train_wall=2300 | |
| epoch 001: 8580 / 8862 loss=6.641, nll_loss=5.301, ppl=39.41, wps=50784, ups=3, wpb=15178.561, bsz=557.689, num_updates=8576, lr=0.000341474, gnorm=0.940, clip=0.000, oom=0.000, loss_scale=8.000, wall=2563, train_wall=2303 | |
| epoch 001: 8590 / 8862 loss=6.638, nll_loss=5.298, ppl=39.34, wps=50787, ups=3, wpb=15178.968, bsz=557.699, num_updates=8586, lr=0.000341275, gnorm=0.940, clip=0.000, oom=0.000, loss_scale=8.000, wall=2566, train_wall=2306 | |
| epoch 001: 8600 / 8862 loss=6.636, nll_loss=5.296, ppl=39.27, wps=50789, ups=3, wpb=15178.827, bsz=557.701, num_updates=8596, lr=0.000341076, gnorm=0.939, clip=0.000, oom=0.000, loss_scale=8.000, wall=2569, train_wall=2308 | |
| epoch 001: 8610 / 8862 loss=6.634, nll_loss=5.293, ppl=39.20, wps=50790, ups=3, wpb=15178.809, bsz=557.787, num_updates=8606, lr=0.000340878, gnorm=0.939, clip=0.000, oom=0.000, loss_scale=8.000, wall=2572, train_wall=2311 | |
| epoch 001: 8620 / 8862 loss=6.632, nll_loss=5.291, ppl=39.14, wps=50792, ups=3, wpb=15179.062, bsz=557.809, num_updates=8616, lr=0.00034068, gnorm=0.939, clip=0.000, oom=0.000, loss_scale=8.000, wall=2575, train_wall=2314 | |
| epoch 001: 8630 / 8862 loss=6.629, nll_loss=5.288, ppl=39.07, wps=50794, ups=3, wpb=15179.294, bsz=557.797, num_updates=8626, lr=0.000340483, gnorm=0.938, clip=0.000, oom=0.000, loss_scale=8.000, wall=2578, train_wall=2317 | |
| epoch 001: 8640 / 8862 loss=6.627, nll_loss=5.285, ppl=39.00, wps=50797, ups=3, wpb=15179.389, bsz=557.785, num_updates=8636, lr=0.000340286, gnorm=0.938, clip=0.000, oom=0.000, loss_scale=8.000, wall=2581, train_wall=2319 | |
| epoch 001: 8650 / 8862 loss=6.625, nll_loss=5.283, ppl=38.93, wps=50799, ups=3, wpb=15179.282, bsz=557.747, num_updates=8646, lr=0.000340089, gnorm=0.938, clip=0.000, oom=0.000, loss_scale=8.000, wall=2584, train_wall=2322 | |
| epoch 001: 8660 / 8862 loss=6.623, nll_loss=5.280, ppl=38.87, wps=50799, ups=3, wpb=15178.822, bsz=557.792, num_updates=8656, lr=0.000339892, gnorm=0.937, clip=0.000, oom=0.000, loss_scale=8.000, wall=2586, train_wall=2325 | |
| epoch 001: 8670 / 8862 loss=6.621, nll_loss=5.278, ppl=38.80, wps=50800, ups=3, wpb=15178.566, bsz=557.679, num_updates=8666, lr=0.000339696, gnorm=0.937, clip=0.000, oom=0.000, loss_scale=8.000, wall=2589, train_wall=2327 | |
| epoch 001: 8680 / 8862 loss=6.618, nll_loss=5.276, ppl=38.73, wps=50802, ups=3, wpb=15178.557, bsz=557.767, num_updates=8676, lr=0.0003395, gnorm=0.936, clip=0.000, oom=0.000, loss_scale=8.000, wall=2592, train_wall=2330 | |
| epoch 001: 8690 / 8862 loss=6.616, nll_loss=5.273, ppl=38.67, wps=50804, ups=3, wpb=15178.475, bsz=557.697, num_updates=8686, lr=0.000339305, gnorm=0.936, clip=0.000, oom=0.000, loss_scale=8.000, wall=2595, train_wall=2333 | |
| epoch 001: 8700 / 8862 loss=6.614, nll_loss=5.271, ppl=38.61, wps=50805, ups=3, wpb=15178.484, bsz=557.713, num_updates=8696, lr=0.00033911, gnorm=0.936, clip=0.000, oom=0.000, loss_scale=8.000, wall=2598, train_wall=2335 | |
| epoch 001: 8710 / 8862 loss=6.612, nll_loss=5.268, ppl=38.54, wps=50807, ups=3, wpb=15178.360, bsz=557.769, num_updates=8706, lr=0.000338915, gnorm=0.935, clip=0.000, oom=0.000, loss_scale=8.000, wall=2601, train_wall=2338 | |
| epoch 001: 8720 / 8862 loss=6.610, nll_loss=5.266, ppl=38.48, wps=50809, ups=3, wpb=15178.348, bsz=557.748, num_updates=8716, lr=0.00033872, gnorm=0.935, clip=0.000, oom=0.000, loss_scale=8.000, wall=2604, train_wall=2341 | |
| epoch 001: 8730 / 8862 loss=6.608, nll_loss=5.264, ppl=38.42, wps=50811, ups=3, wpb=15178.558, bsz=557.669, num_updates=8726, lr=0.000338526, gnorm=0.935, clip=0.000, oom=0.000, loss_scale=8.000, wall=2607, train_wall=2343 | |
| epoch 001: 8740 / 8862 loss=6.606, nll_loss=5.261, ppl=38.36, wps=50813, ups=3, wpb=15178.510, bsz=557.601, num_updates=8736, lr=0.000338332, gnorm=0.934, clip=0.000, oom=0.000, loss_scale=8.000, wall=2610, train_wall=2346 | |
| epoch 001: 8750 / 8862 loss=6.604, nll_loss=5.259, ppl=38.30, wps=50814, ups=3, wpb=15178.173, bsz=557.472, num_updates=8746, lr=0.000338139, gnorm=0.934, clip=0.000, oom=0.000, loss_scale=8.000, wall=2612, train_wall=2349 | |
| epoch 001: 8760 / 8862 loss=6.602, nll_loss=5.257, ppl=38.23, wps=50817, ups=3, wpb=15178.464, bsz=557.436, num_updates=8756, lr=0.000337946, gnorm=0.934, clip=0.000, oom=0.000, loss_scale=8.000, wall=2615, train_wall=2351 | |
| epoch 001: 8770 / 8862 loss=6.600, nll_loss=5.254, ppl=38.17, wps=50819, ups=3, wpb=15178.450, bsz=557.476, num_updates=8766, lr=0.000337753, gnorm=0.933, clip=0.000, oom=0.000, loss_scale=8.000, wall=2618, train_wall=2354 | |
| epoch 001: 8780 / 8862 loss=6.598, nll_loss=5.252, ppl=38.11, wps=50819, ups=3, wpb=15178.035, bsz=557.515, num_updates=8776, lr=0.000337561, gnorm=0.933, clip=0.000, oom=0.000, loss_scale=8.000, wall=2621, train_wall=2357 | |
| epoch 001: 8790 / 8862 loss=6.596, nll_loss=5.250, ppl=38.05, wps=50820, ups=3, wpb=15177.732, bsz=557.485, num_updates=8786, lr=0.000337368, gnorm=0.933, clip=0.000, oom=0.000, loss_scale=8.000, wall=2624, train_wall=2359 | |
| epoch 001: 8800 / 8862 loss=6.594, nll_loss=5.247, ppl=37.98, wps=50822, ups=3, wpb=15177.632, bsz=557.460, num_updates=8796, lr=0.000337177, gnorm=0.932, clip=0.000, oom=0.000, loss_scale=8.000, wall=2627, train_wall=2362 | |
| epoch 001: 8810 / 8862 loss=6.591, nll_loss=5.245, ppl=37.92, wps=50824, ups=3, wpb=15177.854, bsz=557.481, num_updates=8806, lr=0.000336985, gnorm=0.932, clip=0.000, oom=0.000, loss_scale=8.000, wall=2630, train_wall=2365 | |
| epoch 001: 8820 / 8862 loss=6.589, nll_loss=5.242, ppl=37.85, wps=50825, ups=3, wpb=15177.874, bsz=557.627, num_updates=8816, lr=0.000336794, gnorm=0.932, clip=0.000, oom=0.000, loss_scale=8.000, wall=2633, train_wall=2367 | |
| epoch 001: 8830 / 8862 loss=6.587, nll_loss=5.240, ppl=37.79, wps=50827, ups=3, wpb=15177.675, bsz=557.578, num_updates=8826, lr=0.000336603, gnorm=0.931, clip=0.000, oom=0.000, loss_scale=8.000, wall=2636, train_wall=2370 | |
| epoch 001: 8840 / 8862 loss=6.585, nll_loss=5.237, ppl=37.72, wps=50828, ups=3, wpb=15177.829, bsz=557.632, num_updates=8836, lr=0.000336413, gnorm=0.931, clip=0.000, oom=0.000, loss_scale=8.000, wall=2639, train_wall=2373 | |
| epoch 001: 8850 / 8862 loss=6.583, nll_loss=5.235, ppl=37.66, wps=50830, ups=3, wpb=15178.100, bsz=557.693, num_updates=8846, lr=0.000336222, gnorm=0.931, clip=0.000, oom=0.000, loss_scale=8.000, wall=2641, train_wall=2375 | |
| epoch 001: 8860 / 8862 loss=6.581, nll_loss=5.233, ppl=37.60, wps=50831, ups=3, wpb=15178.003, bsz=557.542, num_updates=8856, lr=0.000336032, gnorm=0.930, clip=0.000, oom=0.000, loss_scale=8.000, wall=2644, train_wall=2378 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 001 | loss 6.580 | nll_loss 5.232 | ppl 37.59 | wps 50823 | ups 3 | wpb 15177.643 | bsz 557.517 | num_updates 8857 | lr 0.000336013 | gnorm 0.930 | clip 0.000 | oom 0.000 | loss_scale 8.000 | wall 2645 | train_wall 2378 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| WARNING: 2459 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[34935, 29199, 25522, 50610, 31640, 50522, 29514, 23772, 21318, 30173] | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 001 | valid on 'valid' subset | loss 4.613 | nll_loss 2.852 | ppl 7.22 | num_updates 8857 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 002: 10 / 8862 loss=4.699, nll_loss=3.097, ppl=8.56, wps=51397, ups=0, wpb=14989.818, bsz=513.455, num_updates=8868, lr=0.000335805, gnorm=0.619, clip=0.000, oom=0.000, loss_scale=8.000, wall=2694, train_wall=2383 | |
| epoch 002: 20 / 8862 loss=4.651, nll_loss=3.044, ppl=8.25, wps=51256, ups=0, wpb=15080.238, bsz=553.905, num_updates=8878, lr=0.000335616, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2697, train_wall=2386 | |
| epoch 002: 30 / 8862 loss=4.669, nll_loss=3.064, ppl=8.37, wps=51374, ups=1, wpb=15149.194, bsz=575.742, num_updates=8888, lr=0.000335427, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=2700, train_wall=2389 | |
| epoch 002: 40 / 8862 loss=4.680, nll_loss=3.077, ppl=8.44, wps=51556, ups=1, wpb=15155.683, bsz=572.293, num_updates=8898, lr=0.000335238, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=2703, train_wall=2391 | |
| epoch 002: 50 / 8862 loss=4.685, nll_loss=3.083, ppl=8.48, wps=51394, ups=1, wpb=15139.902, bsz=575.843, num_updates=8908, lr=0.00033505, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2706, train_wall=2394 | |
| epoch 002: 60 / 8862 loss=4.703, nll_loss=3.103, ppl=8.59, wps=51471, ups=1, wpb=15154.082, bsz=560.000, num_updates=8918, lr=0.000334862, gnorm=0.608, clip=0.000, oom=0.000, loss_scale=8.000, wall=2709, train_wall=2397 | |
| epoch 002: 70 / 8862 loss=4.704, nll_loss=3.105, ppl=8.60, wps=51589, ups=1, wpb=15180.606, bsz=554.479, num_updates=8928, lr=0.000334675, gnorm=0.607, clip=0.000, oom=0.000, loss_scale=8.000, wall=2712, train_wall=2399 | |
| epoch 002: 80 / 8862 loss=4.705, nll_loss=3.106, ppl=8.61, wps=51645, ups=1, wpb=15197.173, bsz=558.914, num_updates=8938, lr=0.000334487, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2715, train_wall=2402 | |
| epoch 002: 90 / 8862 loss=4.695, nll_loss=3.094, ppl=8.54, wps=51673, ups=1, wpb=15200.110, bsz=564.659, num_updates=8948, lr=0.0003343, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=2718, train_wall=2405 | |
| epoch 002: 100 / 8862 loss=4.685, nll_loss=3.083, ppl=8.47, wps=51607, ups=1, wpb=15205.178, bsz=571.564, num_updates=8958, lr=0.000334114, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=2721, train_wall=2408 | |
| epoch 002: 110 / 8862 loss=4.684, nll_loss=3.081, ppl=8.46, wps=51588, ups=1, wpb=15182.477, bsz=562.667, num_updates=8968, lr=0.000333928, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=2724, train_wall=2410 | |
| epoch 002: 120 / 8862 loss=4.678, nll_loss=3.075, ppl=8.43, wps=51629, ups=1, wpb=15191.132, bsz=561.851, num_updates=8978, lr=0.000333741, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=2727, train_wall=2413 | |
| epoch 002: 130 / 8862 loss=4.669, nll_loss=3.064, ppl=8.36, wps=51701, ups=2, wpb=15203.389, bsz=567.145, num_updates=8988, lr=0.000333556, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=2730, train_wall=2416 | |
| epoch 002: 140 / 8862 loss=4.669, nll_loss=3.065, ppl=8.37, wps=51767, ups=2, wpb=15205.262, bsz=566.979, num_updates=8998, lr=0.00033337, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=2733, train_wall=2418 | |
| epoch 002: 150 / 8862 loss=4.671, nll_loss=3.067, ppl=8.38, wps=51777, ups=2, wpb=15211.550, bsz=568.795, num_updates=9008, lr=0.000333185, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=2736, train_wall=2421 | |
| epoch 002: 160 / 8862 loss=4.667, nll_loss=3.063, ppl=8.36, wps=51680, ups=2, wpb=15220.509, bsz=586.634, num_updates=9018, lr=0.000333, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=2739, train_wall=2424 | |
| epoch 002: 170 / 8862 loss=4.670, nll_loss=3.066, ppl=8.38, wps=51644, ups=2, wpb=15205.836, bsz=585.825, num_updates=9028, lr=0.000332816, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=2742, train_wall=2427 | |
| epoch 002: 180 / 8862 loss=4.672, nll_loss=3.068, ppl=8.39, wps=51560, ups=2, wpb=15181.785, bsz=586.210, num_updates=9038, lr=0.000332632, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=2744, train_wall=2429 | |
| epoch 002: 190 / 8862 loss=4.673, nll_loss=3.069, ppl=8.39, wps=51568, ups=2, wpb=15180.351, bsz=585.173, num_updates=9048, lr=0.000332448, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=2747, train_wall=2432 | |
| epoch 002: 200 / 8862 loss=4.671, nll_loss=3.068, ppl=8.39, wps=51485, ups=2, wpb=15167.219, bsz=582.408, num_updates=9058, lr=0.000332264, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=2750, train_wall=2435 | |
| epoch 002: 210 / 8862 loss=4.677, nll_loss=3.074, ppl=8.42, wps=51441, ups=2, wpb=15167.957, bsz=577.820, num_updates=9068, lr=0.000332081, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=2753, train_wall=2438 | |
| epoch 002: 220 / 8862 loss=4.678, nll_loss=3.075, ppl=8.43, wps=51347, ups=2, wpb=15156.163, bsz=578.860, num_updates=9078, lr=0.000331898, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2756, train_wall=2440 | |
| epoch 002: 230 / 8862 loss=4.681, nll_loss=3.079, ppl=8.45, wps=51344, ups=2, wpb=15163.143, bsz=576.762, num_updates=9088, lr=0.000331716, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2759, train_wall=2443 | |
| epoch 002: 240 / 8862 loss=4.680, nll_loss=3.078, ppl=8.44, wps=51318, ups=2, wpb=15163.992, bsz=575.668, num_updates=9098, lr=0.000331533, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2762, train_wall=2446 | |
| epoch 002: 250 / 8862 loss=4.677, nll_loss=3.075, ppl=8.42, wps=51301, ups=2, wpb=15164.689, bsz=574.853, num_updates=9108, lr=0.000331351, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2765, train_wall=2449 | |
| epoch 002: 260 / 8862 loss=4.676, nll_loss=3.074, ppl=8.42, wps=51274, ups=2, wpb=15163.958, bsz=578.115, num_updates=9118, lr=0.000331169, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2768, train_wall=2452 | |
| epoch 002: 270 / 8862 loss=4.676, nll_loss=3.073, ppl=8.42, wps=51295, ups=2, wpb=15172.863, bsz=575.675, num_updates=9128, lr=0.000330988, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2771, train_wall=2454 | |
| epoch 002: 280 / 8862 loss=4.677, nll_loss=3.074, ppl=8.42, wps=51277, ups=2, wpb=15170.875, bsz=574.833, num_updates=9138, lr=0.000330807, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2774, train_wall=2457 | |
| epoch 002: 290 / 8862 loss=4.671, nll_loss=3.068, ppl=8.39, wps=51248, ups=2, wpb=15173.498, bsz=579.546, num_updates=9148, lr=0.000330626, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2777, train_wall=2460 | |
| epoch 002: 300 / 8862 loss=4.667, nll_loss=3.063, ppl=8.36, wps=51216, ups=2, wpb=15174.017, bsz=585.302, num_updates=9158, lr=0.000330445, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2780, train_wall=2463 | |
| epoch 002: 310 / 8862 loss=4.665, nll_loss=3.061, ppl=8.35, wps=51210, ups=2, wpb=15174.621, bsz=585.878, num_updates=9168, lr=0.000330265, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2783, train_wall=2465 | |
| epoch 002: 320 / 8862 loss=4.670, nll_loss=3.067, ppl=8.38, wps=51163, ups=2, wpb=15164.701, bsz=588.461, num_updates=9178, lr=0.000330085, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=2786, train_wall=2468 | |
| epoch 002: 330 / 8862 loss=4.670, nll_loss=3.067, ppl=8.38, wps=51169, ups=2, wpb=15169.369, bsz=586.707, num_updates=9188, lr=0.000329905, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=2789, train_wall=2471 | |
| epoch 002: 340 / 8862 loss=4.669, nll_loss=3.066, ppl=8.37, wps=51156, ups=2, wpb=15170.372, bsz=587.613, num_updates=9198, lr=0.000329726, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2792, train_wall=2474 | |
| epoch 002: 350 / 8862 loss=4.669, nll_loss=3.065, ppl=8.37, wps=51163, ups=2, wpb=15175.684, bsz=587.761, num_updates=9208, lr=0.000329547, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2795, train_wall=2476 | |
| epoch 002: 360 / 8862 loss=4.671, nll_loss=3.068, ppl=8.38, wps=51154, ups=2, wpb=15175.668, bsz=586.127, num_updates=9218, lr=0.000329368, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2798, train_wall=2479 | |
| epoch 002: 370 / 8862 loss=4.670, nll_loss=3.066, ppl=8.38, wps=51156, ups=2, wpb=15184.423, bsz=588.658, num_updates=9228, lr=0.00032919, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2801, train_wall=2482 | |
| epoch 002: 380 / 8862 loss=4.669, nll_loss=3.065, ppl=8.37, wps=51135, ups=2, wpb=15182.255, bsz=588.514, num_updates=9238, lr=0.000329011, gnorm=0.607, clip=0.000, oom=0.000, loss_scale=8.000, wall=2804, train_wall=2485 | |
| epoch 002: 390 / 8862 loss=4.670, nll_loss=3.067, ppl=8.38, wps=51106, ups=2, wpb=15173.627, bsz=586.824, num_updates=9248, lr=0.000328834, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2807, train_wall=2488 | |
| epoch 002: 400 / 8862 loss=4.669, nll_loss=3.066, ppl=8.37, wps=51128, ups=2, wpb=15182.823, bsz=584.958, num_updates=9258, lr=0.000328656, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2810, train_wall=2490 | |
| epoch 002: 410 / 8862 loss=4.667, nll_loss=3.063, ppl=8.36, wps=51128, ups=2, wpb=15188.049, bsz=585.946, num_updates=9268, lr=0.000328479, gnorm=0.607, clip=0.000, oom=0.000, loss_scale=8.000, wall=2813, train_wall=2493 | |
| epoch 002: 420 / 8862 loss=4.666, nll_loss=3.062, ppl=8.35, wps=51124, ups=2, wpb=15191.416, bsz=586.489, num_updates=9278, lr=0.000328301, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2816, train_wall=2496 | |
| epoch 002: 430 / 8862 loss=4.667, nll_loss=3.063, ppl=8.36, wps=51129, ups=2, wpb=15193.251, bsz=584.761, num_updates=9288, lr=0.000328125, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2819, train_wall=2499 | |
| epoch 002: 440 / 8862 loss=4.666, nll_loss=3.063, ppl=8.35, wps=51129, ups=2, wpb=15194.206, bsz=583.438, num_updates=9298, lr=0.000327948, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2822, train_wall=2501 | |
| epoch 002: 450 / 8862 loss=4.667, nll_loss=3.063, ppl=8.36, wps=51113, ups=3, wpb=15190.166, bsz=581.747, num_updates=9308, lr=0.000327772, gnorm=0.606, clip=0.000, oom=0.000, loss_scale=8.000, wall=2825, train_wall=2504 | |
| epoch 002: 460 / 8862 loss=4.665, nll_loss=3.062, ppl=8.35, wps=51104, ups=3, wpb=15190.328, bsz=582.889, num_updates=9318, lr=0.000327596, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2828, train_wall=2507 | |
| epoch 002: 470 / 8862 loss=4.664, nll_loss=3.059, ppl=8.34, wps=51107, ups=3, wpb=15194.936, bsz=582.709, num_updates=9328, lr=0.00032742, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2831, train_wall=2510 | |
| epoch 002: 480 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51102, ups=3, wpb=15193.202, bsz=581.422, num_updates=9338, lr=0.000327245, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2834, train_wall=2513 | |
| epoch 002: 490 / 8862 loss=4.663, nll_loss=3.059, ppl=8.33, wps=51104, ups=3, wpb=15195.605, bsz=580.399, num_updates=9348, lr=0.00032707, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2837, train_wall=2515 | |
| epoch 002: 500 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51107, ups=3, wpb=15199.333, bsz=580.263, num_updates=9358, lr=0.000326895, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2840, train_wall=2518 | |
| epoch 002: 510 / 8862 loss=4.663, nll_loss=3.059, ppl=8.33, wps=51109, ups=3, wpb=15201.135, bsz=580.775, num_updates=9368, lr=0.000326721, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2843, train_wall=2521 | |
| epoch 002: 520 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51086, ups=3, wpb=15195.244, bsz=579.317, num_updates=9378, lr=0.000326546, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2846, train_wall=2524 | |
| epoch 002: 530 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51079, ups=3, wpb=15191.708, bsz=577.959, num_updates=9388, lr=0.000326372, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2849, train_wall=2526 | |
| epoch 002: 540 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51075, ups=3, wpb=15190.497, bsz=576.222, num_updates=9398, lr=0.000326199, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2852, train_wall=2529 | |
| epoch 002: 550 / 8862 loss=4.663, nll_loss=3.059, ppl=8.33, wps=51080, ups=3, wpb=15193.060, bsz=575.260, num_updates=9408, lr=0.000326025, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2855, train_wall=2532 | |
| epoch 002: 560 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51075, ups=3, wpb=15190.727, bsz=574.018, num_updates=9418, lr=0.000325852, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2858, train_wall=2535 | |
| epoch 002: 570 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51084, ups=3, wpb=15193.060, bsz=573.772, num_updates=9428, lr=0.000325679, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2861, train_wall=2537 | |
| epoch 002: 580 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51073, ups=3, wpb=15189.888, bsz=572.902, num_updates=9438, lr=0.000325507, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2864, train_wall=2540 | |
| epoch 002: 590 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51044, ups=3, wpb=15185.091, bsz=575.377, num_updates=9448, lr=0.000325334, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2867, train_wall=2543 | |
| epoch 002: 600 / 8862 loss=4.665, nll_loss=3.062, ppl=8.35, wps=51044, ups=3, wpb=15179.260, bsz=575.907, num_updates=9458, lr=0.000325162, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2870, train_wall=2546 | |
| epoch 002: 610 / 8862 loss=4.666, nll_loss=3.063, ppl=8.36, wps=51065, ups=3, wpb=15177.105, bsz=574.180, num_updates=9468, lr=0.000324991, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2873, train_wall=2548 | |
| epoch 002: 620 / 8862 loss=4.666, nll_loss=3.062, ppl=8.35, wps=51093, ups=3, wpb=15178.301, bsz=575.098, num_updates=9478, lr=0.000324819, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2876, train_wall=2551 | |
| epoch 002: 630 / 8862 loss=4.664, nll_loss=3.061, ppl=8.34, wps=51116, ups=3, wpb=15181.349, bsz=576.228, num_updates=9488, lr=0.000324648, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2879, train_wall=2554 | |
| epoch 002: 640 / 8862 loss=4.664, nll_loss=3.060, ppl=8.34, wps=51152, ups=3, wpb=15185.184, bsz=575.339, num_updates=9498, lr=0.000324477, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2881, train_wall=2556 | |
| epoch 002: 650 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51183, ups=3, wpb=15187.210, bsz=575.656, num_updates=9508, lr=0.000324306, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2884, train_wall=2559 | |
| epoch 002: 660 / 8862 loss=4.661, nll_loss=3.057, ppl=8.32, wps=51216, ups=3, wpb=15191.811, bsz=575.201, num_updates=9518, lr=0.000324136, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2887, train_wall=2562 | |
| epoch 002: 670 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51232, ups=3, wpb=15190.502, bsz=575.690, num_updates=9528, lr=0.000323966, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2890, train_wall=2564 | |
| epoch 002: 680 / 8862 loss=4.658, nll_loss=3.054, ppl=8.30, wps=51255, ups=3, wpb=15194.905, bsz=577.668, num_updates=9538, lr=0.000323796, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2893, train_wall=2567 | |
| epoch 002: 690 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51268, ups=3, wpb=15191.404, bsz=576.463, num_updates=9548, lr=0.000323626, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2896, train_wall=2570 | |
| epoch 002: 700 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51291, ups=3, wpb=15191.134, bsz=574.939, num_updates=9558, lr=0.000323457, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2899, train_wall=2572 | |
| epoch 002: 710 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51315, ups=3, wpb=15192.973, bsz=574.402, num_updates=9568, lr=0.000323288, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2902, train_wall=2575 | |
| epoch 002: 720 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51336, ups=3, wpb=15193.026, bsz=573.836, num_updates=9578, lr=0.000323119, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2905, train_wall=2578 | |
| epoch 002: 730 / 8862 loss=4.661, nll_loss=3.058, ppl=8.33, wps=51344, ups=3, wpb=15191.174, bsz=574.161, num_updates=9588, lr=0.000322951, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2907, train_wall=2580 | |
| epoch 002: 740 / 8862 loss=4.660, nll_loss=3.056, ppl=8.31, wps=51360, ups=3, wpb=15191.093, bsz=574.359, num_updates=9598, lr=0.000322782, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2910, train_wall=2583 | |
| epoch 002: 750 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51374, ups=3, wpb=15189.057, bsz=574.434, num_updates=9608, lr=0.000322614, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2913, train_wall=2586 | |
| epoch 002: 760 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51387, ups=3, wpb=15188.812, bsz=573.782, num_updates=9618, lr=0.000322446, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2916, train_wall=2588 | |
| epoch 002: 770 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51404, ups=3, wpb=15189.084, bsz=573.780, num_updates=9628, lr=0.000322279, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2919, train_wall=2591 | |
| epoch 002: 780 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51396, ups=3, wpb=15184.266, bsz=574.781, num_updates=9638, lr=0.000322112, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2922, train_wall=2594 | |
| epoch 002: 790 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51420, ups=3, wpb=15185.828, bsz=574.594, num_updates=9648, lr=0.000321945, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2925, train_wall=2596 | |
| epoch 002: 800 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51438, ups=3, wpb=15187.125, bsz=574.302, num_updates=9658, lr=0.000321778, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2928, train_wall=2599 | |
| epoch 002: 810 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51453, ups=3, wpb=15186.440, bsz=574.284, num_updates=9668, lr=0.000321612, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2931, train_wall=2602 | |
| epoch 002: 820 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51468, ups=3, wpb=15185.758, bsz=573.983, num_updates=9678, lr=0.000321445, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2933, train_wall=2604 | |
| epoch 002: 830 / 8862 loss=4.661, nll_loss=3.057, ppl=8.32, wps=51483, ups=3, wpb=15184.951, bsz=573.131, num_updates=9688, lr=0.000321279, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2936, train_wall=2607 | |
| epoch 002: 840 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51506, ups=3, wpb=15187.640, bsz=572.461, num_updates=9698, lr=0.000321114, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2939, train_wall=2610 | |
| epoch 002: 850 / 8862 loss=4.661, nll_loss=3.058, ppl=8.33, wps=51508, ups=3, wpb=15184.128, bsz=572.625, num_updates=9708, lr=0.000320948, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2942, train_wall=2612 | |
| epoch 002: 860 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51525, ups=3, wpb=15184.967, bsz=571.698, num_updates=9718, lr=0.000320783, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2945, train_wall=2615 | |
| epoch 002: 870 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51541, ups=3, wpb=15185.085, bsz=570.930, num_updates=9728, lr=0.000320618, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2948, train_wall=2618 | |
| epoch 002: 880 / 8862 loss=4.663, nll_loss=3.060, ppl=8.34, wps=51557, ups=3, wpb=15185.300, bsz=569.898, num_updates=9738, lr=0.000320454, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2951, train_wall=2620 | |
| epoch 002: 890 / 8862 loss=4.663, nll_loss=3.059, ppl=8.33, wps=51572, ups=3, wpb=15185.747, bsz=569.796, num_updates=9748, lr=0.000320289, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=2954, train_wall=2623 | |
| epoch 002: 900 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51598, ups=3, wpb=15189.459, bsz=568.861, num_updates=9758, lr=0.000320125, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2956, train_wall=2626 | |
| epoch 002: 910 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51610, ups=3, wpb=15189.749, bsz=568.676, num_updates=9768, lr=0.000319961, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2959, train_wall=2628 | |
| epoch 002: 920 / 8862 loss=4.662, nll_loss=3.058, ppl=8.33, wps=51616, ups=3, wpb=15188.270, bsz=568.686, num_updates=9778, lr=0.000319797, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2962, train_wall=2631 | |
| epoch 002: 930 / 8862 loss=4.661, nll_loss=3.057, ppl=8.32, wps=51632, ups=3, wpb=15189.490, bsz=568.438, num_updates=9788, lr=0.000319634, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2965, train_wall=2634 | |
| epoch 002: 940 / 8862 loss=4.659, nll_loss=3.056, ppl=8.31, wps=51656, ups=3, wpb=15193.677, bsz=568.689, num_updates=9798, lr=0.000319471, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2968, train_wall=2636 | |
| epoch 002: 950 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51664, ups=3, wpb=15192.570, bsz=568.185, num_updates=9808, lr=0.000319308, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2971, train_wall=2639 | |
| epoch 002: 960 / 8862 loss=4.658, nll_loss=3.054, ppl=8.31, wps=51673, ups=3, wpb=15192.422, bsz=567.742, num_updates=9818, lr=0.000319145, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2974, train_wall=2642 | |
| epoch 002: 970 / 8862 loss=4.658, nll_loss=3.054, ppl=8.30, wps=51682, ups=3, wpb=15191.518, bsz=567.333, num_updates=9828, lr=0.000318983, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2977, train_wall=2644 | |
| epoch 002: 980 / 8862 loss=4.658, nll_loss=3.054, ppl=8.31, wps=51693, ups=3, wpb=15190.773, bsz=566.646, num_updates=9838, lr=0.000318821, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=2979, train_wall=2647 | |
| epoch 002: 990 / 8862 loss=4.658, nll_loss=3.055, ppl=8.31, wps=51711, ups=3, wpb=15193.444, bsz=566.232, num_updates=9848, lr=0.000318659, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2982, train_wall=2650 | |
| epoch 002: 1000 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51724, ups=3, wpb=15193.847, bsz=565.275, num_updates=9858, lr=0.000318497, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2985, train_wall=2652 | |
| epoch 002: 1010 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51733, ups=3, wpb=15193.977, bsz=564.645, num_updates=9868, lr=0.000318336, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2988, train_wall=2655 | |
| epoch 002: 1020 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51743, ups=3, wpb=15193.004, bsz=563.871, num_updates=9878, lr=0.000318175, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2991, train_wall=2658 | |
| epoch 002: 1030 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51756, ups=3, wpb=15194.193, bsz=563.864, num_updates=9888, lr=0.000318014, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2994, train_wall=2660 | |
| epoch 002: 1040 / 8862 loss=4.660, nll_loss=3.056, ppl=8.32, wps=51757, ups=3, wpb=15192.875, bsz=564.734, num_updates=9898, lr=0.000317853, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=2997, train_wall=2663 | |
| epoch 002: 1050 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51761, ups=3, wpb=15190.469, bsz=564.171, num_updates=9908, lr=0.000317693, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3000, train_wall=2666 | |
| epoch 002: 1060 / 8862 loss=4.663, nll_loss=3.060, ppl=8.34, wps=51766, ups=3, wpb=15189.074, bsz=563.551, num_updates=9918, lr=0.000317532, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3002, train_wall=2668 | |
| epoch 002: 1070 / 8862 loss=4.663, nll_loss=3.060, ppl=8.34, wps=51769, ups=3, wpb=15186.749, bsz=563.563, num_updates=9928, lr=0.000317372, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3005, train_wall=2671 | |
| epoch 002: 1080 / 8862 loss=4.663, nll_loss=3.059, ppl=8.34, wps=51781, ups=3, wpb=15187.611, bsz=562.953, num_updates=9938, lr=0.000317213, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3008, train_wall=2673 | |
| epoch 002: 1090 / 8862 loss=4.663, nll_loss=3.060, ppl=8.34, wps=51788, ups=3, wpb=15186.983, bsz=563.021, num_updates=9948, lr=0.000317053, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3011, train_wall=2676 | |
| epoch 002: 1100 / 8862 loss=4.663, nll_loss=3.060, ppl=8.34, wps=51783, ups=3, wpb=15183.258, bsz=562.819, num_updates=9958, lr=0.000316894, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3014, train_wall=2679 | |
| epoch 002: 1110 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51791, ups=3, wpb=15183.301, bsz=562.463, num_updates=9968, lr=0.000316735, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3017, train_wall=2681 | |
| epoch 002: 1120 / 8862 loss=4.662, nll_loss=3.059, ppl=8.33, wps=51796, ups=3, wpb=15182.540, bsz=561.798, num_updates=9978, lr=0.000316576, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3020, train_wall=2684 | |
| epoch 002: 1130 / 8862 loss=4.661, nll_loss=3.058, ppl=8.33, wps=51801, ups=3, wpb=15182.095, bsz=562.808, num_updates=9988, lr=0.000316418, gnorm=0.605, clip=0.000, oom=0.000, loss_scale=8.000, wall=3023, train_wall=2687 | |
| epoch 002: 1140 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51817, ups=3, wpb=15185.555, bsz=563.450, num_updates=9998, lr=0.000316259, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3026, train_wall=2689 | |
| epoch 002: 1150 / 8862 loss=4.657, nll_loss=3.054, ppl=8.30, wps=51826, ups=3, wpb=15187.215, bsz=564.156, num_updates=10008, lr=0.000316101, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3028, train_wall=2692 | |
| epoch 002: 1160 / 8862 loss=4.658, nll_loss=3.054, ppl=8.31, wps=51836, ups=3, wpb=15187.151, bsz=563.618, num_updates=10018, lr=0.000315944, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3031, train_wall=2695 | |
| epoch 002: 1170 / 8862 loss=4.658, nll_loss=3.055, ppl=8.31, wps=51841, ups=3, wpb=15186.102, bsz=563.026, num_updates=10028, lr=0.000315786, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3034, train_wall=2697 | |
| epoch 002: 1180 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51845, ups=3, wpb=15184.493, bsz=562.168, num_updates=10038, lr=0.000315629, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3037, train_wall=2700 | |
| epoch 002: 1190 / 8862 loss=4.658, nll_loss=3.054, ppl=8.30, wps=51856, ups=3, wpb=15185.759, bsz=562.364, num_updates=10048, lr=0.000315472, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3040, train_wall=2703 | |
| epoch 002: 1200 / 8862 loss=4.658, nll_loss=3.055, ppl=8.31, wps=51849, ups=3, wpb=15181.291, bsz=562.351, num_updates=10058, lr=0.000315315, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3043, train_wall=2705 | |
| epoch 002: 1210 / 8862 loss=4.659, nll_loss=3.056, ppl=8.31, wps=51854, ups=3, wpb=15180.340, bsz=561.988, num_updates=10068, lr=0.000315158, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3046, train_wall=2708 | |
| epoch 002: 1220 / 8862 loss=4.658, nll_loss=3.055, ppl=8.31, wps=51854, ups=3, wpb=15178.157, bsz=561.815, num_updates=10078, lr=0.000315002, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3049, train_wall=2711 | |
| epoch 002: 1230 / 8862 loss=4.658, nll_loss=3.054, ppl=8.31, wps=51861, ups=3, wpb=15178.399, bsz=562.145, num_updates=10088, lr=0.000314845, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3051, train_wall=2713 | |
| epoch 002: 1240 / 8862 loss=4.659, nll_loss=3.055, ppl=8.31, wps=51867, ups=3, wpb=15177.891, bsz=561.483, num_updates=10098, lr=0.00031469, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3054, train_wall=2716 | |
| epoch 002: 1250 / 8862 loss=4.659, nll_loss=3.056, ppl=8.32, wps=51876, ups=3, wpb=15178.138, bsz=561.081, num_updates=10108, lr=0.000314534, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3057, train_wall=2719 | |
| epoch 002: 1260 / 8862 loss=4.657, nll_loss=3.054, ppl=8.30, wps=51887, ups=3, wpb=15179.395, bsz=561.586, num_updates=10118, lr=0.000314378, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3060, train_wall=2721 | |
| epoch 002: 1270 / 8862 loss=4.657, nll_loss=3.053, ppl=8.30, wps=51890, ups=3, wpb=15178.419, bsz=561.706, num_updates=10128, lr=0.000314223, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3063, train_wall=2724 | |
| epoch 002: 1280 / 8862 loss=4.656, nll_loss=3.053, ppl=8.30, wps=51905, ups=3, wpb=15180.761, bsz=561.105, num_updates=10138, lr=0.000314068, gnorm=0.604, clip=0.000, oom=0.000, loss_scale=8.000, wall=3066, train_wall=2727 | |
| epoch 002: 1290 / 8862 loss=4.656, nll_loss=3.052, ppl=8.30, wps=51914, ups=3, wpb=15181.514, bsz=561.171, num_updates=10148, lr=0.000313913, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3069, train_wall=2729 | |
| epoch 002: 1300 / 8862 loss=4.656, nll_loss=3.052, ppl=8.29, wps=51920, ups=3, wpb=15181.113, bsz=561.636, num_updates=10158, lr=0.000313759, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3072, train_wall=2732 | |
| epoch 002: 1310 / 8862 loss=4.656, nll_loss=3.052, ppl=8.29, wps=51918, ups=3, wpb=15178.783, bsz=561.526, num_updates=10168, lr=0.000313604, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3074, train_wall=2735 | |
| epoch 002: 1320 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=51916, ups=3, wpb=15176.709, bsz=561.859, num_updates=10178, lr=0.00031345, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3077, train_wall=2737 | |
| epoch 002: 1330 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51924, ups=3, wpb=15177.128, bsz=561.563, num_updates=10188, lr=0.000313296, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3080, train_wall=2740 | |
| epoch 002: 1340 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51930, ups=3, wpb=15176.854, bsz=561.128, num_updates=10198, lr=0.000313143, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3083, train_wall=2743 | |
| epoch 002: 1350 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51939, ups=3, wpb=15177.976, bsz=561.013, num_updates=10208, lr=0.000312989, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3086, train_wall=2745 | |
| epoch 002: 1360 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51945, ups=3, wpb=15177.733, bsz=560.494, num_updates=10218, lr=0.000312836, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3089, train_wall=2748 | |
| epoch 002: 1370 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=51946, ups=3, wpb=15176.642, bsz=560.298, num_updates=10228, lr=0.000312683, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3092, train_wall=2751 | |
| epoch 002: 1380 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=51957, ups=3, wpb=15179.145, bsz=560.365, num_updates=10238, lr=0.000312531, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3095, train_wall=2753 | |
| epoch 002: 1390 / 8862 loss=4.654, nll_loss=3.051, ppl=8.29, wps=51967, ups=3, wpb=15180.390, bsz=560.483, num_updates=10248, lr=0.000312378, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3097, train_wall=2756 | |
| epoch 002: 1400 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51975, ups=3, wpb=15180.981, bsz=560.228, num_updates=10258, lr=0.000312226, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3100, train_wall=2759 | |
| epoch 002: 1410 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51980, ups=3, wpb=15179.999, bsz=559.654, num_updates=10268, lr=0.000312074, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3103, train_wall=2761 | |
| epoch 002: 1420 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=51984, ups=3, wpb=15179.967, bsz=559.673, num_updates=10278, lr=0.000311922, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3106, train_wall=2764 | |
| epoch 002: 1430 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=51989, ups=3, wpb=15179.186, bsz=559.223, num_updates=10288, lr=0.00031177, gnorm=0.603, clip=0.000, oom=0.000, loss_scale=8.000, wall=3109, train_wall=2767 | |
| epoch 002: 1440 / 8862 loss=4.655, nll_loss=3.052, ppl=8.29, wps=51999, ups=3, wpb=15180.897, bsz=558.701, num_updates=10298, lr=0.000311619, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3112, train_wall=2769 | |
| epoch 002: 1450 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=51995, ups=3, wpb=15178.126, bsz=559.090, num_updates=10308, lr=0.000311468, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3115, train_wall=2772 | |
| epoch 002: 1460 / 8862 loss=4.655, nll_loss=3.051, ppl=8.29, wps=52000, ups=3, wpb=15178.259, bsz=558.719, num_updates=10318, lr=0.000311317, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3118, train_wall=2775 | |
| epoch 002: 1470 / 8862 loss=4.654, nll_loss=3.051, ppl=8.29, wps=52011, ups=3, wpb=15179.625, bsz=558.542, num_updates=10328, lr=0.000311166, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3120, train_wall=2777 | |
| epoch 002: 1480 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=52021, ups=3, wpb=15180.972, bsz=558.347, num_updates=10338, lr=0.000311015, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3123, train_wall=2780 | |
| epoch 002: 1490 / 8862 loss=4.654, nll_loss=3.050, ppl=8.28, wps=52015, ups=3, wpb=15178.520, bsz=558.744, num_updates=10348, lr=0.000310865, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3126, train_wall=2783 | |
| epoch 002: 1500 / 8862 loss=4.653, nll_loss=3.050, ppl=8.28, wps=52023, ups=3, wpb=15179.437, bsz=558.273, num_updates=10358, lr=0.000310715, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3129, train_wall=2785 | |
| epoch 002: 1510 / 8862 loss=4.652, nll_loss=3.048, ppl=8.27, wps=52013, ups=3, wpb=15181.046, bsz=560.709, num_updates=10368, lr=0.000310565, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3132, train_wall=2788 | |
| epoch 002: 1520 / 8862 loss=4.652, nll_loss=3.049, ppl=8.27, wps=52025, ups=3, wpb=15182.838, bsz=560.978, num_updates=10378, lr=0.000310415, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3135, train_wall=2791 | |
| epoch 002: 1530 / 8862 loss=4.652, nll_loss=3.048, ppl=8.27, wps=52029, ups=3, wpb=15183.295, bsz=561.045, num_updates=10388, lr=0.000310266, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3138, train_wall=2793 | |
| epoch 002: 1540 / 8862 loss=4.652, nll_loss=3.048, ppl=8.27, wps=52034, ups=3, wpb=15183.664, bsz=561.038, num_updates=10398, lr=0.000310117, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3141, train_wall=2796 | |
| epoch 002: 1550 / 8862 loss=4.651, nll_loss=3.047, ppl=8.27, wps=52040, ups=3, wpb=15184.309, bsz=560.707, num_updates=10408, lr=0.000309968, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3144, train_wall=2799 | |
| epoch 002: 1560 / 8862 loss=4.652, nll_loss=3.048, ppl=8.27, wps=52044, ups=3, wpb=15184.122, bsz=560.302, num_updates=10418, lr=0.000309819, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3147, train_wall=2801 | |
| epoch 002: 1570 / 8862 loss=4.652, nll_loss=3.048, ppl=8.27, wps=52054, ups=3, wpb=15185.878, bsz=560.015, num_updates=10428, lr=0.00030967, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3149, train_wall=2804 | |
| epoch 002: 1580 / 8862 loss=4.651, nll_loss=3.047, ppl=8.27, wps=52058, ups=3, wpb=15185.528, bsz=559.615, num_updates=10438, lr=0.000309522, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3152, train_wall=2807 | |
| epoch 002: 1590 / 8862 loss=4.651, nll_loss=3.047, ppl=8.27, wps=52052, ups=3, wpb=15183.598, bsz=560.116, num_updates=10448, lr=0.000309374, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3155, train_wall=2809 | |
| epoch 002: 1600 / 8862 loss=4.650, nll_loss=3.046, ppl=8.26, wps=52061, ups=3, wpb=15185.284, bsz=560.005, num_updates=10458, lr=0.000309226, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3158, train_wall=2812 | |
| epoch 002: 1610 / 8862 loss=4.649, nll_loss=3.045, ppl=8.25, wps=52073, ups=3, wpb=15187.341, bsz=559.762, num_updates=10468, lr=0.000309078, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3161, train_wall=2815 | |
| epoch 002: 1620 / 8862 loss=4.650, nll_loss=3.046, ppl=8.26, wps=52077, ups=3, wpb=15186.961, bsz=559.447, num_updates=10478, lr=0.000308931, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3164, train_wall=2817 | |
| epoch 002: 1630 / 8862 loss=4.650, nll_loss=3.046, ppl=8.26, wps=52083, ups=3, wpb=15187.807, bsz=559.431, num_updates=10488, lr=0.000308783, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3167, train_wall=2820 | |
| epoch 002: 1640 / 8862 loss=4.649, nll_loss=3.045, ppl=8.25, wps=52079, ups=3, wpb=15186.012, bsz=559.566, num_updates=10498, lr=0.000308636, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3170, train_wall=2823 | |
| epoch 002: 1650 / 8862 loss=4.649, nll_loss=3.045, ppl=8.25, wps=52086, ups=3, wpb=15186.684, bsz=559.515, num_updates=10508, lr=0.000308489, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3173, train_wall=2825 | |
| epoch 002: 1660 / 8862 loss=4.649, nll_loss=3.045, ppl=8.26, wps=52084, ups=3, wpb=15184.415, bsz=559.302, num_updates=10518, lr=0.000308343, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3175, train_wall=2828 | |
| epoch 002: 1670 / 8862 loss=4.649, nll_loss=3.045, ppl=8.25, wps=52086, ups=3, wpb=15184.661, bsz=560.000, num_updates=10528, lr=0.000308196, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3178, train_wall=2831 | |
| epoch 002: 1680 / 8862 loss=4.648, nll_loss=3.044, ppl=8.25, wps=52086, ups=3, wpb=15184.114, bsz=560.385, num_updates=10538, lr=0.00030805, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3181, train_wall=2833 | |
| epoch 002: 1690 / 8862 loss=4.647, nll_loss=3.043, ppl=8.24, wps=52095, ups=3, wpb=15185.640, bsz=560.369, num_updates=10548, lr=0.000307904, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3184, train_wall=2836 | |
| epoch 002: 1700 / 8862 loss=4.647, nll_loss=3.042, ppl=8.24, wps=52091, ups=3, wpb=15184.818, bsz=560.879, num_updates=10558, lr=0.000307758, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3187, train_wall=2839 | |
| epoch 002: 1710 / 8862 loss=4.646, nll_loss=3.042, ppl=8.24, wps=52088, ups=3, wpb=15182.943, bsz=561.015, num_updates=10568, lr=0.000307612, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3190, train_wall=2841 | |
| epoch 002: 1720 / 8862 loss=4.646, nll_loss=3.042, ppl=8.23, wps=52094, ups=3, wpb=15183.823, bsz=561.381, num_updates=10578, lr=0.000307467, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3193, train_wall=2844 | |
| epoch 002: 1730 / 8862 loss=4.645, nll_loss=3.041, ppl=8.23, wps=52101, ups=3, wpb=15184.495, bsz=561.123, num_updates=10588, lr=0.000307322, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3196, train_wall=2847 | |
| epoch 002: 1740 / 8862 loss=4.645, nll_loss=3.041, ppl=8.23, wps=52102, ups=3, wpb=15183.855, bsz=561.126, num_updates=10598, lr=0.000307177, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3199, train_wall=2849 | |
| epoch 002: 1750 / 8862 loss=4.644, nll_loss=3.040, ppl=8.22, wps=52102, ups=3, wpb=15183.374, bsz=561.421, num_updates=10608, lr=0.000307032, gnorm=0.602, clip=0.000, oom=0.000, loss_scale=8.000, wall=3201, train_wall=2852 | |
| epoch 002: 1760 / 8862 loss=4.644, nll_loss=3.040, ppl=8.22, wps=52105, ups=3, wpb=15183.596, bsz=561.436, num_updates=10618, lr=0.000306887, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3204, train_wall=2855 | |
| epoch 002: 1770 / 8862 loss=4.643, nll_loss=3.039, ppl=8.22, wps=52108, ups=3, wpb=15183.971, bsz=561.459, num_updates=10628, lr=0.000306743, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3207, train_wall=2857 | |
| epoch 002: 1780 / 8862 loss=4.643, nll_loss=3.039, ppl=8.22, wps=52116, ups=3, wpb=15185.624, bsz=561.464, num_updates=10638, lr=0.000306598, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3210, train_wall=2860 | |
| epoch 002: 1790 / 8862 loss=4.643, nll_loss=3.039, ppl=8.22, wps=52118, ups=3, wpb=15185.141, bsz=561.063, num_updates=10648, lr=0.000306454, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3213, train_wall=2863 | |
| epoch 002: 1800 / 8862 loss=4.643, nll_loss=3.039, ppl=8.22, wps=52116, ups=3, wpb=15184.426, bsz=561.581, num_updates=10658, lr=0.000306311, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3216, train_wall=2865 | |
| epoch 002: 1810 / 8862 loss=4.643, nll_loss=3.038, ppl=8.22, wps=52116, ups=3, wpb=15183.916, bsz=562.191, num_updates=10668, lr=0.000306167, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3219, train_wall=2868 | |
| epoch 002: 1820 / 8862 loss=4.643, nll_loss=3.038, ppl=8.22, wps=52119, ups=3, wpb=15183.811, bsz=561.955, num_updates=10678, lr=0.000306024, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3222, train_wall=2871 | |
| epoch 002: 1830 / 8862 loss=4.642, nll_loss=3.037, ppl=8.21, wps=52126, ups=3, wpb=15185.599, bsz=562.185, num_updates=10688, lr=0.00030588, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3225, train_wall=2873 | |
| epoch 002: 1840 / 8862 loss=4.642, nll_loss=3.037, ppl=8.21, wps=52124, ups=3, wpb=15184.243, bsz=562.290, num_updates=10698, lr=0.000305737, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3227, train_wall=2876 | |
| epoch 002: 1850 / 8862 loss=4.641, nll_loss=3.036, ppl=8.20, wps=52126, ups=3, wpb=15184.309, bsz=562.468, num_updates=10708, lr=0.000305595, gnorm=0.601, clip=0.000, oom=0.000, loss_scale=8.000, wall=3230, train_wall=2879 | |
| epoch 002: 1860 / 8862 loss=4.640, nll_loss=3.035, ppl=8.20, wps=52127, ups=3, wpb=15183.435, bsz=562.584, num_updates=10718, lr=0.000305452, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3233, train_wall=2881 | |
| epoch 002: 1870 / 8862 loss=4.640, nll_loss=3.035, ppl=8.20, wps=52132, ups=3, wpb=15183.835, bsz=562.450, num_updates=10728, lr=0.00030531, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3236, train_wall=2884 | |
| epoch 002: 1880 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52131, ups=3, wpb=15183.833, bsz=563.164, num_updates=10738, lr=0.000305168, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3239, train_wall=2887 | |
| epoch 002: 1890 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52133, ups=3, wpb=15183.338, bsz=563.105, num_updates=10748, lr=0.000305026, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3242, train_wall=2889 | |
| epoch 002: 1900 / 8862 loss=4.638, nll_loss=3.033, ppl=8.18, wps=52136, ups=3, wpb=15183.760, bsz=563.068, num_updates=10758, lr=0.000304884, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3245, train_wall=2892 | |
| epoch 002: 1910 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52132, ups=3, wpb=15181.330, bsz=563.048, num_updates=10768, lr=0.000304742, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3248, train_wall=2895 | |
| epoch 002: 1920 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52135, ups=3, wpb=15180.633, bsz=562.511, num_updates=10778, lr=0.000304601, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3251, train_wall=2897 | |
| epoch 002: 1930 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52142, ups=3, wpb=15181.525, bsz=562.125, num_updates=10788, lr=0.00030446, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3253, train_wall=2900 | |
| epoch 002: 1940 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52140, ups=3, wpb=15180.024, bsz=561.896, num_updates=10798, lr=0.000304318, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3256, train_wall=2903 | |
| epoch 002: 1950 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52144, ups=3, wpb=15180.091, bsz=561.940, num_updates=10808, lr=0.000304178, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3259, train_wall=2905 | |
| epoch 002: 1960 / 8862 loss=4.639, nll_loss=3.034, ppl=8.19, wps=52141, ups=3, wpb=15178.083, bsz=562.007, num_updates=10818, lr=0.000304037, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3262, train_wall=2908 | |
| epoch 002: 1970 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52141, ups=3, wpb=15177.317, bsz=562.184, num_updates=10828, lr=0.000303897, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3265, train_wall=2911 | |
| epoch 002: 1980 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52143, ups=3, wpb=15176.878, bsz=562.043, num_updates=10838, lr=0.000303756, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3268, train_wall=2913 | |
| epoch 002: 1990 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52150, ups=3, wpb=15177.800, bsz=561.615, num_updates=10848, lr=0.000303616, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3271, train_wall=2916 | |
| epoch 002: 2000 / 8862 loss=4.638, nll_loss=3.033, ppl=8.19, wps=52148, ups=3, wpb=15176.048, bsz=561.479, num_updates=10858, lr=0.000303477, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3273, train_wall=2919 | |
| epoch 002: 2010 / 8862 loss=4.637, nll_loss=3.032, ppl=8.18, wps=52152, ups=3, wpb=15176.550, bsz=561.579, num_updates=10868, lr=0.000303337, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3276, train_wall=2921 | |
| epoch 002: 2020 / 8862 loss=4.637, nll_loss=3.032, ppl=8.18, wps=52152, ups=3, wpb=15176.296, bsz=561.809, num_updates=10878, lr=0.000303197, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3279, train_wall=2924 | |
| epoch 002: 2030 / 8862 loss=4.636, nll_loss=3.031, ppl=8.17, wps=52154, ups=3, wpb=15176.278, bsz=561.977, num_updates=10888, lr=0.000303058, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3282, train_wall=2927 | |
| epoch 002: 2040 / 8862 loss=4.635, nll_loss=3.030, ppl=8.17, wps=52158, ups=3, wpb=15176.665, bsz=562.093, num_updates=10898, lr=0.000302919, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3285, train_wall=2929 | |
| epoch 002: 2050 / 8862 loss=4.635, nll_loss=3.029, ppl=8.16, wps=52158, ups=3, wpb=15176.225, bsz=562.500, num_updates=10908, lr=0.00030278, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3288, train_wall=2932 | |
| epoch 002: 2060 / 8862 loss=4.634, nll_loss=3.029, ppl=8.16, wps=52163, ups=3, wpb=15177.136, bsz=562.123, num_updates=10918, lr=0.000302641, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3291, train_wall=2935 | |
| epoch 002: 2070 / 8862 loss=4.634, nll_loss=3.028, ppl=8.16, wps=52169, ups=3, wpb=15178.178, bsz=562.090, num_updates=10928, lr=0.000302503, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3294, train_wall=2937 | |
| epoch 002: 2080 / 8862 loss=4.633, nll_loss=3.028, ppl=8.16, wps=52172, ups=3, wpb=15178.744, bsz=562.011, num_updates=10938, lr=0.000302365, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3297, train_wall=2940 | |
| epoch 002: 2090 / 8862 loss=4.634, nll_loss=3.028, ppl=8.16, wps=52171, ups=3, wpb=15177.282, bsz=562.016, num_updates=10948, lr=0.000302227, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3299, train_wall=2943 | |
| epoch 002: 2100 / 8862 loss=4.634, nll_loss=3.028, ppl=8.16, wps=52172, ups=3, wpb=15176.766, bsz=561.584, num_updates=10958, lr=0.000302089, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3302, train_wall=2945 | |
| epoch 002: 2110 / 8862 loss=4.633, nll_loss=3.028, ppl=8.16, wps=52174, ups=3, wpb=15176.507, bsz=561.732, num_updates=10968, lr=0.000301951, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3305, train_wall=2948 | |
| epoch 002: 2120 / 8862 loss=4.633, nll_loss=3.028, ppl=8.15, wps=52179, ups=3, wpb=15177.147, bsz=561.124, num_updates=10978, lr=0.000301813, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3308, train_wall=2951 | |
| epoch 002: 2130 / 8862 loss=4.633, nll_loss=3.027, ppl=8.15, wps=52179, ups=3, wpb=15176.897, bsz=561.190, num_updates=10988, lr=0.000301676, gnorm=0.600, clip=0.000, oom=0.000, loss_scale=8.000, wall=3311, train_wall=2953 | |
| epoch 002: 2140 / 8862 loss=4.632, nll_loss=3.027, ppl=8.15, wps=52178, ups=3, wpb=15176.310, bsz=561.188, num_updates=10998, lr=0.000301539, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3314, train_wall=2956 | |
| epoch 002: 2150 / 8862 loss=4.632, nll_loss=3.026, ppl=8.15, wps=52180, ups=3, wpb=15176.421, bsz=561.209, num_updates=11008, lr=0.000301402, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3317, train_wall=2959 | |
| epoch 002: 2160 / 8862 loss=4.631, nll_loss=3.026, ppl=8.14, wps=52185, ups=3, wpb=15177.353, bsz=561.170, num_updates=11018, lr=0.000301265, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3320, train_wall=2961 | |
| epoch 002: 2170 / 8862 loss=4.631, nll_loss=3.026, ppl=8.14, wps=52191, ups=3, wpb=15178.499, bsz=561.157, num_updates=11028, lr=0.000301128, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3323, train_wall=2964 | |
| epoch 002: 2180 / 8862 loss=4.631, nll_loss=3.025, ppl=8.14, wps=52193, ups=3, wpb=15177.912, bsz=561.218, num_updates=11038, lr=0.000300992, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3325, train_wall=2967 | |
| epoch 002: 2190 / 8862 loss=4.631, nll_loss=3.025, ppl=8.14, wps=52199, ups=3, wpb=15178.707, bsz=560.990, num_updates=11048, lr=0.000300856, gnorm=0.599, clip=0.000, oom=0.000, loss_scale=8.000, wall=3328, train_wall=2969 | |
| epoch 002: 2200 / 8862 loss=4.630, nll_loss=3.024, ppl=8.14, wps=52207, ups=3, wpb=15180.537, bsz=561.109, num_updates=11058, lr=0.00030072, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3331, train_wall=2972 | |
| epoch 002: 2210 / 8862 loss=4.629, nll_loss=3.024, ppl=8.13, wps=52213, ups=3, wpb=15181.408, bsz=560.850, num_updates=11068, lr=0.000300584, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3334, train_wall=2975 | |
| epoch 002: 2220 / 8862 loss=4.630, nll_loss=3.024, ppl=8.13, wps=52216, ups=3, wpb=15181.334, bsz=560.580, num_updates=11078, lr=0.000300448, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3337, train_wall=2977 | |
| epoch 002: 2230 / 8862 loss=4.630, nll_loss=3.024, ppl=8.13, wps=52219, ups=3, wpb=15181.374, bsz=560.319, num_updates=11088, lr=0.000300312, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3340, train_wall=2980 | |
| epoch 002: 2240 / 8862 loss=4.629, nll_loss=3.023, ppl=8.13, wps=52219, ups=3, wpb=15181.350, bsz=560.753, num_updates=11098, lr=0.000300177, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3343, train_wall=2983 | |
| epoch 002: 2250 / 8862 loss=4.628, nll_loss=3.023, ppl=8.13, wps=52223, ups=3, wpb=15182.072, bsz=560.917, num_updates=11108, lr=0.000300042, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3346, train_wall=2985 | |
| epoch 002: 2260 / 8862 loss=4.629, nll_loss=3.023, ppl=8.13, wps=52221, ups=3, wpb=15180.945, bsz=560.658, num_updates=11118, lr=0.000299907, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3348, train_wall=2988 | |
| epoch 002: 2270 / 8862 loss=4.627, nll_loss=3.021, ppl=8.12, wps=52224, ups=3, wpb=15181.840, bsz=560.941, num_updates=11128, lr=0.000299772, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3351, train_wall=2991 | |
| epoch 002: 2280 / 8862 loss=4.627, nll_loss=3.021, ppl=8.12, wps=52224, ups=3, wpb=15181.969, bsz=560.954, num_updates=11138, lr=0.000299638, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3354, train_wall=2993 | |
| epoch 002: 2290 / 8862 loss=4.627, nll_loss=3.021, ppl=8.12, wps=52227, ups=3, wpb=15182.568, bsz=561.180, num_updates=11148, lr=0.000299503, gnorm=0.598, clip=0.000, oom=0.000, loss_scale=8.000, wall=3357, train_wall=2996 | |
| epoch 002: 2300 / 8862 loss=4.626, nll_loss=3.020, ppl=8.11, wps=52227, ups=3, wpb=15182.298, bsz=561.186, num_updates=11158, lr=0.000299369, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3360, train_wall=2999 | |
| epoch 002: 2310 / 8862 loss=4.626, nll_loss=3.020, ppl=8.11, wps=52230, ups=3, wpb=15182.814, bsz=561.215, num_updates=11168, lr=0.000299235, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3363, train_wall=3001 | |
| epoch 002: 2320 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52231, ups=3, wpb=15183.190, bsz=561.555, num_updates=11178, lr=0.000299101, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3366, train_wall=3004 | |
| epoch 002: 2330 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52234, ups=3, wpb=15183.875, bsz=561.160, num_updates=11188, lr=0.000298967, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3369, train_wall=3007 | |
| epoch 002: 2340 / 8862 loss=4.626, nll_loss=3.020, ppl=8.11, wps=52236, ups=3, wpb=15184.114, bsz=561.193, num_updates=11198, lr=0.000298834, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3372, train_wall=3009 | |
| epoch 002: 2350 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52241, ups=3, wpb=15184.998, bsz=561.082, num_updates=11208, lr=0.0002987, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3375, train_wall=3012 | |
| epoch 002: 2360 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52244, ups=3, wpb=15185.574, bsz=560.691, num_updates=11218, lr=0.000298567, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3377, train_wall=3015 | |
| epoch 002: 2370 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52243, ups=3, wpb=15183.918, bsz=560.540, num_updates=11228, lr=0.000298434, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3380, train_wall=3017 | |
| epoch 002: 2380 / 8862 loss=4.625, nll_loss=3.019, ppl=8.11, wps=52245, ups=3, wpb=15184.782, bsz=560.645, num_updates=11238, lr=0.000298302, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3383, train_wall=3020 | |
| epoch 002: 2390 / 8862 loss=4.625, nll_loss=3.018, ppl=8.10, wps=52243, ups=3, wpb=15184.154, bsz=560.405, num_updates=11248, lr=0.000298169, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3386, train_wall=3023 | |
| epoch 002: 2400 / 8862 loss=4.624, nll_loss=3.018, ppl=8.10, wps=52242, ups=3, wpb=15183.858, bsz=560.486, num_updates=11258, lr=0.000298036, gnorm=0.597, clip=0.000, oom=0.000, loss_scale=8.000, wall=3389, train_wall=3025 | |
| epoch 002: 2410 / 8862 loss=4.624, nll_loss=3.018, ppl=8.10, wps=52240, ups=3, wpb=15183.252, bsz=560.620, num_updates=11268, lr=0.000297904, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3392, train_wall=3028 | |
| epoch 002: 2420 / 8862 loss=4.624, nll_loss=3.018, ppl=8.10, wps=52243, ups=3, wpb=15183.668, bsz=560.354, num_updates=11278, lr=0.000297772, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3395, train_wall=3031 | |
| epoch 002: 2430 / 8862 loss=4.623, nll_loss=3.017, ppl=8.10, wps=52246, ups=3, wpb=15184.535, bsz=560.365, num_updates=11288, lr=0.00029764, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3398, train_wall=3033 | |
| epoch 002: 2440 / 8862 loss=4.624, nll_loss=3.017, ppl=8.10, wps=52238, ups=3, wpb=15182.154, bsz=560.128, num_updates=11298, lr=0.000297508, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3401, train_wall=3036 | |
| epoch 002: 2450 / 8862 loss=4.623, nll_loss=3.017, ppl=8.10, wps=52238, ups=3, wpb=15181.809, bsz=559.781, num_updates=11308, lr=0.000297377, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3403, train_wall=3039 | |
| epoch 002: 2460 / 8862 loss=4.624, nll_loss=3.018, ppl=8.10, wps=52237, ups=3, wpb=15180.718, bsz=559.821, num_updates=11318, lr=0.000297245, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3406, train_wall=3041 | |
| epoch 002: 2470 / 8862 loss=4.623, nll_loss=3.017, ppl=8.09, wps=52236, ups=3, wpb=15180.523, bsz=560.285, num_updates=11328, lr=0.000297114, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3409, train_wall=3044 | |
| epoch 002: 2480 / 8862 loss=4.623, nll_loss=3.017, ppl=8.09, wps=52237, ups=3, wpb=15179.975, bsz=560.119, num_updates=11338, lr=0.000296983, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3412, train_wall=3047 | |
| epoch 002: 2490 / 8862 loss=4.624, nll_loss=3.017, ppl=8.10, wps=52239, ups=3, wpb=15179.762, bsz=559.679, num_updates=11348, lr=0.000296852, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3415, train_wall=3049 | |
| epoch 002: 2500 / 8862 loss=4.623, nll_loss=3.017, ppl=8.10, wps=52238, ups=3, wpb=15179.361, bsz=559.594, num_updates=11358, lr=0.000296722, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3418, train_wall=3052 | |
| epoch 002: 2510 / 8862 loss=4.623, nll_loss=3.017, ppl=8.09, wps=52238, ups=3, wpb=15179.045, bsz=559.481, num_updates=11368, lr=0.000296591, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3421, train_wall=3055 | |
| epoch 002: 2520 / 8862 loss=4.623, nll_loss=3.016, ppl=8.09, wps=52236, ups=3, wpb=15178.770, bsz=559.940, num_updates=11378, lr=0.000296461, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3424, train_wall=3057 | |
| epoch 002: 2530 / 8862 loss=4.622, nll_loss=3.016, ppl=8.09, wps=52231, ups=3, wpb=15177.881, bsz=560.392, num_updates=11388, lr=0.00029633, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3427, train_wall=3060 | |
| epoch 002: 2540 / 8862 loss=4.622, nll_loss=3.015, ppl=8.09, wps=52234, ups=3, wpb=15178.182, bsz=560.460, num_updates=11398, lr=0.0002962, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3430, train_wall=3063 | |
| epoch 002: 2550 / 8862 loss=4.622, nll_loss=3.015, ppl=8.09, wps=52235, ups=3, wpb=15178.252, bsz=560.580, num_updates=11408, lr=0.000296071, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3432, train_wall=3065 | |
| epoch 002: 2560 / 8862 loss=4.622, nll_loss=3.015, ppl=8.09, wps=52233, ups=3, wpb=15177.517, bsz=560.372, num_updates=11418, lr=0.000295941, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3435, train_wall=3068 | |
| epoch 002: 2570 / 8862 loss=4.621, nll_loss=3.015, ppl=8.08, wps=52234, ups=3, wpb=15177.363, bsz=560.078, num_updates=11428, lr=0.000295811, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3438, train_wall=3071 | |
| epoch 002: 2580 / 8862 loss=4.621, nll_loss=3.015, ppl=8.08, wps=52239, ups=3, wpb=15178.471, bsz=560.053, num_updates=11438, lr=0.000295682, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3441, train_wall=3073 | |
| epoch 002: 2590 / 8862 loss=4.621, nll_loss=3.014, ppl=8.08, wps=52236, ups=3, wpb=15178.675, bsz=560.775, num_updates=11448, lr=0.000295553, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3444, train_wall=3076 | |
| epoch 002: 2600 / 8862 loss=4.621, nll_loss=3.014, ppl=8.08, wps=52236, ups=3, wpb=15178.124, bsz=560.701, num_updates=11458, lr=0.000295424, gnorm=0.596, clip=0.000, oom=0.000, loss_scale=8.000, wall=3447, train_wall=3079 | |
| epoch 002: 2610 / 8862 loss=4.620, nll_loss=3.014, ppl=8.08, wps=52241, ups=3, wpb=15179.555, bsz=560.935, num_updates=11468, lr=0.000295295, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3450, train_wall=3082 | |
| epoch 002: 2620 / 8862 loss=4.620, nll_loss=3.013, ppl=8.07, wps=52241, ups=3, wpb=15179.785, bsz=561.096, num_updates=11478, lr=0.000295166, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3453, train_wall=3084 | |
| epoch 002: 2630 / 8862 loss=4.620, nll_loss=3.014, ppl=8.08, wps=52238, ups=3, wpb=15178.626, bsz=561.262, num_updates=11488, lr=0.000295038, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3456, train_wall=3087 | |
| epoch 002: 2640 / 8862 loss=4.620, nll_loss=3.014, ppl=8.08, wps=52237, ups=3, wpb=15178.219, bsz=561.236, num_updates=11498, lr=0.00029491, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3459, train_wall=3090 | |
| epoch 002: 2650 / 8862 loss=4.619, nll_loss=3.013, ppl=8.07, wps=52240, ups=3, wpb=15178.558, bsz=561.186, num_updates=11508, lr=0.000294781, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3461, train_wall=3092 | |
| epoch 002: 2660 / 8862 loss=4.619, nll_loss=3.013, ppl=8.07, wps=52237, ups=3, wpb=15178.052, bsz=561.188, num_updates=11518, lr=0.000294653, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3464, train_wall=3095 | |
| epoch 002: 2670 / 8862 loss=4.619, nll_loss=3.013, ppl=8.07, wps=52238, ups=3, wpb=15178.354, bsz=561.006, num_updates=11528, lr=0.000294526, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3467, train_wall=3098 | |
| epoch 002: 2680 / 8862 loss=4.619, nll_loss=3.013, ppl=8.07, wps=52239, ups=3, wpb=15177.877, bsz=560.830, num_updates=11538, lr=0.000294398, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3470, train_wall=3100 | |
| epoch 002: 2690 / 8862 loss=4.619, nll_loss=3.013, ppl=8.07, wps=52239, ups=3, wpb=15177.763, bsz=560.850, num_updates=11548, lr=0.00029427, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3473, train_wall=3103 | |
| epoch 002: 2700 / 8862 loss=4.619, nll_loss=3.012, ppl=8.07, wps=52244, ups=3, wpb=15179.010, bsz=561.060, num_updates=11558, lr=0.000294143, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3476, train_wall=3106 | |
| epoch 002: 2710 / 8862 loss=4.618, nll_loss=3.012, ppl=8.07, wps=52248, ups=3, wpb=15180.421, bsz=560.885, num_updates=11568, lr=0.000294016, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3479, train_wall=3108 | |
| epoch 002: 2720 / 8862 loss=4.618, nll_loss=3.012, ppl=8.06, wps=52250, ups=3, wpb=15180.889, bsz=560.773, num_updates=11578, lr=0.000293889, gnorm=0.595, clip=0.000, oom=0.000, loss_scale=8.000, wall=3482, train_wall=3111 | |
| epoch 002: 2730 / 8862 loss=4.618, nll_loss=3.011, ppl=8.06, wps=52253, ups=3, wpb=15181.587, bsz=560.838, num_updates=11588, lr=0.000293762, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3485, train_wall=3114 | |
| epoch 002: 2740 / 8862 loss=4.617, nll_loss=3.011, ppl=8.06, wps=52250, ups=3, wpb=15180.270, bsz=560.735, num_updates=11598, lr=0.000293635, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3488, train_wall=3116 | |
| epoch 002: 2750 / 8862 loss=4.617, nll_loss=3.010, ppl=8.06, wps=52251, ups=3, wpb=15180.449, bsz=560.503, num_updates=11608, lr=0.000293509, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3490, train_wall=3119 | |
| epoch 002: 2760 / 8862 loss=4.617, nll_loss=3.010, ppl=8.06, wps=52250, ups=3, wpb=15179.607, bsz=560.429, num_updates=11618, lr=0.000293383, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3493, train_wall=3122 | |
| epoch 002: 2770 / 8862 loss=4.616, nll_loss=3.010, ppl=8.05, wps=52254, ups=3, wpb=15180.894, bsz=560.413, num_updates=11628, lr=0.000293256, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3496, train_wall=3124 | |
| epoch 002: 2780 / 8862 loss=4.616, nll_loss=3.009, ppl=8.05, wps=52254, ups=3, wpb=15180.985, bsz=560.417, num_updates=11638, lr=0.00029313, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3499, train_wall=3127 | |
| epoch 002: 2790 / 8862 loss=4.616, nll_loss=3.009, ppl=8.05, wps=52255, ups=3, wpb=15181.588, bsz=560.267, num_updates=11648, lr=0.000293005, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3502, train_wall=3130 | |
| epoch 002: 2800 / 8862 loss=4.616, nll_loss=3.009, ppl=8.05, wps=52255, ups=3, wpb=15181.455, bsz=560.060, num_updates=11658, lr=0.000292879, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3505, train_wall=3132 | |
| epoch 002: 2810 / 8862 loss=4.616, nll_loss=3.009, ppl=8.05, wps=52252, ups=3, wpb=15180.949, bsz=560.421, num_updates=11668, lr=0.000292753, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3508, train_wall=3135 | |
| epoch 002: 2820 / 8862 loss=4.616, nll_loss=3.009, ppl=8.05, wps=52254, ups=3, wpb=15181.156, bsz=560.128, num_updates=11678, lr=0.000292628, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3511, train_wall=3138 | |
| epoch 002: 2830 / 8862 loss=4.615, nll_loss=3.009, ppl=8.05, wps=52259, ups=3, wpb=15182.622, bsz=560.311, num_updates=11688, lr=0.000292503, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3514, train_wall=3140 | |
| epoch 002: 2840 / 8862 loss=4.615, nll_loss=3.009, ppl=8.05, wps=52262, ups=3, wpb=15183.467, bsz=560.113, num_updates=11698, lr=0.000292378, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3517, train_wall=3143 | |
| epoch 002: 2850 / 8862 loss=4.615, nll_loss=3.009, ppl=8.05, wps=52263, ups=3, wpb=15183.444, bsz=559.902, num_updates=11708, lr=0.000292253, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3519, train_wall=3146 | |
| epoch 002: 2860 / 8862 loss=4.615, nll_loss=3.009, ppl=8.05, wps=52262, ups=3, wpb=15183.079, bsz=559.681, num_updates=11718, lr=0.000292128, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3522, train_wall=3149 | |
| epoch 002: 2870 / 8862 loss=4.615, nll_loss=3.008, ppl=8.05, wps=52265, ups=3, wpb=15183.795, bsz=559.579, num_updates=11728, lr=0.000292003, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3525, train_wall=3151 | |
| epoch 002: 2880 / 8862 loss=4.615, nll_loss=3.008, ppl=8.04, wps=52265, ups=3, wpb=15183.429, bsz=559.570, num_updates=11738, lr=0.000291879, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3528, train_wall=3154 | |
| epoch 002: 2890 / 8862 loss=4.614, nll_loss=3.007, ppl=8.04, wps=52259, ups=3, wpb=15182.449, bsz=559.787, num_updates=11748, lr=0.000291755, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3531, train_wall=3157 | |
| epoch 002: 2900 / 8862 loss=4.614, nll_loss=3.008, ppl=8.04, wps=52259, ups=3, wpb=15182.384, bsz=559.608, num_updates=11758, lr=0.000291631, gnorm=0.594, clip=0.000, oom=0.000, loss_scale=8.000, wall=3534, train_wall=3159 | |
| epoch 002: 2910 / 8862 loss=4.614, nll_loss=3.007, ppl=8.04, wps=52261, ups=3, wpb=15182.423, bsz=559.648, num_updates=11768, lr=0.000291507, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3537, train_wall=3162 | |
| epoch 002: 2920 / 8862 loss=4.613, nll_loss=3.007, ppl=8.04, wps=52261, ups=3, wpb=15181.951, bsz=559.754, num_updates=11778, lr=0.000291383, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3540, train_wall=3165 | |
| epoch 002: 2930 / 8862 loss=4.613, nll_loss=3.007, ppl=8.04, wps=52263, ups=3, wpb=15182.074, bsz=559.438, num_updates=11788, lr=0.000291259, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3543, train_wall=3167 | |
| epoch 002: 2940 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52264, ups=3, wpb=15182.246, bsz=559.037, num_updates=11798, lr=0.000291136, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3545, train_wall=3170 | |
| epoch 002: 2950 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52264, ups=3, wpb=15181.818, bsz=559.322, num_updates=11808, lr=0.000291013, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3548, train_wall=3173 | |
| epoch 002: 2960 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52264, ups=3, wpb=15181.396, bsz=559.103, num_updates=11818, lr=0.000290889, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3551, train_wall=3175 | |
| epoch 002: 2970 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52262, ups=3, wpb=15180.602, bsz=559.639, num_updates=11828, lr=0.000290766, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3554, train_wall=3178 | |
| epoch 002: 2980 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52259, ups=3, wpb=15179.493, bsz=559.632, num_updates=11838, lr=0.000290644, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3557, train_wall=3181 | |
| epoch 002: 2990 / 8862 loss=4.613, nll_loss=3.006, ppl=8.03, wps=52260, ups=3, wpb=15179.217, bsz=559.476, num_updates=11848, lr=0.000290521, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3560, train_wall=3183 | |
| epoch 002: 3000 / 8862 loss=4.612, nll_loss=3.005, ppl=8.03, wps=52262, ups=3, wpb=15179.379, bsz=559.643, num_updates=11858, lr=0.000290398, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3563, train_wall=3186 | |
| epoch 002: 3010 / 8862 loss=4.612, nll_loss=3.005, ppl=8.03, wps=52266, ups=3, wpb=15180.149, bsz=559.514, num_updates=11868, lr=0.000290276, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3566, train_wall=3189 | |
| epoch 002: 3020 / 8862 loss=4.612, nll_loss=3.005, ppl=8.03, wps=52266, ups=3, wpb=15179.915, bsz=559.489, num_updates=11878, lr=0.000290154, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3569, train_wall=3191 | |
| epoch 002: 3030 / 8862 loss=4.612, nll_loss=3.005, ppl=8.03, wps=52265, ups=3, wpb=15178.912, bsz=559.485, num_updates=11888, lr=0.000290032, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3571, train_wall=3194 | |
| epoch 002: 3040 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52269, ups=3, wpb=15179.350, bsz=559.334, num_updates=11898, lr=0.00028991, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3574, train_wall=3197 | |
| epoch 002: 3050 / 8862 loss=4.612, nll_loss=3.005, ppl=8.03, wps=52271, ups=3, wpb=15179.669, bsz=559.310, num_updates=11908, lr=0.000289788, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3577, train_wall=3199 | |
| epoch 002: 3060 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52270, ups=3, wpb=15178.375, bsz=559.075, num_updates=11918, lr=0.000289667, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3580, train_wall=3202 | |
| epoch 002: 3070 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52269, ups=3, wpb=15177.280, bsz=558.955, num_updates=11928, lr=0.000289545, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3583, train_wall=3204 | |
| epoch 002: 3080 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52268, ups=3, wpb=15176.617, bsz=559.003, num_updates=11938, lr=0.000289424, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3586, train_wall=3207 | |
| epoch 002: 3090 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52267, ups=3, wpb=15175.938, bsz=558.957, num_updates=11948, lr=0.000289303, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3589, train_wall=3210 | |
| epoch 002: 3100 / 8862 loss=4.611, nll_loss=3.003, ppl=8.02, wps=52269, ups=3, wpb=15176.279, bsz=559.236, num_updates=11958, lr=0.000289182, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3592, train_wall=3212 | |
| epoch 002: 3110 / 8862 loss=4.611, nll_loss=3.004, ppl=8.02, wps=52269, ups=3, wpb=15175.844, bsz=559.198, num_updates=11968, lr=0.000289061, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3594, train_wall=3215 | |
| epoch 002: 3120 / 8862 loss=4.610, nll_loss=3.003, ppl=8.02, wps=52272, ups=3, wpb=15176.530, bsz=559.062, num_updates=11978, lr=0.00028894, gnorm=0.593, clip=0.000, oom=0.000, loss_scale=8.000, wall=3597, train_wall=3218 | |
| epoch 002: 3130 / 8862 loss=4.610, nll_loss=3.003, ppl=8.02, wps=52275, ups=3, wpb=15177.063, bsz=558.937, num_updates=11988, lr=0.00028882, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3600, train_wall=3220 | |
| epoch 002: 3140 / 8862 loss=4.609, nll_loss=3.002, ppl=8.01, wps=52277, ups=3, wpb=15177.611, bsz=558.976, num_updates=11998, lr=0.000288699, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3603, train_wall=3223 | |
| epoch 002: 3150 / 8862 loss=4.610, nll_loss=3.003, ppl=8.01, wps=52277, ups=3, wpb=15176.848, bsz=558.751, num_updates=12008, lr=0.000288579, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3606, train_wall=3226 | |
| epoch 002: 3160 / 8862 loss=4.610, nll_loss=3.003, ppl=8.01, wps=52277, ups=3, wpb=15176.406, bsz=558.547, num_updates=12018, lr=0.000288459, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3609, train_wall=3228 | |
| epoch 002: 3170 / 8862 loss=4.610, nll_loss=3.003, ppl=8.01, wps=52278, ups=3, wpb=15175.965, bsz=558.216, num_updates=12028, lr=0.000288339, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3612, train_wall=3231 | |
| epoch 002: 3180 / 8862 loss=4.610, nll_loss=3.003, ppl=8.02, wps=52280, ups=3, wpb=15175.919, bsz=558.069, num_updates=12038, lr=0.000288219, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3615, train_wall=3234 | |
| epoch 002: 3190 / 8862 loss=4.609, nll_loss=3.002, ppl=8.01, wps=52280, ups=3, wpb=15175.298, bsz=558.095, num_updates=12048, lr=0.0002881, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3617, train_wall=3236 | |
| epoch 002: 3200 / 8862 loss=4.609, nll_loss=3.002, ppl=8.01, wps=52282, ups=3, wpb=15175.823, bsz=558.236, num_updates=12058, lr=0.00028798, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3620, train_wall=3239 | |
| epoch 002: 3210 / 8862 loss=4.609, nll_loss=3.001, ppl=8.01, wps=52284, ups=3, wpb=15176.307, bsz=558.453, num_updates=12068, lr=0.000287861, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3623, train_wall=3242 | |
| epoch 002: 3220 / 8862 loss=4.609, nll_loss=3.002, ppl=8.01, wps=52289, ups=3, wpb=15177.433, bsz=558.356, num_updates=12078, lr=0.000287741, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3626, train_wall=3244 | |
| epoch 002: 3230 / 8862 loss=4.608, nll_loss=3.001, ppl=8.01, wps=52290, ups=3, wpb=15177.942, bsz=558.514, num_updates=12088, lr=0.000287622, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3629, train_wall=3247 | |
| epoch 002: 3240 / 8862 loss=4.608, nll_loss=3.001, ppl=8.00, wps=52288, ups=3, wpb=15176.903, bsz=558.465, num_updates=12098, lr=0.000287504, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3632, train_wall=3250 | |
| epoch 002: 3250 / 8862 loss=4.607, nll_loss=3.000, ppl=8.00, wps=52289, ups=3, wpb=15176.798, bsz=558.543, num_updates=12108, lr=0.000287385, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3635, train_wall=3252 | |
| epoch 002: 3260 / 8862 loss=4.608, nll_loss=3.001, ppl=8.00, wps=52288, ups=3, wpb=15175.666, bsz=558.194, num_updates=12118, lr=0.000287266, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3638, train_wall=3255 | |
| epoch 002: 3270 / 8862 loss=4.608, nll_loss=3.000, ppl=8.00, wps=52288, ups=3, wpb=15175.281, bsz=558.046, num_updates=12128, lr=0.000287148, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3640, train_wall=3258 | |
| epoch 002: 3280 / 8862 loss=4.607, nll_loss=3.000, ppl=8.00, wps=52289, ups=3, wpb=15175.372, bsz=558.013, num_updates=12138, lr=0.000287029, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3643, train_wall=3260 | |
| epoch 002: 3290 / 8862 loss=4.607, nll_loss=3.000, ppl=8.00, wps=52294, ups=3, wpb=15176.604, bsz=557.802, num_updates=12148, lr=0.000286911, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3646, train_wall=3263 | |
| epoch 002: 3300 / 8862 loss=4.607, nll_loss=3.000, ppl=8.00, wps=52296, ups=3, wpb=15176.811, bsz=557.576, num_updates=12158, lr=0.000286793, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3649, train_wall=3266 | |
| epoch 002: 3310 / 8862 loss=4.606, nll_loss=2.999, ppl=8.00, wps=52297, ups=3, wpb=15176.633, bsz=557.526, num_updates=12168, lr=0.000286675, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3652, train_wall=3268 | |
| epoch 002: 3320 / 8862 loss=4.606, nll_loss=2.999, ppl=7.99, wps=52295, ups=3, wpb=15175.983, bsz=557.574, num_updates=12178, lr=0.000286558, gnorm=0.592, clip=0.000, oom=0.000, loss_scale=8.000, wall=3655, train_wall=3271 | |
| epoch 002: 3330 / 8862 loss=4.606, nll_loss=2.999, ppl=7.99, wps=52294, ups=3, wpb=15175.278, bsz=557.788, num_updates=12188, lr=0.00028644, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3658, train_wall=3274 | |
| epoch 002: 3340 / 8862 loss=4.606, nll_loss=2.999, ppl=7.99, wps=52295, ups=3, wpb=15175.107, bsz=557.603, num_updates=12198, lr=0.000286323, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3661, train_wall=3276 | |
| epoch 002: 3350 / 8862 loss=4.606, nll_loss=2.999, ppl=7.99, wps=52301, ups=3, wpb=15176.150, bsz=557.286, num_updates=12208, lr=0.000286205, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3664, train_wall=3279 | |
| epoch 002: 3360 / 8862 loss=4.606, nll_loss=2.999, ppl=7.99, wps=52302, ups=3, wpb=15176.565, bsz=557.206, num_updates=12218, lr=0.000286088, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3666, train_wall=3282 | |
| epoch 002: 3370 / 8862 loss=4.606, nll_loss=2.998, ppl=7.99, wps=52303, ups=3, wpb=15176.305, bsz=557.083, num_updates=12228, lr=0.000285971, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3669, train_wall=3284 | |
| epoch 002: 3380 / 8862 loss=4.606, nll_loss=2.998, ppl=7.99, wps=52301, ups=3, wpb=15175.393, bsz=557.035, num_updates=12238, lr=0.000285854, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3672, train_wall=3287 | |
| epoch 002: 3390 / 8862 loss=4.605, nll_loss=2.998, ppl=7.99, wps=52303, ups=3, wpb=15176.065, bsz=557.162, num_updates=12248, lr=0.000285738, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3675, train_wall=3290 | |
| epoch 002: 3400 / 8862 loss=4.605, nll_loss=2.998, ppl=7.99, wps=52304, ups=3, wpb=15176.143, bsz=557.147, num_updates=12258, lr=0.000285621, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3678, train_wall=3292 | |
| epoch 002: 3410 / 8862 loss=4.605, nll_loss=2.997, ppl=7.99, wps=52309, ups=3, wpb=15177.405, bsz=557.256, num_updates=12268, lr=0.000285505, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3681, train_wall=3295 | |
| epoch 002: 3420 / 8862 loss=4.605, nll_loss=2.997, ppl=7.99, wps=52307, ups=3, wpb=15176.650, bsz=557.238, num_updates=12278, lr=0.000285388, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3684, train_wall=3298 | |
| epoch 002: 3430 / 8862 loss=4.604, nll_loss=2.997, ppl=7.98, wps=52308, ups=3, wpb=15176.909, bsz=557.314, num_updates=12288, lr=0.000285272, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3687, train_wall=3300 | |
| epoch 002: 3440 / 8862 loss=4.604, nll_loss=2.997, ppl=7.98, wps=52310, ups=3, wpb=15177.344, bsz=557.175, num_updates=12298, lr=0.000285156, gnorm=0.591, clip=0.000, oom=0.000, loss_scale=8.000, wall=3690, train_wall=3303 | |
| epoch 002: 3450 / 8862 loss=4.604, nll_loss=2.997, ppl=7.98, wps=52313, ups=3, wpb=15177.655, bsz=556.982, num_updates=12308, lr=0.00028504, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3692, train_wall=3306 | |
| epoch 002: 3460 / 8862 loss=4.603, nll_loss=2.996, ppl=7.98, wps=52311, ups=3, wpb=15177.108, bsz=557.166, num_updates=12318, lr=0.000284925, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3695, train_wall=3308 | |
| epoch 002: 3470 / 8862 loss=4.603, nll_loss=2.996, ppl=7.98, wps=52312, ups=3, wpb=15177.411, bsz=557.264, num_updates=12328, lr=0.000284809, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3698, train_wall=3311 | |
| epoch 002: 3480 / 8862 loss=4.603, nll_loss=2.996, ppl=7.98, wps=52310, ups=3, wpb=15176.938, bsz=557.297, num_updates=12338, lr=0.000284694, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3701, train_wall=3314 | |
| epoch 002: 3490 / 8862 loss=4.603, nll_loss=2.995, ppl=7.97, wps=52309, ups=3, wpb=15176.233, bsz=557.431, num_updates=12348, lr=0.000284578, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3704, train_wall=3316 | |
| epoch 002: 3500 / 8862 loss=4.603, nll_loss=2.995, ppl=7.97, wps=52312, ups=3, wpb=15176.801, bsz=557.523, num_updates=12358, lr=0.000284463, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3707, train_wall=3319 | |
| epoch 002: 3510 / 8862 loss=4.603, nll_loss=2.995, ppl=7.97, wps=52310, ups=3, wpb=15175.943, bsz=557.366, num_updates=12368, lr=0.000284348, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3710, train_wall=3322 | |
| epoch 002: 3520 / 8862 loss=4.602, nll_loss=2.995, ppl=7.97, wps=52309, ups=3, wpb=15175.427, bsz=557.644, num_updates=12378, lr=0.000284233, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3713, train_wall=3324 | |
| epoch 002: 3530 / 8862 loss=4.602, nll_loss=2.994, ppl=7.97, wps=52310, ups=3, wpb=15175.005, bsz=557.533, num_updates=12388, lr=0.000284118, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3716, train_wall=3327 | |
| epoch 002: 3540 / 8862 loss=4.602, nll_loss=2.994, ppl=7.97, wps=52311, ups=3, wpb=15174.940, bsz=557.501, num_updates=12398, lr=0.000284004, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3718, train_wall=3330 | |
| epoch 002: 3550 / 8862 loss=4.602, nll_loss=2.994, ppl=7.97, wps=52312, ups=3, wpb=15174.844, bsz=557.265, num_updates=12408, lr=0.000283889, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=16.000, wall=3721, train_wall=3332 | |
| epoch 002: 3560 / 8862 loss=4.602, nll_loss=2.994, ppl=7.97, wps=52312, ups=3, wpb=15174.693, bsz=557.205, num_updates=12418, lr=0.000283775, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=16.000, wall=3724, train_wall=3335 | |
| WARNING: overflow detected, setting loss scale to: 8.0 | |
| epoch 002: 3570 / 8862 loss=4.601, nll_loss=2.994, ppl=7.96, wps=52298, ups=3, wpb=15174.842, bsz=557.369, num_updates=12427, lr=0.000283672, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3727, train_wall=3338 | |
| epoch 002: 3580 / 8862 loss=4.601, nll_loss=2.993, ppl=7.96, wps=52298, ups=3, wpb=15175.406, bsz=557.363, num_updates=12437, lr=0.000283558, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3730, train_wall=3340 | |
| epoch 002: 3590 / 8862 loss=4.601, nll_loss=2.993, ppl=7.96, wps=52300, ups=3, wpb=15175.721, bsz=557.368, num_updates=12447, lr=0.000283444, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3733, train_wall=3343 | |
| epoch 002: 3600 / 8862 loss=4.600, nll_loss=2.992, ppl=7.96, wps=52298, ups=3, wpb=15175.542, bsz=557.524, num_updates=12457, lr=0.00028333, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3736, train_wall=3346 | |
| epoch 002: 3610 / 8862 loss=4.600, nll_loss=2.992, ppl=7.96, wps=52299, ups=3, wpb=15175.499, bsz=557.662, num_updates=12467, lr=0.000283217, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3739, train_wall=3348 | |
| epoch 002: 3620 / 8862 loss=4.599, nll_loss=2.991, ppl=7.95, wps=52292, ups=3, wpb=15173.849, bsz=558.057, num_updates=12477, lr=0.000283103, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3742, train_wall=3351 | |
| epoch 002: 3630 / 8862 loss=4.599, nll_loss=2.991, ppl=7.95, wps=52291, ups=3, wpb=15173.370, bsz=558.096, num_updates=12487, lr=0.00028299, gnorm=0.590, clip=0.000, oom=0.000, loss_scale=8.000, wall=3744, train_wall=3354 | |
| epoch 002: 3640 / 8862 loss=4.599, nll_loss=2.991, ppl=7.95, wps=52291, ups=3, wpb=15173.580, bsz=558.270, num_updates=12497, lr=0.000282877, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3747, train_wall=3357 | |
| epoch 002: 3650 / 8862 loss=4.598, nll_loss=2.990, ppl=7.95, wps=52290, ups=3, wpb=15173.130, bsz=558.341, num_updates=12507, lr=0.000282764, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3750, train_wall=3359 | |
| epoch 002: 3660 / 8862 loss=4.598, nll_loss=2.990, ppl=7.94, wps=52291, ups=3, wpb=15173.428, bsz=558.367, num_updates=12517, lr=0.000282651, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3753, train_wall=3362 | |
| epoch 002: 3670 / 8862 loss=4.597, nll_loss=2.989, ppl=7.94, wps=52290, ups=3, wpb=15172.659, bsz=558.609, num_updates=12527, lr=0.000282538, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3756, train_wall=3365 | |
| epoch 002: 3680 / 8862 loss=4.597, nll_loss=2.989, ppl=7.94, wps=52289, ups=3, wpb=15172.767, bsz=558.813, num_updates=12537, lr=0.000282425, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3759, train_wall=3367 | |
| epoch 002: 3690 / 8862 loss=4.596, nll_loss=2.988, ppl=7.94, wps=52289, ups=3, wpb=15172.899, bsz=558.862, num_updates=12547, lr=0.000282312, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3762, train_wall=3370 | |
| epoch 002: 3700 / 8862 loss=4.596, nll_loss=2.988, ppl=7.93, wps=52294, ups=3, wpb=15174.408, bsz=558.798, num_updates=12557, lr=0.0002822, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3765, train_wall=3373 | |
| epoch 002: 3710 / 8862 loss=4.596, nll_loss=2.988, ppl=7.93, wps=52294, ups=3, wpb=15174.116, bsz=558.799, num_updates=12567, lr=0.000282088, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3768, train_wall=3375 | |
| epoch 002: 3720 / 8862 loss=4.596, nll_loss=2.988, ppl=7.93, wps=52298, ups=3, wpb=15174.915, bsz=558.806, num_updates=12577, lr=0.000281976, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3771, train_wall=3378 | |
| epoch 002: 3730 / 8862 loss=4.596, nll_loss=2.988, ppl=7.93, wps=52298, ups=3, wpb=15174.991, bsz=558.848, num_updates=12587, lr=0.000281864, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3773, train_wall=3381 | |
| epoch 002: 3740 / 8862 loss=4.596, nll_loss=2.988, ppl=7.93, wps=52297, ups=3, wpb=15174.694, bsz=558.766, num_updates=12597, lr=0.000281752, gnorm=0.589, clip=0.000, oom=0.000, loss_scale=8.000, wall=3776, train_wall=3383 | |
| epoch 002: 3750 / 8862 loss=4.595, nll_loss=2.987, ppl=7.93, wps=52299, ups=3, wpb=15175.082, bsz=558.697, num_updates=12607, lr=0.00028164, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3779, train_wall=3386 | |
| epoch 002: 3760 / 8862 loss=4.595, nll_loss=2.987, ppl=7.93, wps=52297, ups=3, wpb=15174.300, bsz=558.579, num_updates=12617, lr=0.000281528, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3782, train_wall=3389 | |
| epoch 002: 3770 / 8862 loss=4.595, nll_loss=2.987, ppl=7.93, wps=52297, ups=3, wpb=15174.152, bsz=558.566, num_updates=12627, lr=0.000281417, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3785, train_wall=3391 | |
| epoch 002: 3780 / 8862 loss=4.595, nll_loss=2.986, ppl=7.92, wps=52296, ups=3, wpb=15173.818, bsz=558.690, num_updates=12637, lr=0.000281305, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3788, train_wall=3394 | |
| epoch 002: 3790 / 8862 loss=4.595, nll_loss=2.986, ppl=7.93, wps=52292, ups=3, wpb=15172.889, bsz=558.658, num_updates=12647, lr=0.000281194, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3791, train_wall=3397 | |
| epoch 002: 3800 / 8862 loss=4.594, nll_loss=2.986, ppl=7.92, wps=52288, ups=3, wpb=15172.516, bsz=559.255, num_updates=12657, lr=0.000281083, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3794, train_wall=3399 | |
| epoch 002: 3810 / 8862 loss=4.594, nll_loss=2.986, ppl=7.92, wps=52285, ups=3, wpb=15171.762, bsz=559.269, num_updates=12667, lr=0.000280972, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3797, train_wall=3402 | |
| epoch 002: 3820 / 8862 loss=4.594, nll_loss=2.985, ppl=7.92, wps=52289, ups=3, wpb=15172.949, bsz=559.489, num_updates=12677, lr=0.000280861, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3800, train_wall=3405 | |
| epoch 002: 3830 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52286, ups=3, wpb=15171.745, bsz=559.484, num_updates=12687, lr=0.00028075, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3803, train_wall=3407 | |
| epoch 002: 3840 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52285, ups=3, wpb=15170.973, bsz=559.342, num_updates=12697, lr=0.00028064, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3805, train_wall=3410 | |
| epoch 002: 3850 / 8862 loss=4.594, nll_loss=2.985, ppl=7.92, wps=52288, ups=3, wpb=15171.761, bsz=559.208, num_updates=12707, lr=0.000280529, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3808, train_wall=3413 | |
| epoch 002: 3860 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52289, ups=3, wpb=15171.736, bsz=559.022, num_updates=12717, lr=0.000280419, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3811, train_wall=3415 | |
| epoch 002: 3870 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52289, ups=3, wpb=15171.688, bsz=559.016, num_updates=12727, lr=0.000280309, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3814, train_wall=3418 | |
| epoch 002: 3880 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52291, ups=3, wpb=15172.394, bsz=558.761, num_updates=12737, lr=0.000280199, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3817, train_wall=3421 | |
| epoch 002: 3890 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52289, ups=3, wpb=15171.272, bsz=558.639, num_updates=12747, lr=0.000280089, gnorm=0.588, clip=0.000, oom=0.000, loss_scale=8.000, wall=3820, train_wall=3423 | |
| epoch 002: 3900 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52291, ups=3, wpb=15171.785, bsz=558.505, num_updates=12757, lr=0.000279979, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3823, train_wall=3426 | |
| epoch 002: 3910 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52292, ups=3, wpb=15171.855, bsz=558.396, num_updates=12767, lr=0.000279869, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3826, train_wall=3429 | |
| epoch 002: 3920 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52291, ups=3, wpb=15171.037, bsz=558.241, num_updates=12777, lr=0.00027976, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3828, train_wall=3431 | |
| epoch 002: 3930 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52293, ups=3, wpb=15171.351, bsz=558.089, num_updates=12787, lr=0.000279651, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3831, train_wall=3434 | |
| epoch 002: 3940 / 8862 loss=4.593, nll_loss=2.985, ppl=7.92, wps=52292, ups=3, wpb=15171.174, bsz=558.173, num_updates=12797, lr=0.000279541, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3834, train_wall=3437 | |
| epoch 002: 3950 / 8862 loss=4.593, nll_loss=2.984, ppl=7.91, wps=52294, ups=3, wpb=15171.622, bsz=558.072, num_updates=12807, lr=0.000279432, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3837, train_wall=3439 | |
| epoch 002: 3960 / 8862 loss=4.592, nll_loss=2.984, ppl=7.91, wps=52295, ups=3, wpb=15172.570, bsz=558.505, num_updates=12817, lr=0.000279323, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3840, train_wall=3442 | |
| epoch 002: 3970 / 8862 loss=4.592, nll_loss=2.984, ppl=7.91, wps=52293, ups=3, wpb=15171.994, bsz=558.654, num_updates=12827, lr=0.000279214, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3843, train_wall=3445 | |
| epoch 002: 3980 / 8862 loss=4.592, nll_loss=2.983, ppl=7.91, wps=52294, ups=3, wpb=15172.198, bsz=558.754, num_updates=12837, lr=0.000279105, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3846, train_wall=3448 | |
| epoch 002: 3990 / 8862 loss=4.591, nll_loss=2.983, ppl=7.91, wps=52295, ups=3, wpb=15172.475, bsz=558.695, num_updates=12847, lr=0.000278997, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3849, train_wall=3450 | |
| epoch 002: 4000 / 8862 loss=4.591, nll_loss=2.983, ppl=7.91, wps=52293, ups=3, wpb=15171.787, bsz=558.708, num_updates=12857, lr=0.000278888, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3852, train_wall=3453 | |
| epoch 002: 4010 / 8862 loss=4.591, nll_loss=2.982, ppl=7.90, wps=52294, ups=3, wpb=15172.117, bsz=558.975, num_updates=12867, lr=0.00027878, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3855, train_wall=3456 | |
| epoch 002: 4020 / 8862 loss=4.591, nll_loss=2.982, ppl=7.90, wps=52291, ups=3, wpb=15171.588, bsz=558.796, num_updates=12877, lr=0.000278672, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3858, train_wall=3458 | |
| epoch 002: 4030 / 8862 loss=4.591, nll_loss=2.982, ppl=7.90, wps=52292, ups=3, wpb=15171.800, bsz=558.527, num_updates=12887, lr=0.000278563, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3860, train_wall=3461 | |
| epoch 002: 4040 / 8862 loss=4.590, nll_loss=2.982, ppl=7.90, wps=52291, ups=3, wpb=15171.670, bsz=558.584, num_updates=12897, lr=0.000278455, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3863, train_wall=3464 | |
| epoch 002: 4050 / 8862 loss=4.590, nll_loss=2.982, ppl=7.90, wps=52288, ups=3, wpb=15170.526, bsz=558.384, num_updates=12907, lr=0.000278348, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3866, train_wall=3466 | |
| epoch 002: 4060 / 8862 loss=4.590, nll_loss=2.982, ppl=7.90, wps=52289, ups=3, wpb=15170.651, bsz=558.276, num_updates=12917, lr=0.00027824, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3869, train_wall=3469 | |
| epoch 002: 4070 / 8862 loss=4.590, nll_loss=2.981, ppl=7.90, wps=52284, ups=3, wpb=15170.143, bsz=558.899, num_updates=12927, lr=0.000278132, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3872, train_wall=3472 | |
| epoch 002: 4080 / 8862 loss=4.590, nll_loss=2.982, ppl=7.90, wps=52285, ups=3, wpb=15169.909, bsz=558.682, num_updates=12937, lr=0.000278025, gnorm=0.587, clip=0.000, oom=0.000, loss_scale=8.000, wall=3875, train_wall=3474 | |
| epoch 002: 4090 / 8862 loss=4.590, nll_loss=2.981, ppl=7.90, wps=52286, ups=3, wpb=15170.530, bsz=558.613, num_updates=12947, lr=0.000277917, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3878, train_wall=3477 | |
| epoch 002: 4100 / 8862 loss=4.590, nll_loss=2.981, ppl=7.90, wps=52286, ups=3, wpb=15170.411, bsz=558.714, num_updates=12957, lr=0.00027781, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3881, train_wall=3480 | |
| epoch 002: 4110 / 8862 loss=4.590, nll_loss=2.981, ppl=7.90, wps=52288, ups=3, wpb=15170.536, bsz=558.425, num_updates=12967, lr=0.000277703, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3884, train_wall=3482 | |
| epoch 002: 4120 / 8862 loss=4.590, nll_loss=2.981, ppl=7.90, wps=52287, ups=3, wpb=15170.464, bsz=558.326, num_updates=12977, lr=0.000277596, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3887, train_wall=3485 | |
| epoch 002: 4130 / 8862 loss=4.589, nll_loss=2.981, ppl=7.89, wps=52288, ups=3, wpb=15170.495, bsz=558.354, num_updates=12987, lr=0.000277489, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3889, train_wall=3488 | |
| epoch 002: 4140 / 8862 loss=4.589, nll_loss=2.981, ppl=7.89, wps=52286, ups=3, wpb=15169.656, bsz=558.301, num_updates=12997, lr=0.000277382, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3892, train_wall=3490 | |
| epoch 002: 4150 / 8862 loss=4.589, nll_loss=2.980, ppl=7.89, wps=52286, ups=3, wpb=15169.635, bsz=558.442, num_updates=13007, lr=0.000277275, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3895, train_wall=3493 | |
| epoch 002: 4160 / 8862 loss=4.589, nll_loss=2.980, ppl=7.89, wps=52286, ups=3, wpb=15169.521, bsz=558.354, num_updates=13017, lr=0.000277169, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3898, train_wall=3496 | |
| epoch 002: 4170 / 8862 loss=4.589, nll_loss=2.980, ppl=7.89, wps=52288, ups=3, wpb=15169.993, bsz=558.406, num_updates=13027, lr=0.000277063, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3901, train_wall=3498 | |
| epoch 002: 4180 / 8862 loss=4.589, nll_loss=2.980, ppl=7.89, wps=52289, ups=3, wpb=15170.331, bsz=558.329, num_updates=13037, lr=0.000276956, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3904, train_wall=3501 | |
| epoch 002: 4190 / 8862 loss=4.589, nll_loss=2.980, ppl=7.89, wps=52289, ups=3, wpb=15170.090, bsz=558.175, num_updates=13047, lr=0.00027685, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3907, train_wall=3504 | |
| epoch 002: 4200 / 8862 loss=4.588, nll_loss=2.980, ppl=7.89, wps=52287, ups=3, wpb=15169.839, bsz=558.187, num_updates=13057, lr=0.000276744, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3910, train_wall=3507 | |
| epoch 002: 4210 / 8862 loss=4.588, nll_loss=2.979, ppl=7.89, wps=52285, ups=3, wpb=15169.445, bsz=558.343, num_updates=13067, lr=0.000276638, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3913, train_wall=3509 | |
| epoch 002: 4220 / 8862 loss=4.587, nll_loss=2.979, ppl=7.88, wps=52284, ups=3, wpb=15169.260, bsz=558.667, num_updates=13077, lr=0.000276532, gnorm=0.586, clip=0.000, oom=0.000, loss_scale=8.000, wall=3916, train_wall=3512 | |
| epoch 002: 4230 / 8862 loss=4.587, nll_loss=2.978, ppl=7.88, wps=52284, ups=3, wpb=15169.175, bsz=558.568, num_updates=13087, lr=0.000276427, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3918, train_wall=3515 | |
| epoch 002: 4240 / 8862 loss=4.587, nll_loss=2.978, ppl=7.88, wps=52283, ups=3, wpb=15168.803, bsz=558.558, num_updates=13097, lr=0.000276321, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3921, train_wall=3517 | |
| epoch 002: 4250 / 8862 loss=4.587, nll_loss=2.978, ppl=7.88, wps=52282, ups=3, wpb=15168.290, bsz=558.536, num_updates=13107, lr=0.000276216, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3924, train_wall=3520 | |
| epoch 002: 4260 / 8862 loss=4.586, nll_loss=2.978, ppl=7.88, wps=52282, ups=3, wpb=15168.285, bsz=558.577, num_updates=13117, lr=0.00027611, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3927, train_wall=3523 | |
| epoch 002: 4270 / 8862 loss=4.586, nll_loss=2.978, ppl=7.88, wps=52283, ups=3, wpb=15168.411, bsz=558.406, num_updates=13127, lr=0.000276005, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3930, train_wall=3525 | |
| epoch 002: 4280 / 8862 loss=4.586, nll_loss=2.977, ppl=7.88, wps=52282, ups=3, wpb=15167.896, bsz=558.288, num_updates=13137, lr=0.0002759, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3933, train_wall=3528 | |
| epoch 002: 4290 / 8862 loss=4.586, nll_loss=2.977, ppl=7.88, wps=52283, ups=3, wpb=15168.163, bsz=558.111, num_updates=13147, lr=0.000275795, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3936, train_wall=3531 | |
| epoch 002: 4300 / 8862 loss=4.586, nll_loss=2.977, ppl=7.87, wps=52285, ups=3, wpb=15168.366, bsz=557.972, num_updates=13157, lr=0.00027569, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3939, train_wall=3533 | |
| epoch 002: 4310 / 8862 loss=4.585, nll_loss=2.976, ppl=7.87, wps=52286, ups=3, wpb=15169.067, bsz=558.159, num_updates=13167, lr=0.000275586, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3942, train_wall=3536 | |
| epoch 002: 4320 / 8862 loss=4.585, nll_loss=2.976, ppl=7.87, wps=52286, ups=3, wpb=15169.395, bsz=558.211, num_updates=13177, lr=0.000275481, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3944, train_wall=3539 | |
| epoch 002: 4330 / 8862 loss=4.584, nll_loss=2.976, ppl=7.87, wps=52288, ups=3, wpb=15170.155, bsz=558.284, num_updates=13187, lr=0.000275377, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3947, train_wall=3541 | |
| epoch 002: 4340 / 8862 loss=4.584, nll_loss=2.975, ppl=7.86, wps=52288, ups=3, wpb=15170.059, bsz=558.142, num_updates=13197, lr=0.000275272, gnorm=0.585, clip=0.000, oom=0.000, loss_scale=8.000, wall=3950, train_wall=3544 | |
| epoch 002: 4350 / 8862 loss=4.584, nll_loss=2.975, ppl=7.86, wps=52288, ups=3, wpb=15170.345, bsz=558.194, num_updates=13207, lr=0.000275168, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3953, train_wall=3547 | |
| epoch 002: 4360 / 8862 loss=4.584, nll_loss=2.975, ppl=7.86, wps=52288, ups=3, wpb=15170.340, bsz=558.310, num_updates=13217, lr=0.000275064, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3956, train_wall=3549 | |
| epoch 002: 4370 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.711, bsz=558.270, num_updates=13227, lr=0.00027496, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3959, train_wall=3552 | |
| epoch 002: 4380 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.515, bsz=558.212, num_updates=13237, lr=0.000274856, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3962, train_wall=3555 | |
| epoch 002: 4390 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.402, bsz=558.123, num_updates=13247, lr=0.000274752, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3965, train_wall=3557 | |
| epoch 002: 4400 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.551, bsz=557.995, num_updates=13257, lr=0.000274649, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3968, train_wall=3560 | |
| epoch 002: 4410 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.872, bsz=557.917, num_updates=13267, lr=0.000274545, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3971, train_wall=3563 | |
| epoch 002: 4420 / 8862 loss=4.583, nll_loss=2.974, ppl=7.86, wps=52286, ups=3, wpb=15169.383, bsz=557.911, num_updates=13277, lr=0.000274442, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3974, train_wall=3566 | |
| epoch 002: 4430 / 8862 loss=4.583, nll_loss=2.973, ppl=7.85, wps=52284, ups=3, wpb=15169.044, bsz=557.851, num_updates=13287, lr=0.000274338, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3976, train_wall=3568 | |
| epoch 002: 4440 / 8862 loss=4.582, nll_loss=2.973, ppl=7.85, wps=52282, ups=3, wpb=15168.911, bsz=558.405, num_updates=13297, lr=0.000274235, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3979, train_wall=3571 | |
| epoch 002: 4450 / 8862 loss=4.582, nll_loss=2.972, ppl=7.85, wps=52279, ups=3, wpb=15168.937, bsz=558.698, num_updates=13307, lr=0.000274132, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3982, train_wall=3574 | |
| epoch 002: 4460 / 8862 loss=4.581, nll_loss=2.972, ppl=7.85, wps=52278, ups=3, wpb=15168.479, bsz=558.657, num_updates=13317, lr=0.000274029, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3985, train_wall=3576 | |
| epoch 002: 4470 / 8862 loss=4.581, nll_loss=2.972, ppl=7.84, wps=52279, ups=3, wpb=15168.628, bsz=558.801, num_updates=13327, lr=0.000273926, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3988, train_wall=3579 | |
| epoch 002: 4480 / 8862 loss=4.581, nll_loss=2.971, ppl=7.84, wps=52278, ups=3, wpb=15168.585, bsz=558.809, num_updates=13337, lr=0.000273824, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3991, train_wall=3582 | |
| epoch 002: 4490 / 8862 loss=4.581, nll_loss=2.971, ppl=7.84, wps=52277, ups=3, wpb=15168.281, bsz=558.692, num_updates=13347, lr=0.000273721, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3994, train_wall=3584 | |
| epoch 002: 4500 / 8862 loss=4.581, nll_loss=2.971, ppl=7.84, wps=52274, ups=3, wpb=15167.322, bsz=558.725, num_updates=13357, lr=0.000273619, gnorm=0.584, clip=0.000, oom=0.000, loss_scale=8.000, wall=3997, train_wall=3587 | |
| epoch 002: 4510 / 8862 loss=4.580, nll_loss=2.971, ppl=7.84, wps=52275, ups=3, wpb=15167.384, bsz=558.707, num_updates=13367, lr=0.000273516, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4000, train_wall=3590 | |
| epoch 002: 4520 / 8862 loss=4.580, nll_loss=2.971, ppl=7.84, wps=52275, ups=3, wpb=15167.443, bsz=558.529, num_updates=13377, lr=0.000273414, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4003, train_wall=3592 | |
| epoch 002: 4530 / 8862 loss=4.580, nll_loss=2.970, ppl=7.84, wps=52276, ups=3, wpb=15167.822, bsz=558.460, num_updates=13387, lr=0.000273312, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4006, train_wall=3595 | |
| epoch 002: 4540 / 8862 loss=4.579, nll_loss=2.970, ppl=7.84, wps=52276, ups=3, wpb=15167.638, bsz=558.511, num_updates=13397, lr=0.00027321, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4008, train_wall=3598 | |
| epoch 002: 4550 / 8862 loss=4.579, nll_loss=2.970, ppl=7.83, wps=52274, ups=3, wpb=15167.434, bsz=558.734, num_updates=13407, lr=0.000273108, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4011, train_wall=3600 | |
| epoch 002: 4560 / 8862 loss=4.579, nll_loss=2.970, ppl=7.83, wps=52273, ups=3, wpb=15166.972, bsz=558.581, num_updates=13417, lr=0.000273006, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4014, train_wall=3603 | |
| epoch 002: 4570 / 8862 loss=4.579, nll_loss=2.969, ppl=7.83, wps=52273, ups=3, wpb=15166.950, bsz=558.598, num_updates=13427, lr=0.000272904, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4017, train_wall=3606 | |
| epoch 002: 4580 / 8862 loss=4.579, nll_loss=2.969, ppl=7.83, wps=52275, ups=3, wpb=15167.321, bsz=558.524, num_updates=13437, lr=0.000272803, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4020, train_wall=3609 | |
| epoch 002: 4590 / 8862 loss=4.578, nll_loss=2.969, ppl=7.83, wps=52272, ups=3, wpb=15167.058, bsz=558.764, num_updates=13447, lr=0.000272701, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4023, train_wall=3611 | |
| epoch 002: 4600 / 8862 loss=4.578, nll_loss=2.969, ppl=7.83, wps=52273, ups=3, wpb=15167.069, bsz=558.612, num_updates=13457, lr=0.0002726, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4026, train_wall=3614 | |
| epoch 002: 4610 / 8862 loss=4.578, nll_loss=2.969, ppl=7.83, wps=52273, ups=3, wpb=15166.740, bsz=558.456, num_updates=13467, lr=0.000272499, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4029, train_wall=3617 | |
| epoch 002: 4620 / 8862 loss=4.578, nll_loss=2.968, ppl=7.83, wps=52275, ups=3, wpb=15167.301, bsz=558.639, num_updates=13477, lr=0.000272398, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4032, train_wall=3619 | |
| epoch 002: 4630 / 8862 loss=4.577, nll_loss=2.968, ppl=7.82, wps=52276, ups=3, wpb=15167.647, bsz=558.723, num_updates=13487, lr=0.000272297, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4035, train_wall=3622 | |
| epoch 002: 4640 / 8862 loss=4.577, nll_loss=2.967, ppl=7.82, wps=52278, ups=3, wpb=15168.293, bsz=558.897, num_updates=13497, lr=0.000272196, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4037, train_wall=3625 | |
| epoch 002: 4650 / 8862 loss=4.577, nll_loss=2.967, ppl=7.82, wps=52276, ups=3, wpb=15167.801, bsz=558.854, num_updates=13507, lr=0.000272095, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4040, train_wall=3627 | |
| epoch 002: 4660 / 8862 loss=4.577, nll_loss=2.967, ppl=7.82, wps=52278, ups=3, wpb=15167.975, bsz=558.757, num_updates=13517, lr=0.000271994, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4043, train_wall=3630 | |
| epoch 002: 4670 / 8862 loss=4.576, nll_loss=2.967, ppl=7.82, wps=52278, ups=3, wpb=15168.204, bsz=558.888, num_updates=13527, lr=0.000271894, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4046, train_wall=3633 | |
| epoch 002: 4680 / 8862 loss=4.576, nll_loss=2.967, ppl=7.82, wps=52281, ups=3, wpb=15168.815, bsz=558.769, num_updates=13537, lr=0.000271793, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4049, train_wall=3635 | |
| epoch 002: 4690 / 8862 loss=4.576, nll_loss=2.966, ppl=7.81, wps=52281, ups=3, wpb=15169.451, bsz=559.024, num_updates=13547, lr=0.000271693, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4052, train_wall=3638 | |
| epoch 002: 4700 / 8862 loss=4.576, nll_loss=2.966, ppl=7.81, wps=52280, ups=3, wpb=15169.147, bsz=558.948, num_updates=13557, lr=0.000271593, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4055, train_wall=3641 | |
| epoch 002: 4710 / 8862 loss=4.575, nll_loss=2.966, ppl=7.81, wps=52280, ups=3, wpb=15169.365, bsz=558.903, num_updates=13567, lr=0.000271493, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4058, train_wall=3643 | |
| epoch 002: 4720 / 8862 loss=4.575, nll_loss=2.966, ppl=7.81, wps=52281, ups=3, wpb=15169.610, bsz=558.951, num_updates=13577, lr=0.000271393, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4061, train_wall=3646 | |
| epoch 002: 4730 / 8862 loss=4.575, nll_loss=2.965, ppl=7.81, wps=52281, ups=3, wpb=15169.588, bsz=559.002, num_updates=13587, lr=0.000271293, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4064, train_wall=3649 | |
| epoch 002: 4740 / 8862 loss=4.575, nll_loss=2.966, ppl=7.81, wps=52281, ups=3, wpb=15169.200, bsz=558.893, num_updates=13597, lr=0.000271193, gnorm=0.583, clip=0.000, oom=0.000, loss_scale=8.000, wall=4066, train_wall=3651 | |
| epoch 002: 4750 / 8862 loss=4.575, nll_loss=2.965, ppl=7.81, wps=52281, ups=3, wpb=15169.519, bsz=558.880, num_updates=13607, lr=0.000271093, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4069, train_wall=3654 | |
| epoch 002: 4760 / 8862 loss=4.574, nll_loss=2.965, ppl=7.81, wps=52281, ups=3, wpb=15169.603, bsz=559.049, num_updates=13617, lr=0.000270994, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4072, train_wall=3657 | |
| epoch 002: 4770 / 8862 loss=4.574, nll_loss=2.965, ppl=7.81, wps=52283, ups=3, wpb=15170.197, bsz=558.960, num_updates=13627, lr=0.000270894, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4075, train_wall=3660 | |
| epoch 002: 4780 / 8862 loss=4.574, nll_loss=2.964, ppl=7.80, wps=52283, ups=3, wpb=15170.351, bsz=558.909, num_updates=13637, lr=0.000270795, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4078, train_wall=3662 | |
| epoch 002: 4790 / 8862 loss=4.574, nll_loss=2.964, ppl=7.80, wps=52279, ups=3, wpb=15169.608, bsz=559.095, num_updates=13647, lr=0.000270696, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4081, train_wall=3665 | |
| epoch 002: 4800 / 8862 loss=4.573, nll_loss=2.964, ppl=7.80, wps=52281, ups=3, wpb=15170.129, bsz=558.972, num_updates=13657, lr=0.000270597, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4084, train_wall=3668 | |
| epoch 002: 4810 / 8862 loss=4.574, nll_loss=2.964, ppl=7.80, wps=52280, ups=3, wpb=15169.767, bsz=558.859, num_updates=13667, lr=0.000270498, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4087, train_wall=3670 | |
| epoch 002: 4820 / 8862 loss=4.574, nll_loss=2.964, ppl=7.80, wps=52280, ups=3, wpb=15169.844, bsz=558.873, num_updates=13677, lr=0.000270399, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4090, train_wall=3673 | |
| epoch 002: 4830 / 8862 loss=4.573, nll_loss=2.963, ppl=7.80, wps=52282, ups=3, wpb=15170.292, bsz=558.953, num_updates=13687, lr=0.0002703, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4093, train_wall=3676 | |
| epoch 002: 4840 / 8862 loss=4.573, nll_loss=2.963, ppl=7.80, wps=52283, ups=3, wpb=15170.690, bsz=559.053, num_updates=13697, lr=0.000270201, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4096, train_wall=3678 | |
| epoch 002: 4850 / 8862 loss=4.572, nll_loss=2.962, ppl=7.79, wps=52284, ups=3, wpb=15170.981, bsz=559.113, num_updates=13707, lr=0.000270103, gnorm=0.582, clip=0.000, oom=0.000, loss_scale=8.000, wall=4098, train_wall=3681 | |
| epoch 002: 4860 / 8862 loss=4.572, nll_loss=2.962, ppl=7.79, wps=52285, ups=3, wpb=15171.422, bsz=559.263, num_updates=13717, lr=0.000270004, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4101, train_wall=3684 | |
| epoch 002: 4870 / 8862 loss=4.572, nll_loss=2.962, ppl=7.79, wps=52285, ups=3, wpb=15171.286, bsz=559.032, num_updates=13727, lr=0.000269906, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4104, train_wall=3686 | |
| epoch 002: 4880 / 8862 loss=4.572, nll_loss=2.962, ppl=7.79, wps=52285, ups=3, wpb=15171.452, bsz=559.098, num_updates=13737, lr=0.000269808, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4107, train_wall=3689 | |
| epoch 002: 4890 / 8862 loss=4.571, nll_loss=2.961, ppl=7.79, wps=52287, ups=3, wpb=15172.384, bsz=559.117, num_updates=13747, lr=0.000269709, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4110, train_wall=3692 | |
| epoch 002: 4900 / 8862 loss=4.571, nll_loss=2.961, ppl=7.79, wps=52286, ups=3, wpb=15172.043, bsz=559.047, num_updates=13757, lr=0.000269611, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4113, train_wall=3694 | |
| epoch 002: 4910 / 8862 loss=4.571, nll_loss=2.961, ppl=7.79, wps=52288, ups=3, wpb=15172.388, bsz=558.894, num_updates=13767, lr=0.000269513, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4116, train_wall=3697 | |
| epoch 002: 4920 / 8862 loss=4.571, nll_loss=2.961, ppl=7.79, wps=52288, ups=3, wpb=15172.392, bsz=558.912, num_updates=13777, lr=0.000269416, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4119, train_wall=3700 | |
| epoch 002: 4930 / 8862 loss=4.570, nll_loss=2.960, ppl=7.78, wps=52290, ups=3, wpb=15172.975, bsz=558.940, num_updates=13787, lr=0.000269318, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4122, train_wall=3702 | |
| epoch 002: 4940 / 8862 loss=4.570, nll_loss=2.960, ppl=7.78, wps=52291, ups=3, wpb=15173.320, bsz=558.891, num_updates=13797, lr=0.00026922, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4125, train_wall=3705 | |
| epoch 002: 4950 / 8862 loss=4.570, nll_loss=2.960, ppl=7.78, wps=52292, ups=3, wpb=15173.833, bsz=558.956, num_updates=13807, lr=0.000269123, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4128, train_wall=3708 | |
| epoch 002: 4960 / 8862 loss=4.570, nll_loss=2.960, ppl=7.78, wps=52292, ups=3, wpb=15173.705, bsz=558.879, num_updates=13817, lr=0.000269025, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4130, train_wall=3711 | |
| epoch 002: 4970 / 8862 loss=4.570, nll_loss=2.960, ppl=7.78, wps=52294, ups=3, wpb=15174.389, bsz=558.759, num_updates=13827, lr=0.000268928, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4133, train_wall=3713 | |
| epoch 002: 4980 / 8862 loss=4.569, nll_loss=2.959, ppl=7.78, wps=52291, ups=3, wpb=15174.092, bsz=559.036, num_updates=13837, lr=0.000268831, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4136, train_wall=3716 | |
| epoch 002: 4990 / 8862 loss=4.569, nll_loss=2.959, ppl=7.78, wps=52291, ups=3, wpb=15174.152, bsz=559.012, num_updates=13847, lr=0.000268734, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4139, train_wall=3719 | |
| epoch 002: 5000 / 8862 loss=4.569, nll_loss=2.959, ppl=7.77, wps=52292, ups=3, wpb=15174.104, bsz=559.021, num_updates=13857, lr=0.000268637, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4142, train_wall=3721 | |
| epoch 002: 5010 / 8862 loss=4.569, nll_loss=2.958, ppl=7.77, wps=52293, ups=3, wpb=15174.708, bsz=559.029, num_updates=13867, lr=0.00026854, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4145, train_wall=3724 | |
| epoch 002: 5020 / 8862 loss=4.568, nll_loss=2.958, ppl=7.77, wps=52293, ups=3, wpb=15174.665, bsz=559.018, num_updates=13877, lr=0.000268443, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4148, train_wall=3727 | |
| epoch 002: 5030 / 8862 loss=4.568, nll_loss=2.958, ppl=7.77, wps=52294, ups=3, wpb=15174.991, bsz=558.985, num_updates=13887, lr=0.000268346, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4151, train_wall=3729 | |
| epoch 002: 5040 / 8862 loss=4.568, nll_loss=2.958, ppl=7.77, wps=52296, ups=3, wpb=15175.181, bsz=558.897, num_updates=13897, lr=0.00026825, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4154, train_wall=3732 | |
| epoch 002: 5050 / 8862 loss=4.568, nll_loss=2.958, ppl=7.77, wps=52296, ups=3, wpb=15175.317, bsz=558.825, num_updates=13907, lr=0.000268153, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4157, train_wall=3735 | |
| epoch 002: 5060 / 8862 loss=4.568, nll_loss=2.958, ppl=7.77, wps=52296, ups=3, wpb=15175.118, bsz=558.754, num_updates=13917, lr=0.000268057, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4159, train_wall=3737 | |
| epoch 002: 5070 / 8862 loss=4.568, nll_loss=2.957, ppl=7.77, wps=52297, ups=3, wpb=15175.321, bsz=558.616, num_updates=13927, lr=0.000267961, gnorm=0.581, clip=0.000, oom=0.000, loss_scale=8.000, wall=4162, train_wall=3740 | |
| epoch 002: 5080 / 8862 loss=4.567, nll_loss=2.957, ppl=7.77, wps=52296, ups=3, wpb=15175.292, bsz=558.602, num_updates=13937, lr=0.000267865, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4165, train_wall=3743 | |
| epoch 002: 5090 / 8862 loss=4.567, nll_loss=2.957, ppl=7.76, wps=52296, ups=3, wpb=15175.253, bsz=558.609, num_updates=13947, lr=0.000267769, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4168, train_wall=3745 | |
| epoch 002: 5100 / 8862 loss=4.567, nll_loss=2.956, ppl=7.76, wps=52295, ups=3, wpb=15175.277, bsz=558.836, num_updates=13957, lr=0.000267673, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4171, train_wall=3748 | |
| epoch 002: 5110 / 8862 loss=4.567, nll_loss=2.956, ppl=7.76, wps=52295, ups=3, wpb=15175.304, bsz=558.666, num_updates=13967, lr=0.000267577, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4174, train_wall=3751 | |
| epoch 002: 5120 / 8862 loss=4.566, nll_loss=2.956, ppl=7.76, wps=52297, ups=3, wpb=15175.905, bsz=558.781, num_updates=13977, lr=0.000267481, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4177, train_wall=3753 | |
| epoch 002: 5130 / 8862 loss=4.566, nll_loss=2.956, ppl=7.76, wps=52297, ups=3, wpb=15175.657, bsz=558.749, num_updates=13987, lr=0.000267385, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4180, train_wall=3756 | |
| epoch 002: 5140 / 8862 loss=4.566, nll_loss=2.955, ppl=7.76, wps=52297, ups=3, wpb=15175.651, bsz=558.693, num_updates=13997, lr=0.00026729, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4183, train_wall=3759 | |
| epoch 002: 5150 / 8862 loss=4.566, nll_loss=2.955, ppl=7.75, wps=52298, ups=3, wpb=15175.767, bsz=558.622, num_updates=14007, lr=0.000267194, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4186, train_wall=3762 | |
| epoch 002: 5160 / 8862 loss=4.565, nll_loss=2.955, ppl=7.75, wps=52298, ups=3, wpb=15175.949, bsz=558.619, num_updates=14017, lr=0.000267099, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4189, train_wall=3764 | |
| epoch 002: 5170 / 8862 loss=4.565, nll_loss=2.955, ppl=7.75, wps=52297, ups=3, wpb=15175.672, bsz=558.637, num_updates=14027, lr=0.000267004, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4191, train_wall=3767 | |
| epoch 002: 5180 / 8862 loss=4.565, nll_loss=2.954, ppl=7.75, wps=52298, ups=3, wpb=15175.838, bsz=558.547, num_updates=14037, lr=0.000266909, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4194, train_wall=3770 | |
| epoch 002: 5190 / 8862 loss=4.565, nll_loss=2.954, ppl=7.75, wps=52300, ups=3, wpb=15176.136, bsz=558.409, num_updates=14047, lr=0.000266814, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4197, train_wall=3772 | |
| epoch 002: 5200 / 8862 loss=4.564, nll_loss=2.954, ppl=7.75, wps=52301, ups=3, wpb=15176.650, bsz=558.703, num_updates=14057, lr=0.000266719, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4200, train_wall=3775 | |
| epoch 002: 5210 / 8862 loss=4.564, nll_loss=2.954, ppl=7.75, wps=52302, ups=3, wpb=15176.968, bsz=558.586, num_updates=14067, lr=0.000266624, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4203, train_wall=3778 | |
| epoch 002: 5220 / 8862 loss=4.564, nll_loss=2.953, ppl=7.74, wps=52303, ups=3, wpb=15177.331, bsz=558.849, num_updates=14077, lr=0.000266529, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4206, train_wall=3780 | |
| epoch 002: 5230 / 8862 loss=4.563, nll_loss=2.953, ppl=7.74, wps=52306, ups=3, wpb=15178.251, bsz=559.029, num_updates=14087, lr=0.000266435, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4209, train_wall=3783 | |
| epoch 002: 5240 / 8862 loss=4.563, nll_loss=2.952, ppl=7.74, wps=52305, ups=3, wpb=15177.815, bsz=559.035, num_updates=14097, lr=0.00026634, gnorm=0.580, clip=0.000, oom=0.000, loss_scale=8.000, wall=4212, train_wall=3786 | |
| epoch 002: 5250 / 8862 loss=4.563, nll_loss=2.952, ppl=7.74, wps=52305, ups=3, wpb=15177.888, bsz=559.075, num_updates=14107, lr=0.000266246, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4215, train_wall=3788 | |
| epoch 002: 5260 / 8862 loss=4.562, nll_loss=2.952, ppl=7.74, wps=52305, ups=3, wpb=15177.765, bsz=558.993, num_updates=14117, lr=0.000266151, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4217, train_wall=3791 | |
| epoch 002: 5270 / 8862 loss=4.562, nll_loss=2.952, ppl=7.74, wps=52304, ups=3, wpb=15177.475, bsz=558.869, num_updates=14127, lr=0.000266057, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4220, train_wall=3794 | |
| epoch 002: 5280 / 8862 loss=4.562, nll_loss=2.951, ppl=7.73, wps=52305, ups=3, wpb=15177.516, bsz=558.842, num_updates=14137, lr=0.000265963, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4223, train_wall=3796 | |
| epoch 002: 5290 / 8862 loss=4.562, nll_loss=2.951, ppl=7.73, wps=52301, ups=3, wpb=15176.282, bsz=558.790, num_updates=14147, lr=0.000265869, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4226, train_wall=3799 | |
| epoch 002: 5300 / 8862 loss=4.562, nll_loss=2.951, ppl=7.73, wps=52299, ups=3, wpb=15176.273, bsz=559.035, num_updates=14157, lr=0.000265775, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4229, train_wall=3802 | |
| epoch 002: 5310 / 8862 loss=4.562, nll_loss=2.951, ppl=7.73, wps=52299, ups=3, wpb=15176.287, bsz=559.012, num_updates=14167, lr=0.000265681, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4232, train_wall=3804 | |
| epoch 002: 5320 / 8862 loss=4.561, nll_loss=2.951, ppl=7.73, wps=52298, ups=3, wpb=15176.011, bsz=559.156, num_updates=14177, lr=0.000265588, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4235, train_wall=3807 | |
| epoch 002: 5330 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52297, ups=3, wpb=15175.655, bsz=559.101, num_updates=14187, lr=0.000265494, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4238, train_wall=3810 | |
| epoch 002: 5340 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52296, ups=3, wpb=15175.343, bsz=559.091, num_updates=14197, lr=0.0002654, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4241, train_wall=3812 | |
| epoch 002: 5350 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52295, ups=3, wpb=15174.951, bsz=559.154, num_updates=14207, lr=0.000265307, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4244, train_wall=3815 | |
| epoch 002: 5360 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52295, ups=3, wpb=15174.768, bsz=559.030, num_updates=14217, lr=0.000265214, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4247, train_wall=3818 | |
| epoch 002: 5370 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52296, ups=3, wpb=15175.047, bsz=558.962, num_updates=14227, lr=0.000265121, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4249, train_wall=3820 | |
| epoch 002: 5380 / 8862 loss=4.561, nll_loss=2.950, ppl=7.73, wps=52297, ups=3, wpb=15175.177, bsz=558.956, num_updates=14237, lr=0.000265027, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4252, train_wall=3823 | |
| epoch 002: 5390 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52298, ups=3, wpb=15175.587, bsz=559.032, num_updates=14247, lr=0.000264934, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4255, train_wall=3826 | |
| epoch 002: 5400 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52295, ups=3, wpb=15174.835, bsz=559.241, num_updates=14257, lr=0.000264841, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4258, train_wall=3829 | |
| epoch 002: 5410 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52294, ups=3, wpb=15174.466, bsz=559.199, num_updates=14267, lr=0.000264749, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4261, train_wall=3831 | |
| epoch 002: 5420 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52293, ups=3, wpb=15174.258, bsz=559.292, num_updates=14277, lr=0.000264656, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4264, train_wall=3834 | |
| epoch 002: 5430 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52293, ups=3, wpb=15174.202, bsz=559.219, num_updates=14287, lr=0.000264563, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4267, train_wall=3837 | |
| epoch 002: 5440 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52294, ups=3, wpb=15174.345, bsz=559.271, num_updates=14297, lr=0.000264471, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4270, train_wall=3839 | |
| epoch 002: 5450 / 8862 loss=4.560, nll_loss=2.949, ppl=7.72, wps=52293, ups=3, wpb=15174.001, bsz=559.328, num_updates=14307, lr=0.000264378, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4273, train_wall=3842 | |
| epoch 002: 5460 / 8862 loss=4.559, nll_loss=2.949, ppl=7.72, wps=52294, ups=3, wpb=15174.536, bsz=559.514, num_updates=14317, lr=0.000264286, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4276, train_wall=3845 | |
| epoch 002: 5470 / 8862 loss=4.559, nll_loss=2.948, ppl=7.72, wps=52291, ups=3, wpb=15174.610, bsz=559.903, num_updates=14327, lr=0.000264194, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4279, train_wall=3847 | |
| epoch 002: 5480 / 8862 loss=4.559, nll_loss=2.948, ppl=7.72, wps=52289, ups=3, wpb=15174.197, bsz=559.981, num_updates=14337, lr=0.000264101, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4281, train_wall=3850 | |
| epoch 002: 5490 / 8862 loss=4.559, nll_loss=2.948, ppl=7.72, wps=52289, ups=3, wpb=15174.183, bsz=559.914, num_updates=14347, lr=0.000264009, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4284, train_wall=3853 | |
| epoch 002: 5500 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52290, ups=3, wpb=15174.041, bsz=559.823, num_updates=14357, lr=0.000263917, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4287, train_wall=3855 | |
| epoch 002: 5510 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52289, ups=3, wpb=15173.936, bsz=559.901, num_updates=14367, lr=0.000263826, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4290, train_wall=3858 | |
| epoch 002: 5520 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52290, ups=3, wpb=15173.853, bsz=559.855, num_updates=14377, lr=0.000263734, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4293, train_wall=3861 | |
| epoch 002: 5530 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52291, ups=3, wpb=15174.074, bsz=559.832, num_updates=14387, lr=0.000263642, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4296, train_wall=3863 | |
| epoch 002: 5540 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52290, ups=3, wpb=15173.756, bsz=559.900, num_updates=14397, lr=0.000263551, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4299, train_wall=3866 | |
| epoch 002: 5550 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52288, ups=3, wpb=15173.067, bsz=559.928, num_updates=14407, lr=0.000263459, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4302, train_wall=3869 | |
| epoch 002: 5560 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52287, ups=3, wpb=15172.864, bsz=559.929, num_updates=14417, lr=0.000263368, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4305, train_wall=3871 | |
| epoch 002: 5570 / 8862 loss=4.558, nll_loss=2.947, ppl=7.71, wps=52288, ups=3, wpb=15172.797, bsz=559.714, num_updates=14427, lr=0.000263276, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4307, train_wall=3874 | |
| epoch 002: 5580 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52286, ups=3, wpb=15172.429, bsz=559.801, num_updates=14437, lr=0.000263185, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4310, train_wall=3877 | |
| epoch 002: 5590 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52286, ups=3, wpb=15172.360, bsz=559.621, num_updates=14447, lr=0.000263094, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4313, train_wall=3880 | |
| epoch 002: 5600 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52286, ups=3, wpb=15172.186, bsz=559.414, num_updates=14457, lr=0.000263003, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4316, train_wall=3882 | |
| epoch 002: 5610 / 8862 loss=4.557, nll_loss=2.946, ppl=7.71, wps=52286, ups=3, wpb=15172.163, bsz=559.367, num_updates=14467, lr=0.000262912, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4319, train_wall=3885 | |
| epoch 002: 5620 / 8862 loss=4.557, nll_loss=2.946, ppl=7.70, wps=52286, ups=3, wpb=15172.084, bsz=559.344, num_updates=14477, lr=0.000262821, gnorm=0.579, clip=0.000, oom=0.000, loss_scale=8.000, wall=4322, train_wall=3888 | |
| epoch 002: 5630 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52287, ups=3, wpb=15172.553, bsz=559.378, num_updates=14487, lr=0.000262731, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4325, train_wall=3890 | |
| epoch 002: 5640 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52288, ups=3, wpb=15172.716, bsz=559.362, num_updates=14497, lr=0.00026264, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4328, train_wall=3893 | |
| epoch 002: 5650 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52287, ups=3, wpb=15172.348, bsz=559.329, num_updates=14507, lr=0.000262549, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4331, train_wall=3896 | |
| epoch 002: 5660 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52288, ups=3, wpb=15172.617, bsz=559.339, num_updates=14517, lr=0.000262459, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4334, train_wall=3898 | |
| epoch 002: 5670 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52289, ups=3, wpb=15172.763, bsz=559.276, num_updates=14527, lr=0.000262369, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4336, train_wall=3901 | |
| epoch 002: 5680 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52289, ups=3, wpb=15172.267, bsz=559.079, num_updates=14537, lr=0.000262278, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4339, train_wall=3904 | |
| epoch 002: 5690 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52289, ups=3, wpb=15172.342, bsz=558.975, num_updates=14547, lr=0.000262188, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4342, train_wall=3906 | |
| epoch 002: 5700 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52289, ups=3, wpb=15172.217, bsz=558.897, num_updates=14557, lr=0.000262098, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4345, train_wall=3909 | |
| epoch 002: 5710 / 8862 loss=4.556, nll_loss=2.945, ppl=7.70, wps=52288, ups=3, wpb=15171.888, bsz=558.885, num_updates=14567, lr=0.000262008, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4348, train_wall=3912 | |
| epoch 002: 5720 / 8862 loss=4.555, nll_loss=2.944, ppl=7.70, wps=52290, ups=3, wpb=15172.569, bsz=559.137, num_updates=14577, lr=0.000261918, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4351, train_wall=3914 | |
| epoch 002: 5730 / 8862 loss=4.555, nll_loss=2.944, ppl=7.69, wps=52289, ups=3, wpb=15172.229, bsz=559.030, num_updates=14587, lr=0.000261829, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4354, train_wall=3917 | |
| epoch 002: 5740 / 8862 loss=4.555, nll_loss=2.944, ppl=7.70, wps=52289, ups=3, wpb=15172.016, bsz=559.054, num_updates=14597, lr=0.000261739, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4357, train_wall=3920 | |
| epoch 002: 5750 / 8862 loss=4.555, nll_loss=2.944, ppl=7.70, wps=52289, ups=3, wpb=15171.623, bsz=558.916, num_updates=14607, lr=0.000261649, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4360, train_wall=3922 | |
| epoch 002: 5760 / 8862 loss=4.555, nll_loss=2.943, ppl=7.69, wps=52288, ups=3, wpb=15171.966, bsz=559.219, num_updates=14617, lr=0.00026156, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4363, train_wall=3925 | |
| epoch 002: 5770 / 8862 loss=4.555, nll_loss=2.943, ppl=7.69, wps=52286, ups=3, wpb=15171.201, bsz=559.183, num_updates=14627, lr=0.00026147, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4365, train_wall=3928 | |
| epoch 002: 5780 / 8862 loss=4.555, nll_loss=2.943, ppl=7.69, wps=52286, ups=3, wpb=15171.273, bsz=559.103, num_updates=14637, lr=0.000261381, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4368, train_wall=3930 | |
| epoch 002: 5790 / 8862 loss=4.554, nll_loss=2.943, ppl=7.69, wps=52285, ups=3, wpb=15170.922, bsz=559.136, num_updates=14647, lr=0.000261292, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4371, train_wall=3933 | |
| epoch 002: 5800 / 8862 loss=4.554, nll_loss=2.943, ppl=7.69, wps=52284, ups=3, wpb=15171.202, bsz=559.407, num_updates=14657, lr=0.000261203, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4374, train_wall=3936 | |
| epoch 002: 5810 / 8862 loss=4.554, nll_loss=2.942, ppl=7.69, wps=52284, ups=3, wpb=15171.354, bsz=559.393, num_updates=14667, lr=0.000261114, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4377, train_wall=3938 | |
| epoch 002: 5820 / 8862 loss=4.553, nll_loss=2.942, ppl=7.68, wps=52283, ups=3, wpb=15171.377, bsz=559.623, num_updates=14677, lr=0.000261025, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4380, train_wall=3941 | |
| epoch 002: 5830 / 8862 loss=4.553, nll_loss=2.942, ppl=7.68, wps=52281, ups=3, wpb=15170.736, bsz=559.569, num_updates=14687, lr=0.000260936, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4383, train_wall=3944 | |
| epoch 002: 5840 / 8862 loss=4.553, nll_loss=2.942, ppl=7.68, wps=52281, ups=3, wpb=15170.489, bsz=559.571, num_updates=14697, lr=0.000260847, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4386, train_wall=3947 | |
| epoch 002: 5850 / 8862 loss=4.553, nll_loss=2.942, ppl=7.68, wps=52282, ups=3, wpb=15170.662, bsz=559.457, num_updates=14707, lr=0.000260758, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4389, train_wall=3949 | |
| epoch 002: 5860 / 8862 loss=4.552, nll_loss=2.941, ppl=7.68, wps=52283, ups=3, wpb=15171.278, bsz=559.629, num_updates=14717, lr=0.00026067, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4392, train_wall=3952 | |
| epoch 002: 5870 / 8862 loss=4.552, nll_loss=2.941, ppl=7.68, wps=52285, ups=3, wpb=15171.761, bsz=559.744, num_updates=14727, lr=0.000260581, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4395, train_wall=3955 | |
| epoch 002: 5880 / 8862 loss=4.552, nll_loss=2.940, ppl=7.68, wps=52284, ups=3, wpb=15171.654, bsz=559.758, num_updates=14737, lr=0.000260493, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4397, train_wall=3957 | |
| epoch 002: 5890 / 8862 loss=4.552, nll_loss=2.940, ppl=7.68, wps=52285, ups=3, wpb=15171.735, bsz=559.784, num_updates=14747, lr=0.000260404, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4400, train_wall=3960 | |
| epoch 002: 5900 / 8862 loss=4.551, nll_loss=2.940, ppl=7.67, wps=52284, ups=3, wpb=15171.612, bsz=559.873, num_updates=14757, lr=0.000260316, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4403, train_wall=3963 | |
| epoch 002: 5910 / 8862 loss=4.551, nll_loss=2.940, ppl=7.67, wps=52285, ups=3, wpb=15171.942, bsz=559.928, num_updates=14767, lr=0.000260228, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4406, train_wall=3965 | |
| epoch 002: 5920 / 8862 loss=4.551, nll_loss=2.939, ppl=7.67, wps=52285, ups=3, wpb=15172.255, bsz=559.884, num_updates=14777, lr=0.00026014, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4409, train_wall=3968 | |
| epoch 002: 5930 / 8862 loss=4.551, nll_loss=2.939, ppl=7.67, wps=52285, ups=3, wpb=15172.043, bsz=559.821, num_updates=14787, lr=0.000260052, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4412, train_wall=3971 | |
| epoch 002: 5940 / 8862 loss=4.551, nll_loss=2.939, ppl=7.67, wps=52285, ups=3, wpb=15171.909, bsz=559.811, num_updates=14797, lr=0.000259964, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4415, train_wall=3973 | |
| epoch 002: 5950 / 8862 loss=4.550, nll_loss=2.939, ppl=7.67, wps=52284, ups=3, wpb=15171.714, bsz=559.809, num_updates=14807, lr=0.000259876, gnorm=0.578, clip=0.000, oom=0.000, loss_scale=8.000, wall=4418, train_wall=3976 | |
| epoch 002: 5960 / 8862 loss=4.550, nll_loss=2.939, ppl=7.67, wps=52284, ups=3, wpb=15171.498, bsz=559.788, num_updates=14817, lr=0.000259788, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4421, train_wall=3979 | |
| epoch 002: 5970 / 8862 loss=4.550, nll_loss=2.938, ppl=7.66, wps=52285, ups=3, wpb=15171.657, bsz=559.979, num_updates=14827, lr=0.000259701, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4424, train_wall=3981 | |
| epoch 002: 5980 / 8862 loss=4.550, nll_loss=2.938, ppl=7.66, wps=52285, ups=3, wpb=15171.592, bsz=559.961, num_updates=14837, lr=0.000259613, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4426, train_wall=3984 | |
| epoch 002: 5990 / 8862 loss=4.550, nll_loss=2.938, ppl=7.66, wps=52285, ups=3, wpb=15171.634, bsz=559.893, num_updates=14847, lr=0.000259526, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4429, train_wall=3987 | |
| epoch 002: 6000 / 8862 loss=4.550, nll_loss=2.938, ppl=7.66, wps=52284, ups=3, wpb=15171.123, bsz=559.852, num_updates=14857, lr=0.000259439, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4432, train_wall=3989 | |
| epoch 002: 6010 / 8862 loss=4.549, nll_loss=2.938, ppl=7.66, wps=52285, ups=3, wpb=15171.684, bsz=559.864, num_updates=14867, lr=0.000259351, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4435, train_wall=3992 | |
| epoch 002: 6020 / 8862 loss=4.549, nll_loss=2.938, ppl=7.66, wps=52286, ups=3, wpb=15171.973, bsz=559.864, num_updates=14877, lr=0.000259264, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4438, train_wall=3995 | |
| epoch 002: 6030 / 8862 loss=4.549, nll_loss=2.937, ppl=7.66, wps=52287, ups=3, wpb=15172.179, bsz=559.881, num_updates=14887, lr=0.000259177, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4441, train_wall=3997 | |
| epoch 002: 6040 / 8862 loss=4.549, nll_loss=2.937, ppl=7.66, wps=52286, ups=3, wpb=15171.955, bsz=559.970, num_updates=14897, lr=0.00025909, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4444, train_wall=4000 | |
| epoch 002: 6050 / 8862 loss=4.548, nll_loss=2.937, ppl=7.66, wps=52286, ups=3, wpb=15171.810, bsz=560.008, num_updates=14907, lr=0.000259003, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4447, train_wall=4003 | |
| epoch 002: 6060 / 8862 loss=4.548, nll_loss=2.936, ppl=7.66, wps=52288, ups=3, wpb=15172.622, bsz=560.057, num_updates=14917, lr=0.000258916, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4450, train_wall=4006 | |
| epoch 002: 6070 / 8862 loss=4.548, nll_loss=2.936, ppl=7.65, wps=52288, ups=3, wpb=15172.909, bsz=560.148, num_updates=14927, lr=0.000258829, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4453, train_wall=4008 | |
| epoch 002: 6080 / 8862 loss=4.548, nll_loss=2.936, ppl=7.65, wps=52289, ups=3, wpb=15173.076, bsz=560.093, num_updates=14937, lr=0.000258743, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4455, train_wall=4011 | |
| epoch 002: 6090 / 8862 loss=4.548, nll_loss=2.936, ppl=7.65, wps=52290, ups=3, wpb=15173.283, bsz=560.166, num_updates=14947, lr=0.000258656, gnorm=0.577, clip=0.000, oom=0.000, loss_scale=8.000, wall=4458, train_wall=4014 | |
| epoch 002: 6100 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52291, ups=3, wpb=15173.773, bsz=560.195, num_updates=14957, lr=0.00025857, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4461, train_wall=4016 | |
| epoch 002: 6110 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52289, ups=3, wpb=15173.210, bsz=560.316, num_updates=14967, lr=0.000258483, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4464, train_wall=4019 | |
| epoch 002: 6120 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52291, ups=3, wpb=15173.628, bsz=560.231, num_updates=14977, lr=0.000258397, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4467, train_wall=4022 | |
| epoch 002: 6130 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52291, ups=3, wpb=15173.779, bsz=560.108, num_updates=14987, lr=0.000258311, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4470, train_wall=4024 | |
| epoch 002: 6140 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52291, ups=3, wpb=15173.754, bsz=560.013, num_updates=14997, lr=0.000258225, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4473, train_wall=4027 | |
| epoch 002: 6150 / 8862 loss=4.547, nll_loss=2.935, ppl=7.65, wps=52290, ups=3, wpb=15173.203, bsz=559.918, num_updates=15007, lr=0.000258139, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4476, train_wall=4030 | |
| epoch 002: 6160 / 8862 loss=4.546, nll_loss=2.935, ppl=7.65, wps=52290, ups=3, wpb=15173.488, bsz=560.074, num_updates=15017, lr=0.000258053, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4479, train_wall=4032 | |
| epoch 002: 6170 / 8862 loss=4.546, nll_loss=2.935, ppl=7.65, wps=52292, ups=3, wpb=15173.857, bsz=559.931, num_updates=15027, lr=0.000257967, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4482, train_wall=4035 | |
| epoch 002: 6180 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52288, ups=3, wpb=15173.023, bsz=560.252, num_updates=15037, lr=0.000257881, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4484, train_wall=4038 | |
| epoch 002: 6190 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52288, ups=3, wpb=15173.373, bsz=560.313, num_updates=15047, lr=0.000257795, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4487, train_wall=4040 | |
| epoch 002: 6200 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52286, ups=3, wpb=15172.834, bsz=560.254, num_updates=15057, lr=0.00025771, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4490, train_wall=4043 | |
| epoch 002: 6210 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52288, ups=3, wpb=15173.215, bsz=560.218, num_updates=15067, lr=0.000257624, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4493, train_wall=4046 | |
| epoch 002: 6220 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52289, ups=3, wpb=15173.420, bsz=560.250, num_updates=15077, lr=0.000257539, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4496, train_wall=4048 | |
| epoch 002: 6230 / 8862 loss=4.546, nll_loss=2.934, ppl=7.64, wps=52288, ups=3, wpb=15173.170, bsz=560.345, num_updates=15087, lr=0.000257453, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4499, train_wall=4051 | |
| epoch 002: 6240 / 8862 loss=4.545, nll_loss=2.934, ppl=7.64, wps=52286, ups=3, wpb=15172.722, bsz=560.258, num_updates=15097, lr=0.000257368, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4502, train_wall=4054 | |
| epoch 002: 6250 / 8862 loss=4.545, nll_loss=2.933, ppl=7.64, wps=52286, ups=3, wpb=15172.698, bsz=560.251, num_updates=15107, lr=0.000257283, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4505, train_wall=4057 | |
| epoch 002: 6260 / 8862 loss=4.545, nll_loss=2.933, ppl=7.64, wps=52285, ups=3, wpb=15172.032, bsz=560.118, num_updates=15117, lr=0.000257198, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4508, train_wall=4059 | |
| epoch 002: 6270 / 8862 loss=4.545, nll_loss=2.933, ppl=7.64, wps=52286, ups=3, wpb=15172.225, bsz=560.080, num_updates=15127, lr=0.000257113, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4511, train_wall=4062 | |
| epoch 002: 6280 / 8862 loss=4.545, nll_loss=2.933, ppl=7.63, wps=52286, ups=3, wpb=15172.085, bsz=560.088, num_updates=15137, lr=0.000257028, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4513, train_wall=4065 | |
| epoch 002: 6290 / 8862 loss=4.544, nll_loss=2.932, ppl=7.63, wps=52287, ups=3, wpb=15172.484, bsz=560.156, num_updates=15147, lr=0.000256943, gnorm=0.576, clip=0.000, oom=0.000, loss_scale=8.000, wall=4516, train_wall=4067 | |
| epoch 002: 6300 / 8862 loss=4.544, nll_loss=2.932, ppl=7.63, wps=52285, ups=3, wpb=15172.079, bsz=560.180, num_updates=15157, lr=0.000256858, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4519, train_wall=4070 | |
| epoch 002: 6310 / 8862 loss=4.544, nll_loss=2.932, ppl=7.63, wps=52285, ups=3, wpb=15172.007, bsz=560.113, num_updates=15167, lr=0.000256773, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4522, train_wall=4073 | |
| epoch 002: 6320 / 8862 loss=4.544, nll_loss=2.932, ppl=7.63, wps=52284, ups=3, wpb=15171.211, bsz=560.028, num_updates=15177, lr=0.000256689, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4525, train_wall=4075 | |
| epoch 002: 6330 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52283, ups=3, wpb=15171.021, bsz=560.058, num_updates=15187, lr=0.000256604, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4528, train_wall=4078 | |
| epoch 002: 6340 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52283, ups=3, wpb=15171.149, bsz=560.105, num_updates=15197, lr=0.00025652, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4531, train_wall=4081 | |
| epoch 002: 6350 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52285, ups=3, wpb=15171.769, bsz=560.023, num_updates=15207, lr=0.000256436, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4534, train_wall=4083 | |
| epoch 002: 6360 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52284, ups=3, wpb=15171.648, bsz=560.026, num_updates=15217, lr=0.000256351, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4537, train_wall=4086 | |
| epoch 002: 6370 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52284, ups=3, wpb=15171.349, bsz=559.917, num_updates=15227, lr=0.000256267, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4540, train_wall=4089 | |
| epoch 002: 6380 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52284, ups=3, wpb=15171.258, bsz=559.794, num_updates=15237, lr=0.000256183, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4542, train_wall=4091 | |
| epoch 002: 6390 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52283, ups=3, wpb=15171.028, bsz=559.830, num_updates=15247, lr=0.000256099, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4545, train_wall=4094 | |
| epoch 002: 6400 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52282, ups=3, wpb=15170.764, bsz=559.962, num_updates=15257, lr=0.000256015, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4548, train_wall=4097 | |
| epoch 002: 6410 / 8862 loss=4.543, nll_loss=2.931, ppl=7.63, wps=52282, ups=3, wpb=15170.715, bsz=559.936, num_updates=15267, lr=0.000255931, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4551, train_wall=4099 | |
| epoch 002: 6420 / 8862 loss=4.543, nll_loss=2.931, ppl=7.62, wps=52283, ups=3, wpb=15170.850, bsz=559.973, num_updates=15277, lr=0.000255847, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4554, train_wall=4102 | |
| epoch 002: 6430 / 8862 loss=4.542, nll_loss=2.930, ppl=7.62, wps=52282, ups=3, wpb=15170.482, bsz=559.978, num_updates=15287, lr=0.000255764, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4557, train_wall=4105 | |
| epoch 002: 6440 / 8862 loss=4.542, nll_loss=2.930, ppl=7.62, wps=52283, ups=3, wpb=15171.001, bsz=559.968, num_updates=15297, lr=0.00025568, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4560, train_wall=4107 | |
| epoch 002: 6450 / 8862 loss=4.542, nll_loss=2.930, ppl=7.62, wps=52283, ups=3, wpb=15170.922, bsz=559.934, num_updates=15307, lr=0.000255597, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4563, train_wall=4110 | |
| epoch 002: 6460 / 8862 loss=4.542, nll_loss=2.930, ppl=7.62, wps=52284, ups=3, wpb=15171.427, bsz=559.927, num_updates=15317, lr=0.000255513, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4566, train_wall=4113 | |
| epoch 002: 6470 / 8862 loss=4.542, nll_loss=2.929, ppl=7.62, wps=52283, ups=3, wpb=15171.118, bsz=559.967, num_updates=15327, lr=0.00025543, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4569, train_wall=4115 | |
| epoch 002: 6480 / 8862 loss=4.541, nll_loss=2.929, ppl=7.62, wps=52284, ups=3, wpb=15171.260, bsz=559.917, num_updates=15337, lr=0.000255346, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4571, train_wall=4118 | |
| epoch 002: 6490 / 8862 loss=4.541, nll_loss=2.929, ppl=7.62, wps=52284, ups=3, wpb=15171.230, bsz=559.846, num_updates=15347, lr=0.000255263, gnorm=0.575, clip=0.000, oom=0.000, loss_scale=8.000, wall=4574, train_wall=4121 | |
| epoch 002: 6500 / 8862 loss=4.541, nll_loss=2.929, ppl=7.61, wps=52284, ups=3, wpb=15171.085, bsz=559.862, num_updates=15357, lr=0.00025518, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4577, train_wall=4123 | |
| epoch 002: 6510 / 8862 loss=4.541, nll_loss=2.929, ppl=7.61, wps=52283, ups=3, wpb=15171.073, bsz=559.882, num_updates=15367, lr=0.000255097, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4580, train_wall=4126 | |
| epoch 002: 6520 / 8862 loss=4.541, nll_loss=2.928, ppl=7.61, wps=52283, ups=3, wpb=15171.037, bsz=559.856, num_updates=15377, lr=0.000255014, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4583, train_wall=4129 | |
| epoch 002: 6530 / 8862 loss=4.541, nll_loss=2.928, ppl=7.61, wps=52282, ups=3, wpb=15170.664, bsz=559.783, num_updates=15387, lr=0.000254931, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4586, train_wall=4132 | |
| epoch 002: 6540 / 8862 loss=4.540, nll_loss=2.928, ppl=7.61, wps=52282, ups=3, wpb=15170.773, bsz=559.961, num_updates=15397, lr=0.000254848, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4589, train_wall=4134 | |
| epoch 002: 6550 / 8862 loss=4.540, nll_loss=2.928, ppl=7.61, wps=52283, ups=3, wpb=15171.028, bsz=559.957, num_updates=15407, lr=0.000254766, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4592, train_wall=4137 | |
| epoch 002: 6560 / 8862 loss=4.540, nll_loss=2.928, ppl=7.61, wps=52283, ups=3, wpb=15171.082, bsz=559.924, num_updates=15417, lr=0.000254683, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4595, train_wall=4140 | |
| epoch 002: 6570 / 8862 loss=4.540, nll_loss=2.928, ppl=7.61, wps=52283, ups=3, wpb=15171.003, bsz=559.940, num_updates=15427, lr=0.000254601, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4598, train_wall=4142 | |
| epoch 002: 6580 / 8862 loss=4.540, nll_loss=2.927, ppl=7.61, wps=52284, ups=3, wpb=15171.061, bsz=559.844, num_updates=15437, lr=0.000254518, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4600, train_wall=4145 | |
| epoch 002: 6590 / 8862 loss=4.540, nll_loss=2.927, ppl=7.61, wps=52283, ups=3, wpb=15170.886, bsz=559.848, num_updates=15447, lr=0.000254436, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4603, train_wall=4148 | |
| epoch 002: 6600 / 8862 loss=4.539, nll_loss=2.927, ppl=7.61, wps=52283, ups=3, wpb=15170.696, bsz=559.858, num_updates=15457, lr=0.000254353, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4606, train_wall=4150 | |
| epoch 002: 6610 / 8862 loss=4.539, nll_loss=2.927, ppl=7.61, wps=52282, ups=3, wpb=15170.674, bsz=559.714, num_updates=15467, lr=0.000254271, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4609, train_wall=4153 | |
| epoch 002: 6620 / 8862 loss=4.539, nll_loss=2.927, ppl=7.60, wps=52284, ups=3, wpb=15171.099, bsz=559.697, num_updates=15477, lr=0.000254189, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4612, train_wall=4156 | |
| epoch 002: 6630 / 8862 loss=4.539, nll_loss=2.927, ppl=7.60, wps=52283, ups=3, wpb=15171.063, bsz=559.860, num_updates=15487, lr=0.000254107, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4615, train_wall=4158 | |
| epoch 002: 6640 / 8862 loss=4.539, nll_loss=2.926, ppl=7.60, wps=52284, ups=3, wpb=15171.424, bsz=559.866, num_updates=15497, lr=0.000254025, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4618, train_wall=4161 | |
| epoch 002: 6650 / 8862 loss=4.539, nll_loss=2.926, ppl=7.60, wps=52284, ups=3, wpb=15171.230, bsz=559.718, num_updates=15507, lr=0.000253943, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4621, train_wall=4164 | |
| epoch 002: 6660 / 8862 loss=4.539, nll_loss=2.926, ppl=7.60, wps=52284, ups=3, wpb=15171.400, bsz=559.641, num_updates=15517, lr=0.000253861, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4624, train_wall=4166 | |
| epoch 002: 6670 / 8862 loss=4.539, nll_loss=2.926, ppl=7.60, wps=52284, ups=3, wpb=15171.045, bsz=559.625, num_updates=15527, lr=0.000253779, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4627, train_wall=4169 | |
| epoch 002: 6680 / 8862 loss=4.538, nll_loss=2.926, ppl=7.60, wps=52285, ups=3, wpb=15171.199, bsz=559.558, num_updates=15537, lr=0.000253698, gnorm=0.574, clip=0.000, oom=0.000, loss_scale=8.000, wall=4629, train_wall=4172 | |
| epoch 002: 6690 / 8862 loss=4.538, nll_loss=2.926, ppl=7.60, wps=52285, ups=3, wpb=15171.149, bsz=559.419, num_updates=15547, lr=0.000253616, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4632, train_wall=4174 | |
| epoch 002: 6700 / 8862 loss=4.538, nll_loss=2.926, ppl=7.60, wps=52286, ups=3, wpb=15171.178, bsz=559.392, num_updates=15557, lr=0.000253535, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4635, train_wall=4177 | |
| epoch 002: 6710 / 8862 loss=4.538, nll_loss=2.926, ppl=7.60, wps=52285, ups=3, wpb=15171.040, bsz=559.399, num_updates=15567, lr=0.000253453, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4638, train_wall=4180 | |
| epoch 002: 6720 / 8862 loss=4.538, nll_loss=2.925, ppl=7.60, wps=52284, ups=3, wpb=15170.671, bsz=559.340, num_updates=15577, lr=0.000253372, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4641, train_wall=4182 | |
| epoch 002: 6730 / 8862 loss=4.538, nll_loss=2.925, ppl=7.60, wps=52284, ups=3, wpb=15170.587, bsz=559.338, num_updates=15587, lr=0.00025329, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4644, train_wall=4185 | |
| epoch 002: 6740 / 8862 loss=4.537, nll_loss=2.925, ppl=7.59, wps=52282, ups=3, wpb=15170.275, bsz=559.440, num_updates=15597, lr=0.000253209, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4647, train_wall=4188 | |
| epoch 002: 6750 / 8862 loss=4.537, nll_loss=2.925, ppl=7.59, wps=52283, ups=3, wpb=15170.076, bsz=559.337, num_updates=15607, lr=0.000253128, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4650, train_wall=4190 | |
| epoch 002: 6760 / 8862 loss=4.537, nll_loss=2.925, ppl=7.59, wps=52283, ups=3, wpb=15170.427, bsz=559.246, num_updates=15617, lr=0.000253047, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4653, train_wall=4193 | |
| epoch 002: 6770 / 8862 loss=4.537, nll_loss=2.924, ppl=7.59, wps=52284, ups=3, wpb=15170.499, bsz=559.326, num_updates=15627, lr=0.000252966, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4656, train_wall=4196 | |
| epoch 002: 6780 / 8862 loss=4.537, nll_loss=2.924, ppl=7.59, wps=52283, ups=3, wpb=15170.258, bsz=559.255, num_updates=15637, lr=0.000252885, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4658, train_wall=4198 | |
| epoch 002: 6790 / 8862 loss=4.536, nll_loss=2.924, ppl=7.59, wps=52284, ups=3, wpb=15170.300, bsz=559.341, num_updates=15647, lr=0.000252804, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4661, train_wall=4201 | |
| epoch 002: 6800 / 8862 loss=4.536, nll_loss=2.924, ppl=7.59, wps=52283, ups=3, wpb=15170.267, bsz=559.311, num_updates=15657, lr=0.000252724, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4664, train_wall=4204 | |
| epoch 002: 6810 / 8862 loss=4.536, nll_loss=2.924, ppl=7.59, wps=52284, ups=3, wpb=15170.576, bsz=559.307, num_updates=15667, lr=0.000252643, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4667, train_wall=4207 | |
| epoch 002: 6820 / 8862 loss=4.536, nll_loss=2.923, ppl=7.59, wps=52286, ups=3, wpb=15171.198, bsz=559.450, num_updates=15677, lr=0.000252562, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4670, train_wall=4209 | |
| epoch 002: 6830 / 8862 loss=4.536, nll_loss=2.923, ppl=7.58, wps=52285, ups=3, wpb=15171.008, bsz=559.411, num_updates=15687, lr=0.000252482, gnorm=0.573, clip=0.000, oom=0.000, loss_scale=8.000, wall=4673, train_wall=4212 | |
| epoch 002: 6840 / 8862 loss=4.535, nll_loss=2.923, ppl=7.58, wps=52284, ups=3, wpb=15170.700, bsz=559.476, num_updates=15697, lr=0.000252401, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4676, train_wall=4215 | |
| epoch 002: 6850 / 8862 loss=4.535, nll_loss=2.922, ppl=7.58, wps=52285, ups=3, wpb=15171.153, bsz=559.526, num_updates=15707, lr=0.000252321, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4679, train_wall=4217 | |
| epoch 002: 6860 / 8862 loss=4.535, nll_loss=2.922, ppl=7.58, wps=52283, ups=3, wpb=15170.946, bsz=559.761, num_updates=15717, lr=0.000252241, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4682, train_wall=4220 | |
| epoch 002: 6870 / 8862 loss=4.535, nll_loss=2.922, ppl=7.58, wps=52283, ups=3, wpb=15170.932, bsz=559.842, num_updates=15727, lr=0.00025216, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4685, train_wall=4223 | |
| epoch 002: 6880 / 8862 loss=4.534, nll_loss=2.922, ppl=7.58, wps=52283, ups=3, wpb=15171.345, bsz=560.021, num_updates=15737, lr=0.00025208, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4688, train_wall=4225 | |
| epoch 002: 6890 / 8862 loss=4.534, nll_loss=2.921, ppl=7.58, wps=52284, ups=3, wpb=15171.617, bsz=560.014, num_updates=15747, lr=0.000252, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4690, train_wall=4228 | |
| epoch 002: 6900 / 8862 loss=4.534, nll_loss=2.921, ppl=7.57, wps=52285, ups=3, wpb=15171.894, bsz=560.028, num_updates=15757, lr=0.00025192, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4693, train_wall=4231 | |
| epoch 002: 6910 / 8862 loss=4.534, nll_loss=2.921, ppl=7.57, wps=52286, ups=3, wpb=15172.023, bsz=560.052, num_updates=15767, lr=0.00025184, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4696, train_wall=4233 | |
| epoch 002: 6920 / 8862 loss=4.534, nll_loss=2.921, ppl=7.57, wps=52286, ups=3, wpb=15171.968, bsz=560.105, num_updates=15777, lr=0.000251761, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4699, train_wall=4236 | |
| epoch 002: 6930 / 8862 loss=4.534, nll_loss=2.921, ppl=7.57, wps=52287, ups=3, wpb=15172.069, bsz=560.012, num_updates=15787, lr=0.000251681, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4702, train_wall=4239 | |
| epoch 002: 6940 / 8862 loss=4.533, nll_loss=2.921, ppl=7.57, wps=52287, ups=3, wpb=15172.020, bsz=559.945, num_updates=15797, lr=0.000251601, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4705, train_wall=4241 | |
| epoch 002: 6950 / 8862 loss=4.533, nll_loss=2.920, ppl=7.57, wps=52288, ups=3, wpb=15172.310, bsz=560.077, num_updates=15807, lr=0.000251522, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4708, train_wall=4244 | |
| epoch 002: 6960 / 8862 loss=4.533, nll_loss=2.920, ppl=7.57, wps=52287, ups=3, wpb=15171.980, bsz=560.020, num_updates=15817, lr=0.000251442, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4711, train_wall=4247 | |
| epoch 002: 6970 / 8862 loss=4.533, nll_loss=2.920, ppl=7.57, wps=52287, ups=3, wpb=15171.840, bsz=559.940, num_updates=15827, lr=0.000251363, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4714, train_wall=4249 | |
| epoch 002: 6980 / 8862 loss=4.533, nll_loss=2.920, ppl=7.57, wps=52287, ups=3, wpb=15172.040, bsz=559.950, num_updates=15837, lr=0.000251283, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4717, train_wall=4252 | |
| epoch 002: 6990 / 8862 loss=4.532, nll_loss=2.920, ppl=7.57, wps=52286, ups=3, wpb=15171.650, bsz=559.973, num_updates=15847, lr=0.000251204, gnorm=0.572, clip=0.000, oom=0.000, loss_scale=8.000, wall=4719, train_wall=4255 | |
| epoch 002: 7000 / 8862 loss=4.532, nll_loss=2.919, ppl=7.57, wps=52287, ups=3, wpb=15171.916, bsz=559.966, num_updates=15857, lr=0.000251125, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4722, train_wall=4258 | |
| epoch 002: 7010 / 8862 loss=4.532, nll_loss=2.919, ppl=7.56, wps=52286, ups=3, wpb=15171.756, bsz=559.985, num_updates=15867, lr=0.000251046, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4725, train_wall=4260 | |
| epoch 002: 7020 / 8862 loss=4.532, nll_loss=2.919, ppl=7.56, wps=52286, ups=3, wpb=15171.745, bsz=560.059, num_updates=15877, lr=0.000250967, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4728, train_wall=4263 | |
| epoch 002: 7030 / 8862 loss=4.532, nll_loss=2.919, ppl=7.56, wps=52286, ups=3, wpb=15171.926, bsz=560.180, num_updates=15887, lr=0.000250888, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4731, train_wall=4266 | |
| epoch 002: 7040 / 8862 loss=4.532, nll_loss=2.919, ppl=7.56, wps=52286, ups=3, wpb=15172.038, bsz=560.182, num_updates=15897, lr=0.000250809, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4734, train_wall=4268 | |
| epoch 002: 7050 / 8862 loss=4.532, nll_loss=2.919, ppl=7.56, wps=52286, ups=3, wpb=15172.128, bsz=560.162, num_updates=15907, lr=0.00025073, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4737, train_wall=4271 | |
| epoch 002: 7060 / 8862 loss=4.531, nll_loss=2.918, ppl=7.56, wps=52287, ups=3, wpb=15172.552, bsz=560.189, num_updates=15917, lr=0.000250651, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4740, train_wall=4274 | |
| epoch 002: 7070 / 8862 loss=4.531, nll_loss=2.918, ppl=7.56, wps=52287, ups=3, wpb=15172.670, bsz=560.127, num_updates=15927, lr=0.000250572, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4743, train_wall=4276 | |
| epoch 002: 7080 / 8862 loss=4.531, nll_loss=2.918, ppl=7.56, wps=52286, ups=3, wpb=15172.185, bsz=560.112, num_updates=15937, lr=0.000250494, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4746, train_wall=4279 | |
| epoch 002: 7090 / 8862 loss=4.531, nll_loss=2.918, ppl=7.56, wps=52284, ups=3, wpb=15171.890, bsz=560.217, num_updates=15947, lr=0.000250415, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4749, train_wall=4282 | |
| epoch 002: 7100 / 8862 loss=4.531, nll_loss=2.918, ppl=7.56, wps=52285, ups=3, wpb=15172.038, bsz=560.122, num_updates=15957, lr=0.000250337, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4751, train_wall=4284 | |
| epoch 002: 7110 / 8862 loss=4.530, nll_loss=2.917, ppl=7.55, wps=52285, ups=3, wpb=15171.809, bsz=560.150, num_updates=15967, lr=0.000250258, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4754, train_wall=4287 | |
| epoch 002: 7120 / 8862 loss=4.530, nll_loss=2.917, ppl=7.55, wps=52285, ups=3, wpb=15171.709, bsz=560.022, num_updates=15977, lr=0.00025018, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4757, train_wall=4290 | |
| epoch 002: 7130 / 8862 loss=4.530, nll_loss=2.917, ppl=7.55, wps=52285, ups=3, wpb=15171.843, bsz=559.978, num_updates=15987, lr=0.000250102, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4760, train_wall=4292 | |
| epoch 002: 7140 / 8862 loss=4.530, nll_loss=2.917, ppl=7.55, wps=52285, ups=3, wpb=15172.055, bsz=560.030, num_updates=15997, lr=0.000250023, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4763, train_wall=4295 | |
| epoch 002: 7150 / 8862 loss=4.530, nll_loss=2.916, ppl=7.55, wps=52286, ups=3, wpb=15171.993, bsz=560.036, num_updates=16007, lr=0.000249945, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4766, train_wall=4298 | |
| epoch 002: 7160 / 8862 loss=4.529, nll_loss=2.916, ppl=7.55, wps=52285, ups=3, wpb=15172.195, bsz=560.237, num_updates=16017, lr=0.000249867, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4769, train_wall=4301 | |
| epoch 002: 7170 / 8862 loss=4.529, nll_loss=2.916, ppl=7.55, wps=52284, ups=3, wpb=15172.047, bsz=560.312, num_updates=16027, lr=0.000249789, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4772, train_wall=4303 | |
| epoch 002: 7180 / 8862 loss=4.529, nll_loss=2.916, ppl=7.55, wps=52285, ups=3, wpb=15172.306, bsz=560.270, num_updates=16037, lr=0.000249711, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4775, train_wall=4306 | |
| epoch 002: 7190 / 8862 loss=4.529, nll_loss=2.916, ppl=7.54, wps=52287, ups=3, wpb=15172.663, bsz=560.239, num_updates=16047, lr=0.000249634, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4778, train_wall=4309 | |
| epoch 002: 7200 / 8862 loss=4.529, nll_loss=2.915, ppl=7.54, wps=52286, ups=3, wpb=15172.626, bsz=560.346, num_updates=16057, lr=0.000249556, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4780, train_wall=4311 | |
| epoch 002: 7210 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52286, ups=3, wpb=15172.392, bsz=560.286, num_updates=16067, lr=0.000249478, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4783, train_wall=4314 | |
| epoch 002: 7220 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52286, ups=3, wpb=15172.296, bsz=560.321, num_updates=16077, lr=0.000249401, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4786, train_wall=4317 | |
| epoch 002: 7230 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52285, ups=3, wpb=15171.925, bsz=560.297, num_updates=16087, lr=0.000249323, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4789, train_wall=4319 | |
| epoch 002: 7240 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52285, ups=3, wpb=15172.085, bsz=560.313, num_updates=16097, lr=0.000249246, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4792, train_wall=4322 | |
| epoch 002: 7250 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52285, ups=3, wpb=15171.956, bsz=560.316, num_updates=16107, lr=0.000249168, gnorm=0.571, clip=0.000, oom=0.000, loss_scale=8.000, wall=4795, train_wall=4325 | |
| epoch 002: 7260 / 8862 loss=4.528, nll_loss=2.914, ppl=7.54, wps=52286, ups=3, wpb=15172.130, bsz=560.245, num_updates=16117, lr=0.000249091, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4798, train_wall=4327 | |
| epoch 002: 7270 / 8862 loss=4.528, nll_loss=2.914, ppl=7.54, wps=52286, ups=3, wpb=15172.041, bsz=560.122, num_updates=16127, lr=0.000249014, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4801, train_wall=4330 | |
| epoch 002: 7280 / 8862 loss=4.528, nll_loss=2.915, ppl=7.54, wps=52283, ups=3, wpb=15171.237, bsz=559.989, num_updates=16137, lr=0.000248937, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4804, train_wall=4333 | |
| epoch 002: 7290 / 8862 loss=4.528, nll_loss=2.914, ppl=7.54, wps=52284, ups=3, wpb=15171.441, bsz=559.936, num_updates=16147, lr=0.000248859, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4807, train_wall=4335 | |
| epoch 002: 7300 / 8862 loss=4.528, nll_loss=2.914, ppl=7.54, wps=52283, ups=3, wpb=15170.952, bsz=559.928, num_updates=16157, lr=0.000248782, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4809, train_wall=4338 | |
| epoch 002: 7310 / 8862 loss=4.527, nll_loss=2.914, ppl=7.54, wps=52284, ups=3, wpb=15171.347, bsz=559.873, num_updates=16167, lr=0.000248705, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4812, train_wall=4341 | |
| epoch 002: 7320 / 8862 loss=4.527, nll_loss=2.914, ppl=7.54, wps=52281, ups=3, wpb=15170.939, bsz=560.128, num_updates=16177, lr=0.000248629, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4815, train_wall=4343 | |
| epoch 002: 7330 / 8862 loss=4.527, nll_loss=2.914, ppl=7.54, wps=52281, ups=3, wpb=15170.783, bsz=560.182, num_updates=16187, lr=0.000248552, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4818, train_wall=4346 | |
| epoch 002: 7340 / 8862 loss=4.527, nll_loss=2.914, ppl=7.54, wps=52282, ups=3, wpb=15170.996, bsz=560.184, num_updates=16197, lr=0.000248475, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4821, train_wall=4349 | |
| epoch 002: 7350 / 8862 loss=4.527, nll_loss=2.914, ppl=7.53, wps=52283, ups=3, wpb=15171.122, bsz=560.234, num_updates=16207, lr=0.000248398, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4824, train_wall=4351 | |
| epoch 002: 7360 / 8862 loss=4.527, nll_loss=2.913, ppl=7.53, wps=52284, ups=3, wpb=15171.229, bsz=560.171, num_updates=16217, lr=0.000248322, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4827, train_wall=4354 | |
| epoch 002: 7370 / 8862 loss=4.527, nll_loss=2.913, ppl=7.53, wps=52285, ups=3, wpb=15171.365, bsz=560.196, num_updates=16227, lr=0.000248245, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4830, train_wall=4357 | |
| epoch 002: 7380 / 8862 loss=4.526, nll_loss=2.913, ppl=7.53, wps=52286, ups=3, wpb=15171.415, bsz=560.250, num_updates=16237, lr=0.000248169, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4833, train_wall=4359 | |
| epoch 002: 7390 / 8862 loss=4.526, nll_loss=2.913, ppl=7.53, wps=52286, ups=3, wpb=15171.306, bsz=560.178, num_updates=16247, lr=0.000248092, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4835, train_wall=4362 | |
| epoch 002: 7400 / 8862 loss=4.526, nll_loss=2.913, ppl=7.53, wps=52287, ups=3, wpb=15171.386, bsz=560.119, num_updates=16257, lr=0.000248016, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4838, train_wall=4365 | |
| epoch 002: 7410 / 8862 loss=4.526, nll_loss=2.912, ppl=7.53, wps=52288, ups=3, wpb=15171.500, bsz=560.046, num_updates=16267, lr=0.00024794, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4841, train_wall=4367 | |
| epoch 002: 7420 / 8862 loss=4.526, nll_loss=2.912, ppl=7.53, wps=52290, ups=3, wpb=15171.843, bsz=560.110, num_updates=16277, lr=0.000247864, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4844, train_wall=4370 | |
| epoch 002: 7430 / 8862 loss=4.526, nll_loss=2.912, ppl=7.53, wps=52290, ups=3, wpb=15171.619, bsz=560.068, num_updates=16287, lr=0.000247788, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4847, train_wall=4373 | |
| epoch 002: 7440 / 8862 loss=4.525, nll_loss=2.912, ppl=7.53, wps=52291, ups=3, wpb=15171.793, bsz=560.004, num_updates=16297, lr=0.000247711, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4850, train_wall=4375 | |
| epoch 002: 7450 / 8862 loss=4.525, nll_loss=2.912, ppl=7.53, wps=52292, ups=3, wpb=15171.809, bsz=559.953, num_updates=16307, lr=0.000247636, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4853, train_wall=4378 | |
| epoch 002: 7460 / 8862 loss=4.525, nll_loss=2.912, ppl=7.52, wps=52293, ups=3, wpb=15172.123, bsz=559.886, num_updates=16317, lr=0.00024756, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4856, train_wall=4381 | |
| epoch 002: 7470 / 8862 loss=4.525, nll_loss=2.911, ppl=7.52, wps=52296, ups=3, wpb=15172.784, bsz=559.956, num_updates=16327, lr=0.000247484, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4858, train_wall=4383 | |
| epoch 002: 7480 / 8862 loss=4.524, nll_loss=2.911, ppl=7.52, wps=52298, ups=3, wpb=15173.231, bsz=559.951, num_updates=16337, lr=0.000247408, gnorm=0.570, clip=0.000, oom=0.000, loss_scale=8.000, wall=4861, train_wall=4386 | |
| epoch 002: 7490 / 8862 loss=4.524, nll_loss=2.910, ppl=7.52, wps=52300, ups=3, wpb=15173.856, bsz=560.032, num_updates=16347, lr=0.000247332, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4864, train_wall=4389 | |
| epoch 002: 7500 / 8862 loss=4.524, nll_loss=2.910, ppl=7.52, wps=52302, ups=3, wpb=15174.311, bsz=560.080, num_updates=16357, lr=0.000247257, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4867, train_wall=4391 | |
| epoch 002: 7510 / 8862 loss=4.523, nll_loss=2.910, ppl=7.52, wps=52302, ups=3, wpb=15174.363, bsz=560.060, num_updates=16367, lr=0.000247181, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4870, train_wall=4394 | |
| epoch 002: 7520 / 8862 loss=4.523, nll_loss=2.910, ppl=7.52, wps=52303, ups=3, wpb=15174.306, bsz=559.915, num_updates=16377, lr=0.000247106, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4873, train_wall=4397 | |
| epoch 002: 7530 / 8862 loss=4.523, nll_loss=2.910, ppl=7.52, wps=52303, ups=3, wpb=15173.850, bsz=559.792, num_updates=16387, lr=0.00024703, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4876, train_wall=4399 | |
| epoch 002: 7540 / 8862 loss=4.523, nll_loss=2.910, ppl=7.51, wps=52303, ups=3, wpb=15173.716, bsz=559.880, num_updates=16397, lr=0.000246955, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4879, train_wall=4402 | |
| epoch 002: 7550 / 8862 loss=4.523, nll_loss=2.909, ppl=7.51, wps=52304, ups=3, wpb=15173.895, bsz=559.759, num_updates=16407, lr=0.00024688, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4881, train_wall=4405 | |
| epoch 002: 7560 / 8862 loss=4.523, nll_loss=2.909, ppl=7.51, wps=52305, ups=3, wpb=15173.897, bsz=559.624, num_updates=16417, lr=0.000246805, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4884, train_wall=4407 | |
| epoch 002: 7570 / 8862 loss=4.523, nll_loss=2.909, ppl=7.51, wps=52307, ups=3, wpb=15174.141, bsz=559.613, num_updates=16427, lr=0.000246729, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4887, train_wall=4410 | |
| epoch 002: 7580 / 8862 loss=4.523, nll_loss=2.909, ppl=7.51, wps=52307, ups=3, wpb=15173.942, bsz=559.604, num_updates=16437, lr=0.000246654, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4890, train_wall=4413 | |
| epoch 002: 7590 / 8862 loss=4.522, nll_loss=2.909, ppl=7.51, wps=52307, ups=3, wpb=15174.195, bsz=559.616, num_updates=16447, lr=0.000246579, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4893, train_wall=4415 | |
| epoch 002: 7600 / 8862 loss=4.522, nll_loss=2.909, ppl=7.51, wps=52308, ups=3, wpb=15174.229, bsz=559.602, num_updates=16457, lr=0.000246504, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4896, train_wall=4418 | |
| epoch 002: 7610 / 8862 loss=4.522, nll_loss=2.909, ppl=7.51, wps=52306, ups=3, wpb=15173.563, bsz=559.602, num_updates=16467, lr=0.00024643, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4899, train_wall=4421 | |
| epoch 002: 7620 / 8862 loss=4.522, nll_loss=2.908, ppl=7.51, wps=52306, ups=3, wpb=15173.380, bsz=559.462, num_updates=16477, lr=0.000246355, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4902, train_wall=4423 | |
| epoch 002: 7630 / 8862 loss=4.522, nll_loss=2.908, ppl=7.51, wps=52309, ups=3, wpb=15173.795, bsz=559.338, num_updates=16487, lr=0.00024628, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4904, train_wall=4426 | |
| epoch 002: 7640 / 8862 loss=4.522, nll_loss=2.908, ppl=7.51, wps=52309, ups=3, wpb=15173.867, bsz=559.379, num_updates=16497, lr=0.000246205, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4907, train_wall=4429 | |
| epoch 002: 7650 / 8862 loss=4.521, nll_loss=2.908, ppl=7.50, wps=52310, ups=3, wpb=15173.929, bsz=559.400, num_updates=16507, lr=0.000246131, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=8.000, wall=4910, train_wall=4431 | |
| epoch 002: 7660 / 8862 loss=4.521, nll_loss=2.908, ppl=7.50, wps=52310, ups=3, wpb=15173.902, bsz=559.347, num_updates=16517, lr=0.000246056, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=16.000, wall=4913, train_wall=4434 | |
| epoch 002: 7670 / 8862 loss=4.521, nll_loss=2.907, ppl=7.50, wps=52310, ups=3, wpb=15173.782, bsz=559.337, num_updates=16527, lr=0.000245982, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=16.000, wall=4916, train_wall=4437 | |
| epoch 002: 7680 / 8862 loss=4.521, nll_loss=2.907, ppl=7.50, wps=52311, ups=3, wpb=15173.765, bsz=559.342, num_updates=16537, lr=0.000245907, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=16.000, wall=4919, train_wall=4439 | |
| epoch 002: 7690 / 8862 loss=4.521, nll_loss=2.907, ppl=7.50, wps=52312, ups=3, wpb=15173.886, bsz=559.243, num_updates=16547, lr=0.000245833, gnorm=0.569, clip=0.000, oom=0.000, loss_scale=16.000, wall=4922, train_wall=4442 | |
| epoch 002: 7700 / 8862 loss=4.521, nll_loss=2.907, ppl=7.50, wps=52313, ups=3, wpb=15173.966, bsz=559.227, num_updates=16557, lr=0.000245759, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4925, train_wall=4444 | |
| epoch 002: 7710 / 8862 loss=4.520, nll_loss=2.907, ppl=7.50, wps=52313, ups=3, wpb=15173.752, bsz=559.102, num_updates=16567, lr=0.000245685, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4928, train_wall=4447 | |
| epoch 002: 7720 / 8862 loss=4.520, nll_loss=2.906, ppl=7.50, wps=52315, ups=3, wpb=15174.126, bsz=559.015, num_updates=16577, lr=0.000245611, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4930, train_wall=4450 | |
| epoch 002: 7730 / 8862 loss=4.520, nll_loss=2.906, ppl=7.50, wps=52315, ups=3, wpb=15174.229, bsz=559.157, num_updates=16587, lr=0.000245537, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4933, train_wall=4452 | |
| epoch 002: 7740 / 8862 loss=4.520, nll_loss=2.906, ppl=7.50, wps=52315, ups=3, wpb=15173.882, bsz=559.098, num_updates=16597, lr=0.000245463, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4936, train_wall=4455 | |
| epoch 002: 7750 / 8862 loss=4.520, nll_loss=2.906, ppl=7.49, wps=52314, ups=3, wpb=15173.653, bsz=559.054, num_updates=16607, lr=0.000245389, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4939, train_wall=4458 | |
| epoch 002: 7760 / 8862 loss=4.520, nll_loss=2.906, ppl=7.49, wps=52316, ups=3, wpb=15173.917, bsz=558.956, num_updates=16617, lr=0.000245315, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4942, train_wall=4460 | |
| epoch 002: 7770 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52316, ups=3, wpb=15174.103, bsz=558.985, num_updates=16627, lr=0.000245241, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4945, train_wall=4463 | |
| epoch 002: 7780 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52318, ups=3, wpb=15174.423, bsz=558.931, num_updates=16637, lr=0.000245167, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4948, train_wall=4466 | |
| epoch 002: 7790 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52317, ups=3, wpb=15174.721, bsz=559.137, num_updates=16647, lr=0.000245094, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4951, train_wall=4469 | |
| epoch 002: 7800 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52317, ups=3, wpb=15174.736, bsz=559.071, num_updates=16657, lr=0.00024502, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4954, train_wall=4471 | |
| epoch 002: 7810 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52318, ups=3, wpb=15174.734, bsz=558.985, num_updates=16667, lr=0.000244947, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4956, train_wall=4474 | |
| epoch 002: 7820 / 8862 loss=4.519, nll_loss=2.905, ppl=7.49, wps=52317, ups=3, wpb=15174.502, bsz=559.050, num_updates=16677, lr=0.000244873, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=16.000, wall=4959, train_wall=4477 | |
| WARNING: overflow detected, setting loss scale to: 8.0 | |
| epoch 002: 7830 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.791, bsz=558.948, num_updates=16686, lr=0.000244807, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4962, train_wall=4479 | |
| epoch 002: 7840 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.634, bsz=558.969, num_updates=16696, lr=0.000244734, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4965, train_wall=4482 | |
| epoch 002: 7850 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.619, bsz=558.860, num_updates=16706, lr=0.00024466, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4968, train_wall=4485 | |
| epoch 002: 7860 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.240, bsz=558.689, num_updates=16716, lr=0.000244587, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4971, train_wall=4487 | |
| epoch 002: 7870 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.333, bsz=558.699, num_updates=16726, lr=0.000244514, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4974, train_wall=4490 | |
| epoch 002: 7880 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.354, bsz=558.743, num_updates=16736, lr=0.000244441, gnorm=0.568, clip=0.000, oom=0.000, loss_scale=8.000, wall=4977, train_wall=4493 | |
| epoch 002: 7890 / 8862 loss=4.518, nll_loss=2.904, ppl=7.49, wps=52312, ups=3, wpb=15174.187, bsz=558.791, num_updates=16746, lr=0.000244368, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4980, train_wall=4495 | |
| epoch 002: 7900 / 8862 loss=4.518, nll_loss=2.904, ppl=7.48, wps=52311, ups=3, wpb=15174.117, bsz=558.826, num_updates=16756, lr=0.000244295, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4982, train_wall=4498 | |
| epoch 002: 7910 / 8862 loss=4.518, nll_loss=2.904, ppl=7.48, wps=52311, ups=3, wpb=15173.858, bsz=558.787, num_updates=16766, lr=0.000244222, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4985, train_wall=4501 | |
| epoch 002: 7920 / 8862 loss=4.517, nll_loss=2.903, ppl=7.48, wps=52310, ups=3, wpb=15173.801, bsz=558.819, num_updates=16776, lr=0.000244149, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4988, train_wall=4503 | |
| epoch 002: 7930 / 8862 loss=4.517, nll_loss=2.903, ppl=7.48, wps=52309, ups=3, wpb=15173.438, bsz=558.823, num_updates=16786, lr=0.000244077, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4991, train_wall=4506 | |
| epoch 002: 7940 / 8862 loss=4.517, nll_loss=2.903, ppl=7.48, wps=52309, ups=3, wpb=15173.437, bsz=558.826, num_updates=16796, lr=0.000244004, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4994, train_wall=4509 | |
| epoch 002: 7950 / 8862 loss=4.517, nll_loss=2.903, ppl=7.48, wps=52311, ups=3, wpb=15173.795, bsz=558.853, num_updates=16806, lr=0.000243931, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=4997, train_wall=4511 | |
| epoch 002: 7960 / 8862 loss=4.517, nll_loss=2.903, ppl=7.48, wps=52312, ups=3, wpb=15174.172, bsz=558.972, num_updates=16816, lr=0.000243859, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5000, train_wall=4514 | |
| epoch 002: 7970 / 8862 loss=4.517, nll_loss=2.902, ppl=7.48, wps=52312, ups=3, wpb=15174.126, bsz=558.961, num_updates=16826, lr=0.000243786, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5003, train_wall=4517 | |
| epoch 002: 7980 / 8862 loss=4.516, nll_loss=2.902, ppl=7.48, wps=52311, ups=3, wpb=15173.907, bsz=558.958, num_updates=16836, lr=0.000243714, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5006, train_wall=4519 | |
| epoch 002: 7990 / 8862 loss=4.516, nll_loss=2.902, ppl=7.47, wps=52310, ups=3, wpb=15173.591, bsz=558.918, num_updates=16846, lr=0.000243642, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5009, train_wall=4522 | |
| epoch 002: 8000 / 8862 loss=4.516, nll_loss=2.902, ppl=7.47, wps=52311, ups=3, wpb=15173.734, bsz=558.830, num_updates=16856, lr=0.000243569, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5011, train_wall=4525 | |
| epoch 002: 8010 / 8862 loss=4.516, nll_loss=2.902, ppl=7.47, wps=52311, ups=3, wpb=15173.900, bsz=558.819, num_updates=16866, lr=0.000243497, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5014, train_wall=4527 | |
| epoch 002: 8020 / 8862 loss=4.516, nll_loss=2.901, ppl=7.47, wps=52311, ups=3, wpb=15173.988, bsz=558.852, num_updates=16876, lr=0.000243425, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5017, train_wall=4530 | |
| epoch 002: 8030 / 8862 loss=4.515, nll_loss=2.901, ppl=7.47, wps=52311, ups=3, wpb=15174.073, bsz=558.822, num_updates=16886, lr=0.000243353, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5020, train_wall=4533 | |
| epoch 002: 8040 / 8862 loss=4.515, nll_loss=2.901, ppl=7.47, wps=52308, ups=3, wpb=15173.477, bsz=559.080, num_updates=16896, lr=0.000243281, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5023, train_wall=4536 | |
| epoch 002: 8050 / 8862 loss=4.515, nll_loss=2.901, ppl=7.47, wps=52308, ups=3, wpb=15173.692, bsz=558.950, num_updates=16906, lr=0.000243209, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5026, train_wall=4538 | |
| epoch 002: 8060 / 8862 loss=4.515, nll_loss=2.901, ppl=7.47, wps=52309, ups=3, wpb=15173.826, bsz=558.981, num_updates=16916, lr=0.000243137, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5029, train_wall=4541 | |
| epoch 002: 8070 / 8862 loss=4.515, nll_loss=2.901, ppl=7.47, wps=52309, ups=3, wpb=15173.946, bsz=558.925, num_updates=16926, lr=0.000243065, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5032, train_wall=4544 | |
| epoch 002: 8080 / 8862 loss=4.515, nll_loss=2.900, ppl=7.47, wps=52308, ups=3, wpb=15173.840, bsz=559.022, num_updates=16936, lr=0.000242993, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5035, train_wall=4546 | |
| epoch 002: 8090 / 8862 loss=4.515, nll_loss=2.900, ppl=7.47, wps=52307, ups=3, wpb=15173.540, bsz=558.921, num_updates=16946, lr=0.000242922, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5038, train_wall=4549 | |
| epoch 002: 8100 / 8862 loss=4.515, nll_loss=2.900, ppl=7.47, wps=52308, ups=3, wpb=15173.588, bsz=558.814, num_updates=16956, lr=0.00024285, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5041, train_wall=4552 | |
| epoch 002: 8110 / 8862 loss=4.514, nll_loss=2.900, ppl=7.47, wps=52305, ups=3, wpb=15173.172, bsz=559.039, num_updates=16966, lr=0.000242779, gnorm=0.567, clip=0.000, oom=0.000, loss_scale=8.000, wall=5044, train_wall=4554 | |
| epoch 002: 8120 / 8862 loss=4.514, nll_loss=2.900, ppl=7.46, wps=52305, ups=3, wpb=15173.295, bsz=559.007, num_updates=16976, lr=0.000242707, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5046, train_wall=4557 | |
| epoch 002: 8130 / 8862 loss=4.514, nll_loss=2.900, ppl=7.46, wps=52305, ups=3, wpb=15173.345, bsz=558.957, num_updates=16986, lr=0.000242636, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5049, train_wall=4560 | |
| epoch 002: 8140 / 8862 loss=4.514, nll_loss=2.900, ppl=7.46, wps=52305, ups=3, wpb=15173.286, bsz=558.886, num_updates=16996, lr=0.000242564, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5052, train_wall=4562 | |
| epoch 002: 8150 / 8862 loss=4.514, nll_loss=2.900, ppl=7.46, wps=52307, ups=3, wpb=15173.410, bsz=558.785, num_updates=17006, lr=0.000242493, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5055, train_wall=4565 | |
| epoch 002: 8160 / 8862 loss=4.514, nll_loss=2.899, ppl=7.46, wps=52307, ups=3, wpb=15173.374, bsz=558.770, num_updates=17016, lr=0.000242422, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5058, train_wall=4568 | |
| epoch 002: 8170 / 8862 loss=4.513, nll_loss=2.899, ppl=7.46, wps=52307, ups=3, wpb=15173.611, bsz=558.786, num_updates=17026, lr=0.00024235, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5061, train_wall=4570 | |
| epoch 002: 8180 / 8862 loss=4.513, nll_loss=2.899, ppl=7.46, wps=52308, ups=3, wpb=15173.631, bsz=558.733, num_updates=17036, lr=0.000242279, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5064, train_wall=4573 | |
| epoch 002: 8190 / 8862 loss=4.513, nll_loss=2.899, ppl=7.46, wps=52310, ups=3, wpb=15173.997, bsz=558.768, num_updates=17046, lr=0.000242208, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5067, train_wall=4576 | |
| epoch 002: 8200 / 8862 loss=4.513, nll_loss=2.898, ppl=7.46, wps=52310, ups=3, wpb=15173.918, bsz=558.829, num_updates=17056, lr=0.000242137, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5070, train_wall=4578 | |
| epoch 002: 8210 / 8862 loss=4.513, nll_loss=2.898, ppl=7.46, wps=52311, ups=3, wpb=15174.069, bsz=558.780, num_updates=17066, lr=0.000242066, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5072, train_wall=4581 | |
| epoch 002: 8220 / 8862 loss=4.512, nll_loss=2.898, ppl=7.45, wps=52312, ups=3, wpb=15174.468, bsz=558.771, num_updates=17076, lr=0.000241995, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5075, train_wall=4584 | |
| epoch 002: 8230 / 8862 loss=4.512, nll_loss=2.898, ppl=7.45, wps=52312, ups=3, wpb=15174.292, bsz=558.767, num_updates=17086, lr=0.000241924, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5078, train_wall=4586 | |
| epoch 002: 8240 / 8862 loss=4.512, nll_loss=2.898, ppl=7.45, wps=52313, ups=3, wpb=15174.560, bsz=558.739, num_updates=17096, lr=0.000241854, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5081, train_wall=4589 | |
| epoch 002: 8250 / 8862 loss=4.512, nll_loss=2.897, ppl=7.45, wps=52315, ups=3, wpb=15174.975, bsz=558.718, num_updates=17106, lr=0.000241783, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5084, train_wall=4592 | |
| epoch 002: 8260 / 8862 loss=4.512, nll_loss=2.897, ppl=7.45, wps=52316, ups=3, wpb=15175.001, bsz=558.671, num_updates=17116, lr=0.000241712, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5087, train_wall=4594 | |
| epoch 002: 8270 / 8862 loss=4.511, nll_loss=2.897, ppl=7.45, wps=52316, ups=3, wpb=15174.948, bsz=558.714, num_updates=17126, lr=0.000241642, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5090, train_wall=4597 | |
| epoch 002: 8280 / 8862 loss=4.511, nll_loss=2.897, ppl=7.45, wps=52317, ups=3, wpb=15175.189, bsz=558.720, num_updates=17136, lr=0.000241571, gnorm=0.566, clip=0.000, oom=0.000, loss_scale=8.000, wall=5093, train_wall=4600 | |
| epoch 002: 8290 / 8862 loss=4.511, nll_loss=2.896, ppl=7.45, wps=52318, ups=3, wpb=15175.097, bsz=558.719, num_updates=17146, lr=0.000241501, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5095, train_wall=4602 | |
| epoch 002: 8300 / 8862 loss=4.511, nll_loss=2.896, ppl=7.45, wps=52318, ups=3, wpb=15174.932, bsz=558.646, num_updates=17156, lr=0.00024143, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5098, train_wall=4605 | |
| epoch 002: 8310 / 8862 loss=4.511, nll_loss=2.896, ppl=7.44, wps=52317, ups=3, wpb=15174.795, bsz=558.589, num_updates=17166, lr=0.00024136, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5101, train_wall=4608 | |
| epoch 002: 8320 / 8862 loss=4.511, nll_loss=2.896, ppl=7.44, wps=52318, ups=3, wpb=15174.766, bsz=558.421, num_updates=17176, lr=0.00024129, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5104, train_wall=4610 | |
| epoch 002: 8330 / 8862 loss=4.511, nll_loss=2.896, ppl=7.44, wps=52318, ups=3, wpb=15174.703, bsz=558.400, num_updates=17186, lr=0.00024122, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5107, train_wall=4613 | |
| epoch 002: 8340 / 8862 loss=4.510, nll_loss=2.896, ppl=7.44, wps=52320, ups=3, wpb=15175.140, bsz=558.299, num_updates=17196, lr=0.000241149, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5110, train_wall=4616 | |
| epoch 002: 8350 / 8862 loss=4.510, nll_loss=2.896, ppl=7.44, wps=52320, ups=3, wpb=15175.069, bsz=558.232, num_updates=17206, lr=0.000241079, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5113, train_wall=4618 | |
| epoch 002: 8360 / 8862 loss=4.510, nll_loss=2.896, ppl=7.44, wps=52321, ups=3, wpb=15175.429, bsz=558.190, num_updates=17216, lr=0.000241009, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5116, train_wall=4621 | |
| epoch 002: 8370 / 8862 loss=4.510, nll_loss=2.895, ppl=7.44, wps=52323, ups=3, wpb=15175.781, bsz=558.146, num_updates=17226, lr=0.000240939, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5119, train_wall=4624 | |
| epoch 002: 8380 / 8862 loss=4.510, nll_loss=2.895, ppl=7.44, wps=52324, ups=3, wpb=15175.838, bsz=558.153, num_updates=17236, lr=0.000240869, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5121, train_wall=4626 | |
| epoch 002: 8390 / 8862 loss=4.510, nll_loss=2.895, ppl=7.44, wps=52324, ups=3, wpb=15175.936, bsz=558.059, num_updates=17246, lr=0.0002408, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5124, train_wall=4629 | |
| epoch 002: 8400 / 8862 loss=4.509, nll_loss=2.895, ppl=7.44, wps=52325, ups=3, wpb=15175.968, bsz=558.152, num_updates=17256, lr=0.00024073, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5127, train_wall=4632 | |
| epoch 002: 8410 / 8862 loss=4.509, nll_loss=2.895, ppl=7.44, wps=52324, ups=3, wpb=15175.648, bsz=558.171, num_updates=17266, lr=0.00024066, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5130, train_wall=4634 | |
| epoch 002: 8420 / 8862 loss=4.509, nll_loss=2.895, ppl=7.44, wps=52324, ups=3, wpb=15175.312, bsz=558.084, num_updates=17276, lr=0.00024059, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5133, train_wall=4637 | |
| epoch 002: 8430 / 8862 loss=4.509, nll_loss=2.895, ppl=7.44, wps=52325, ups=3, wpb=15175.321, bsz=557.994, num_updates=17286, lr=0.000240521, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5136, train_wall=4640 | |
| epoch 002: 8440 / 8862 loss=4.509, nll_loss=2.894, ppl=7.43, wps=52324, ups=3, wpb=15175.274, bsz=558.119, num_updates=17296, lr=0.000240451, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5139, train_wall=4642 | |
| epoch 002: 8450 / 8862 loss=4.509, nll_loss=2.894, ppl=7.43, wps=52324, ups=3, wpb=15175.184, bsz=558.177, num_updates=17306, lr=0.000240382, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5142, train_wall=4645 | |
| epoch 002: 8460 / 8862 loss=4.509, nll_loss=2.894, ppl=7.43, wps=52325, ups=3, wpb=15175.386, bsz=558.137, num_updates=17316, lr=0.000240312, gnorm=0.565, clip=0.000, oom=0.000, loss_scale=8.000, wall=5144, train_wall=4648 | |
| epoch 002: 8470 / 8862 loss=4.508, nll_loss=2.894, ppl=7.43, wps=52326, ups=3, wpb=15175.601, bsz=558.096, num_updates=17326, lr=0.000240243, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5147, train_wall=4650 | |
| epoch 002: 8480 / 8862 loss=4.508, nll_loss=2.894, ppl=7.43, wps=52327, ups=3, wpb=15175.817, bsz=558.124, num_updates=17336, lr=0.000240174, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5150, train_wall=4653 | |
| epoch 002: 8490 / 8862 loss=4.508, nll_loss=2.893, ppl=7.43, wps=52328, ups=3, wpb=15176.031, bsz=558.147, num_updates=17346, lr=0.000240105, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5153, train_wall=4656 | |
| epoch 002: 8500 / 8862 loss=4.508, nll_loss=2.893, ppl=7.43, wps=52330, ups=3, wpb=15176.413, bsz=558.016, num_updates=17356, lr=0.000240035, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5156, train_wall=4658 | |
| epoch 002: 8510 / 8862 loss=4.508, nll_loss=2.893, ppl=7.43, wps=52332, ups=3, wpb=15176.660, bsz=557.933, num_updates=17366, lr=0.000239966, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5159, train_wall=4661 | |
| epoch 002: 8520 / 8862 loss=4.508, nll_loss=2.893, ppl=7.43, wps=52332, ups=3, wpb=15176.661, bsz=557.881, num_updates=17376, lr=0.000239897, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5162, train_wall=4664 | |
| epoch 002: 8530 / 8862 loss=4.508, nll_loss=2.893, ppl=7.43, wps=52331, ups=3, wpb=15176.232, bsz=557.834, num_updates=17386, lr=0.000239828, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5165, train_wall=4666 | |
| epoch 002: 8540 / 8862 loss=4.507, nll_loss=2.892, ppl=7.43, wps=52331, ups=3, wpb=15176.309, bsz=557.918, num_updates=17396, lr=0.000239759, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5168, train_wall=4669 | |
| epoch 002: 8550 / 8862 loss=4.507, nll_loss=2.892, ppl=7.42, wps=52332, ups=3, wpb=15176.434, bsz=557.820, num_updates=17406, lr=0.00023969, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5170, train_wall=4672 | |
| epoch 002: 8560 / 8862 loss=4.507, nll_loss=2.892, ppl=7.42, wps=52333, ups=3, wpb=15176.778, bsz=557.815, num_updates=17416, lr=0.000239622, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5173, train_wall=4674 | |
| epoch 002: 8570 / 8862 loss=4.507, nll_loss=2.892, ppl=7.42, wps=52335, ups=3, wpb=15177.096, bsz=557.792, num_updates=17426, lr=0.000239553, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5176, train_wall=4677 | |
| epoch 002: 8580 / 8862 loss=4.507, nll_loss=2.892, ppl=7.42, wps=52335, ups=3, wpb=15176.932, bsz=557.782, num_updates=17436, lr=0.000239484, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5179, train_wall=4680 | |
| epoch 002: 8590 / 8862 loss=4.506, nll_loss=2.892, ppl=7.42, wps=52335, ups=3, wpb=15177.094, bsz=557.844, num_updates=17446, lr=0.000239415, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5182, train_wall=4682 | |
| epoch 002: 8600 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52335, ups=3, wpb=15177.032, bsz=557.846, num_updates=17456, lr=0.000239347, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5185, train_wall=4685 | |
| epoch 002: 8610 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52336, ups=3, wpb=15177.222, bsz=557.824, num_updates=17466, lr=0.000239278, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5188, train_wall=4688 | |
| epoch 002: 8620 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52337, ups=3, wpb=15177.334, bsz=557.746, num_updates=17476, lr=0.00023921, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5191, train_wall=4690 | |
| epoch 002: 8630 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52337, ups=3, wpb=15177.290, bsz=557.644, num_updates=17486, lr=0.000239141, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5193, train_wall=4693 | |
| epoch 002: 8640 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52337, ups=3, wpb=15177.191, bsz=557.667, num_updates=17496, lr=0.000239073, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5196, train_wall=4696 | |
| epoch 002: 8650 / 8862 loss=4.506, nll_loss=2.891, ppl=7.42, wps=52337, ups=3, wpb=15177.156, bsz=557.796, num_updates=17506, lr=0.000239005, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5199, train_wall=4698 | |
| epoch 002: 8660 / 8862 loss=4.505, nll_loss=2.891, ppl=7.42, wps=52338, ups=3, wpb=15177.373, bsz=557.694, num_updates=17516, lr=0.000238937, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5202, train_wall=4701 | |
| epoch 002: 8670 / 8862 loss=4.505, nll_loss=2.890, ppl=7.41, wps=52338, ups=3, wpb=15177.308, bsz=557.746, num_updates=17526, lr=0.000238868, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5205, train_wall=4704 | |
| epoch 002: 8680 / 8862 loss=4.505, nll_loss=2.890, ppl=7.41, wps=52338, ups=3, wpb=15177.430, bsz=557.822, num_updates=17536, lr=0.0002388, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5208, train_wall=4706 | |
| epoch 002: 8690 / 8862 loss=4.505, nll_loss=2.890, ppl=7.41, wps=52339, ups=3, wpb=15177.614, bsz=557.851, num_updates=17546, lr=0.000238732, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5211, train_wall=4709 | |
| epoch 002: 8700 / 8862 loss=4.505, nll_loss=2.890, ppl=7.41, wps=52340, ups=3, wpb=15177.718, bsz=557.772, num_updates=17556, lr=0.000238664, gnorm=0.564, clip=0.000, oom=0.000, loss_scale=8.000, wall=5214, train_wall=4712 | |
| epoch 002: 8710 / 8862 loss=4.505, nll_loss=2.890, ppl=7.41, wps=52341, ups=3, wpb=15177.842, bsz=557.715, num_updates=17566, lr=0.000238596, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5217, train_wall=4714 | |
| epoch 002: 8720 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52341, ups=3, wpb=15177.696, bsz=557.697, num_updates=17576, lr=0.000238528, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5219, train_wall=4717 | |
| epoch 002: 8730 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52341, ups=3, wpb=15177.567, bsz=557.684, num_updates=17586, lr=0.000238461, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5222, train_wall=4720 | |
| epoch 002: 8740 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52341, ups=3, wpb=15177.608, bsz=557.732, num_updates=17596, lr=0.000238393, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5225, train_wall=4722 | |
| epoch 002: 8750 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52342, ups=3, wpb=15177.808, bsz=557.674, num_updates=17606, lr=0.000238325, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5228, train_wall=4725 | |
| epoch 002: 8760 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52341, ups=3, wpb=15177.727, bsz=557.785, num_updates=17616, lr=0.000238257, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5231, train_wall=4728 | |
| epoch 002: 8770 / 8862 loss=4.504, nll_loss=2.889, ppl=7.41, wps=52342, ups=3, wpb=15177.885, bsz=557.862, num_updates=17626, lr=0.00023819, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5234, train_wall=4730 | |
| epoch 002: 8780 / 8862 loss=4.503, nll_loss=2.888, ppl=7.40, wps=52341, ups=3, wpb=15177.476, bsz=557.823, num_updates=17636, lr=0.000238122, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5237, train_wall=4733 | |
| epoch 002: 8790 / 8862 loss=4.503, nll_loss=2.888, ppl=7.40, wps=52342, ups=3, wpb=15177.851, bsz=557.908, num_updates=17646, lr=0.000238055, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5240, train_wall=4736 | |
| epoch 002: 8800 / 8862 loss=4.503, nll_loss=2.888, ppl=7.40, wps=52343, ups=3, wpb=15177.940, bsz=557.975, num_updates=17656, lr=0.000237987, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5243, train_wall=4738 | |
| epoch 002: 8810 / 8862 loss=4.503, nll_loss=2.888, ppl=7.40, wps=52343, ups=3, wpb=15177.966, bsz=557.987, num_updates=17666, lr=0.00023792, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5246, train_wall=4741 | |
| epoch 002: 8820 / 8862 loss=4.503, nll_loss=2.888, ppl=7.40, wps=52343, ups=3, wpb=15177.971, bsz=557.926, num_updates=17676, lr=0.000237853, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5248, train_wall=4744 | |
| epoch 002: 8830 / 8862 loss=4.503, nll_loss=2.887, ppl=7.40, wps=52343, ups=3, wpb=15178.068, bsz=557.993, num_updates=17686, lr=0.000237785, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5251, train_wall=4746 | |
| epoch 002: 8840 / 8862 loss=4.502, nll_loss=2.887, ppl=7.40, wps=52343, ups=3, wpb=15177.948, bsz=557.988, num_updates=17696, lr=0.000237718, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5254, train_wall=4749 | |
| epoch 002: 8850 / 8862 loss=4.502, nll_loss=2.887, ppl=7.40, wps=52343, ups=3, wpb=15177.915, bsz=557.981, num_updates=17706, lr=0.000237651, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5257, train_wall=4752 | |
| epoch 002: 8860 / 8862 loss=4.502, nll_loss=2.887, ppl=7.40, wps=52344, ups=3, wpb=15178.222, bsz=558.012, num_updates=17716, lr=0.000237584, gnorm=0.563, clip=0.000, oom=0.000, loss_scale=8.000, wall=5260, train_wall=4754 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 002 | loss 4.502 | nll_loss 2.887 | ppl 7.40 | wps 52337 | ups 3 | wpb 15177.853 | bsz 558.015 | num_updates 17717 | lr 0.000237577 | gnorm 0.563 | clip 0.000 | oom 0.000 | loss_scale 8.000 | wall 5261 | train_wall 4755 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| WARNING: 2459 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[34935, 29199, 25522, 50610, 31640, 50522, 29514, 23772, 21318, 30173] | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 002 | valid on 'valid' subset | loss 4.282 | nll_loss 2.514 | ppl 5.71 | num_updates 17717 | best_loss 4.28208 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 003: 10 / 8862 loss=4.209, nll_loss=2.562, ppl=5.90, wps=52721, ups=0, wpb=15429.818, bsz=650.182, num_updates=17728, lr=0.000237504, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5307, train_wall=4759 | |
| epoch 003: 20 / 8862 loss=4.215, nll_loss=2.568, ppl=5.93, wps=52617, ups=0, wpb=15370.476, bsz=609.524, num_updates=17738, lr=0.000237437, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=5310, train_wall=4762 | |
| epoch 003: 30 / 8862 loss=4.214, nll_loss=2.567, ppl=5.93, wps=52616, ups=1, wpb=15334.806, bsz=594.839, num_updates=17748, lr=0.00023737, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=5313, train_wall=4765 | |
| epoch 003: 40 / 8862 loss=4.234, nll_loss=2.590, ppl=6.02, wps=52599, ups=1, wpb=15304.854, bsz=568.780, num_updates=17758, lr=0.000237303, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=5316, train_wall=4767 | |
| epoch 003: 50 / 8862 loss=4.239, nll_loss=2.596, ppl=6.05, wps=52529, ups=1, wpb=15290.412, bsz=589.647, num_updates=17768, lr=0.000237236, gnorm=0.497, clip=0.000, oom=0.000, loss_scale=8.000, wall=5319, train_wall=4770 | |
| epoch 003: 60 / 8862 loss=4.246, nll_loss=2.603, ppl=6.07, wps=52509, ups=1, wpb=15280.098, bsz=576.656, num_updates=17778, lr=0.000237169, gnorm=0.501, clip=0.000, oom=0.000, loss_scale=8.000, wall=5322, train_wall=4773 | |
| epoch 003: 70 / 8862 loss=4.240, nll_loss=2.597, ppl=6.05, wps=52603, ups=1, wpb=15301.577, bsz=574.535, num_updates=17788, lr=0.000237103, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=5325, train_wall=4775 | |
| epoch 003: 80 / 8862 loss=4.261, nll_loss=2.621, ppl=6.15, wps=52414, ups=1, wpb=15218.691, bsz=572.840, num_updates=17798, lr=0.000237036, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=8.000, wall=5327, train_wall=4778 | |
| epoch 003: 90 / 8862 loss=4.269, nll_loss=2.630, ppl=6.19, wps=52179, ups=1, wpb=15168.319, bsz=573.714, num_updates=17808, lr=0.000236969, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5330, train_wall=4781 | |
| epoch 003: 100 / 8862 loss=4.278, nll_loss=2.640, ppl=6.23, wps=52230, ups=1, wpb=15177.059, bsz=572.911, num_updates=17818, lr=0.000236903, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=5333, train_wall=4783 | |
| epoch 003: 110 / 8862 loss=4.276, nll_loss=2.638, ppl=6.22, wps=52326, ups=1, wpb=15199.748, bsz=573.261, num_updates=17828, lr=0.000236837, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=8.000, wall=5336, train_wall=4786 | |
| epoch 003: 120 / 8862 loss=4.276, nll_loss=2.638, ppl=6.22, wps=52145, ups=2, wpb=15151.281, bsz=580.430, num_updates=17838, lr=0.00023677, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=8.000, wall=5339, train_wall=4789 | |
| epoch 003: 130 / 8862 loss=4.275, nll_loss=2.637, ppl=6.22, wps=52221, ups=2, wpb=15173.908, bsz=577.710, num_updates=17848, lr=0.000236704, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5342, train_wall=4791 | |
| epoch 003: 140 / 8862 loss=4.279, nll_loss=2.641, ppl=6.24, wps=52190, ups=2, wpb=15165.340, bsz=579.234, num_updates=17858, lr=0.000236638, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5345, train_wall=4794 | |
| epoch 003: 150 / 8862 loss=4.278, nll_loss=2.640, ppl=6.23, wps=52203, ups=2, wpb=15166.801, bsz=576.954, num_updates=17868, lr=0.000236571, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5348, train_wall=4797 | |
| epoch 003: 160 / 8862 loss=4.276, nll_loss=2.638, ppl=6.22, wps=52172, ups=2, wpb=15160.236, bsz=576.845, num_updates=17878, lr=0.000236505, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5351, train_wall=4799 | |
| epoch 003: 170 / 8862 loss=4.282, nll_loss=2.645, ppl=6.25, wps=52167, ups=2, wpb=15158.012, bsz=576.468, num_updates=17888, lr=0.000236439, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=8.000, wall=5354, train_wall=4802 | |
| epoch 003: 180 / 8862 loss=4.289, nll_loss=2.652, ppl=6.29, wps=52221, ups=2, wpb=15168.530, bsz=575.028, num_updates=17898, lr=0.000236373, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=8.000, wall=5357, train_wall=4805 | |
| epoch 003: 190 / 8862 loss=4.287, nll_loss=2.650, ppl=6.28, wps=52262, ups=2, wpb=15184.366, bsz=574.618, num_updates=17908, lr=0.000236307, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=8.000, wall=5359, train_wall=4807 | |
| epoch 003: 200 / 8862 loss=4.290, nll_loss=2.653, ppl=6.29, wps=52277, ups=2, wpb=15182.607, bsz=571.622, num_updates=17918, lr=0.000236241, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5362, train_wall=4810 | |
| epoch 003: 210 / 8862 loss=4.290, nll_loss=2.653, ppl=6.29, wps=52286, ups=2, wpb=15186.199, bsz=568.417, num_updates=17928, lr=0.000236175, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5365, train_wall=4813 | |
| epoch 003: 220 / 8862 loss=4.295, nll_loss=2.659, ppl=6.32, wps=52309, ups=2, wpb=15188.344, bsz=565.719, num_updates=17938, lr=0.000236109, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5368, train_wall=4815 | |
| epoch 003: 230 / 8862 loss=4.299, nll_loss=2.663, ppl=6.33, wps=52288, ups=2, wpb=15187.775, bsz=564.156, num_updates=17948, lr=0.000236043, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5371, train_wall=4818 | |
| epoch 003: 240 / 8862 loss=4.299, nll_loss=2.663, ppl=6.33, wps=52249, ups=2, wpb=15173.697, bsz=564.780, num_updates=17958, lr=0.000235978, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5374, train_wall=4821 | |
| epoch 003: 250 / 8862 loss=4.297, nll_loss=2.661, ppl=6.32, wps=52190, ups=2, wpb=15171.490, bsz=573.036, num_updates=17968, lr=0.000235912, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5377, train_wall=4824 | |
| epoch 003: 260 / 8862 loss=4.297, nll_loss=2.661, ppl=6.33, wps=52172, ups=2, wpb=15166.548, bsz=572.046, num_updates=17978, lr=0.000235846, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5380, train_wall=4826 | |
| epoch 003: 270 / 8862 loss=4.298, nll_loss=2.663, ppl=6.33, wps=52200, ups=2, wpb=15170.085, bsz=570.539, num_updates=17988, lr=0.000235781, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5383, train_wall=4829 | |
| epoch 003: 280 / 8862 loss=4.299, nll_loss=2.664, ppl=6.34, wps=52182, ups=2, wpb=15165.085, bsz=569.822, num_updates=17998, lr=0.000235715, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5386, train_wall=4832 | |
| epoch 003: 290 / 8862 loss=4.302, nll_loss=2.667, ppl=6.35, wps=52215, ups=2, wpb=15169.694, bsz=568.742, num_updates=18008, lr=0.00023565, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5389, train_wall=4834 | |
| epoch 003: 300 / 8862 loss=4.300, nll_loss=2.665, ppl=6.34, wps=52200, ups=2, wpb=15171.791, bsz=568.133, num_updates=18018, lr=0.000235584, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5391, train_wall=4837 | |
| epoch 003: 310 / 8862 loss=4.302, nll_loss=2.667, ppl=6.35, wps=52224, ups=2, wpb=15176.688, bsz=568.566, num_updates=18028, lr=0.000235519, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5394, train_wall=4840 | |
| epoch 003: 320 / 8862 loss=4.298, nll_loss=2.663, ppl=6.33, wps=52202, ups=2, wpb=15180.380, bsz=573.757, num_updates=18038, lr=0.000235454, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5397, train_wall=4842 | |
| epoch 003: 330 / 8862 loss=4.298, nll_loss=2.662, ppl=6.33, wps=52202, ups=2, wpb=15181.057, bsz=574.381, num_updates=18048, lr=0.000235389, gnorm=0.519, clip=0.000, oom=0.000, loss_scale=8.000, wall=5400, train_wall=4845 | |
| epoch 003: 340 / 8862 loss=4.299, nll_loss=2.664, ppl=6.34, wps=52179, ups=2, wpb=15172.220, bsz=571.871, num_updates=18058, lr=0.000235323, gnorm=0.519, clip=0.000, oom=0.000, loss_scale=8.000, wall=5403, train_wall=4848 | |
| epoch 003: 350 / 8862 loss=4.298, nll_loss=2.662, ppl=6.33, wps=52199, ups=2, wpb=15179.066, bsz=572.741, num_updates=18068, lr=0.000235258, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5406, train_wall=4851 | |
| epoch 003: 360 / 8862 loss=4.301, nll_loss=2.666, ppl=6.35, wps=52217, ups=2, wpb=15183.571, bsz=571.169, num_updates=18078, lr=0.000235193, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5409, train_wall=4853 | |
| epoch 003: 370 / 8862 loss=4.300, nll_loss=2.665, ppl=6.34, wps=52205, ups=2, wpb=15177.889, bsz=570.264, num_updates=18088, lr=0.000235128, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5412, train_wall=4856 | |
| epoch 003: 380 / 8862 loss=4.298, nll_loss=2.663, ppl=6.33, wps=52229, ups=2, wpb=15186.273, bsz=570.016, num_updates=18098, lr=0.000235063, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5415, train_wall=4859 | |
| epoch 003: 390 / 8862 loss=4.300, nll_loss=2.665, ppl=6.34, wps=52230, ups=2, wpb=15188.266, bsz=571.008, num_updates=18108, lr=0.000234998, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5418, train_wall=4861 | |
| epoch 003: 400 / 8862 loss=4.303, nll_loss=2.668, ppl=6.36, wps=52226, ups=3, wpb=15183.145, bsz=569.397, num_updates=18118, lr=0.000234933, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5421, train_wall=4864 | |
| epoch 003: 410 / 8862 loss=4.305, nll_loss=2.670, ppl=6.37, wps=52217, ups=3, wpb=15181.049, bsz=569.032, num_updates=18128, lr=0.000234869, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5423, train_wall=4867 | |
| epoch 003: 420 / 8862 loss=4.305, nll_loss=2.671, ppl=6.37, wps=52222, ups=3, wpb=15181.354, bsz=569.387, num_updates=18138, lr=0.000234804, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5426, train_wall=4869 | |
| epoch 003: 430 / 8862 loss=4.305, nll_loss=2.670, ppl=6.37, wps=52225, ups=3, wpb=15180.283, bsz=568.204, num_updates=18148, lr=0.000234739, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5429, train_wall=4872 | |
| epoch 003: 440 / 8862 loss=4.303, nll_loss=2.669, ppl=6.36, wps=52228, ups=3, wpb=15182.497, bsz=567.111, num_updates=18158, lr=0.000234675, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5432, train_wall=4875 | |
| epoch 003: 450 / 8862 loss=4.303, nll_loss=2.668, ppl=6.36, wps=52209, ups=3, wpb=15179.639, bsz=569.845, num_updates=18168, lr=0.00023461, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5435, train_wall=4877 | |
| epoch 003: 460 / 8862 loss=4.304, nll_loss=2.669, ppl=6.36, wps=52234, ups=3, wpb=15187.646, bsz=569.857, num_updates=18178, lr=0.000234545, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5438, train_wall=4880 | |
| epoch 003: 470 / 8862 loss=4.304, nll_loss=2.669, ppl=6.36, wps=52261, ups=3, wpb=15193.539, bsz=568.476, num_updates=18188, lr=0.000234481, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5441, train_wall=4883 | |
| epoch 003: 480 / 8862 loss=4.307, nll_loss=2.672, ppl=6.38, wps=52241, ups=3, wpb=15185.193, bsz=567.401, num_updates=18198, lr=0.000234416, gnorm=0.518, clip=0.000, oom=0.000, loss_scale=8.000, wall=5444, train_wall=4885 | |
| epoch 003: 490 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52250, ups=3, wpb=15187.662, bsz=566.811, num_updates=18208, lr=0.000234352, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5447, train_wall=4888 | |
| epoch 003: 500 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52269, ups=3, wpb=15194.198, bsz=566.084, num_updates=18218, lr=0.000234288, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5450, train_wall=4891 | |
| epoch 003: 510 / 8862 loss=4.309, nll_loss=2.675, ppl=6.38, wps=52267, ups=3, wpb=15191.491, bsz=564.196, num_updates=18228, lr=0.000234224, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5452, train_wall=4893 | |
| epoch 003: 520 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52261, ups=3, wpb=15188.553, bsz=565.313, num_updates=18238, lr=0.000234159, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5455, train_wall=4896 | |
| epoch 003: 530 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52262, ups=3, wpb=15187.755, bsz=565.635, num_updates=18248, lr=0.000234095, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5458, train_wall=4899 | |
| epoch 003: 540 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52241, ups=3, wpb=15178.433, bsz=564.835, num_updates=18258, lr=0.000234031, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5461, train_wall=4901 | |
| epoch 003: 550 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52254, ups=3, wpb=15181.220, bsz=565.779, num_updates=18268, lr=0.000233967, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5464, train_wall=4904 | |
| epoch 003: 560 / 8862 loss=4.308, nll_loss=2.673, ppl=6.38, wps=52240, ups=3, wpb=15176.102, bsz=565.234, num_updates=18278, lr=0.000233903, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5467, train_wall=4907 | |
| epoch 003: 570 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52242, ups=3, wpb=15176.709, bsz=566.697, num_updates=18288, lr=0.000233839, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5470, train_wall=4910 | |
| epoch 003: 580 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52238, ups=3, wpb=15171.972, bsz=565.012, num_updates=18298, lr=0.000233775, gnorm=0.517, clip=0.000, oom=0.000, loss_scale=8.000, wall=5473, train_wall=4912 | |
| epoch 003: 590 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52275, ups=3, wpb=15179.980, bsz=563.804, num_updates=18308, lr=0.000233711, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5476, train_wall=4915 | |
| epoch 003: 600 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52273, ups=3, wpb=15176.850, bsz=563.421, num_updates=18318, lr=0.000233647, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5478, train_wall=4917 | |
| epoch 003: 610 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52286, ups=3, wpb=15179.625, bsz=562.527, num_updates=18328, lr=0.000233584, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5481, train_wall=4920 | |
| epoch 003: 620 / 8862 loss=4.309, nll_loss=2.675, ppl=6.38, wps=52286, ups=3, wpb=15178.155, bsz=561.636, num_updates=18338, lr=0.00023352, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5484, train_wall=4923 | |
| epoch 003: 630 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52302, ups=3, wpb=15180.550, bsz=560.634, num_updates=18348, lr=0.000233456, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5487, train_wall=4925 | |
| epoch 003: 640 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52318, ups=3, wpb=15183.705, bsz=560.412, num_updates=18358, lr=0.000233393, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5490, train_wall=4928 | |
| epoch 003: 650 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52321, ups=3, wpb=15182.206, bsz=560.627, num_updates=18368, lr=0.000233329, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5493, train_wall=4931 | |
| epoch 003: 660 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52329, ups=3, wpb=15183.929, bsz=559.334, num_updates=18378, lr=0.000233266, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5496, train_wall=4933 | |
| epoch 003: 670 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52341, ups=3, wpb=15187.089, bsz=560.048, num_updates=18388, lr=0.000233202, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5499, train_wall=4936 | |
| epoch 003: 680 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52347, ups=3, wpb=15186.131, bsz=558.485, num_updates=18398, lr=0.000233139, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5502, train_wall=4939 | |
| epoch 003: 690 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52375, ups=3, wpb=15193.135, bsz=558.298, num_updates=18408, lr=0.000233076, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5504, train_wall=4941 | |
| epoch 003: 700 / 8862 loss=4.308, nll_loss=2.673, ppl=6.38, wps=52379, ups=3, wpb=15192.478, bsz=557.421, num_updates=18418, lr=0.000233012, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5507, train_wall=4944 | |
| epoch 003: 710 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52369, ups=3, wpb=15189.402, bsz=558.672, num_updates=18428, lr=0.000232949, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5510, train_wall=4947 | |
| epoch 003: 720 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52364, ups=3, wpb=15186.610, bsz=559.123, num_updates=18438, lr=0.000232886, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5513, train_wall=4949 | |
| epoch 003: 730 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52361, ups=3, wpb=15184.041, bsz=558.446, num_updates=18448, lr=0.000232823, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5516, train_wall=4952 | |
| epoch 003: 740 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52384, ups=3, wpb=15189.741, bsz=557.862, num_updates=18458, lr=0.00023276, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5519, train_wall=4955 | |
| epoch 003: 750 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52394, ups=3, wpb=15194.117, bsz=559.009, num_updates=18468, lr=0.000232697, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5522, train_wall=4957 | |
| epoch 003: 760 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52390, ups=3, wpb=15191.101, bsz=558.392, num_updates=18478, lr=0.000232634, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5525, train_wall=4960 | |
| epoch 003: 770 / 8862 loss=4.305, nll_loss=2.670, ppl=6.37, wps=52398, ups=3, wpb=15194.351, bsz=558.942, num_updates=18488, lr=0.000232571, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5528, train_wall=4963 | |
| epoch 003: 780 / 8862 loss=4.304, nll_loss=2.670, ppl=6.36, wps=52403, ups=3, wpb=15197.275, bsz=560.502, num_updates=18498, lr=0.000232508, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5530, train_wall=4966 | |
| epoch 003: 790 / 8862 loss=4.305, nll_loss=2.670, ppl=6.36, wps=52405, ups=3, wpb=15195.234, bsz=559.525, num_updates=18508, lr=0.000232445, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5533, train_wall=4968 | |
| epoch 003: 800 / 8862 loss=4.306, nll_loss=2.671, ppl=6.37, wps=52403, ups=3, wpb=15193.586, bsz=559.361, num_updates=18518, lr=0.000232382, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5536, train_wall=4971 | |
| epoch 003: 810 / 8862 loss=4.305, nll_loss=2.671, ppl=6.37, wps=52401, ups=3, wpb=15192.434, bsz=560.217, num_updates=18528, lr=0.00023232, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5539, train_wall=4974 | |
| epoch 003: 820 / 8862 loss=4.305, nll_loss=2.671, ppl=6.37, wps=52416, ups=3, wpb=15195.957, bsz=560.000, num_updates=18538, lr=0.000232257, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5542, train_wall=4976 | |
| epoch 003: 830 / 8862 loss=4.305, nll_loss=2.670, ppl=6.37, wps=52410, ups=3, wpb=15192.792, bsz=559.134, num_updates=18548, lr=0.000232194, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5545, train_wall=4979 | |
| epoch 003: 840 / 8862 loss=4.306, nll_loss=2.671, ppl=6.37, wps=52411, ups=3, wpb=15191.504, bsz=558.449, num_updates=18558, lr=0.000232132, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5548, train_wall=4981 | |
| epoch 003: 850 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52406, ups=3, wpb=15189.959, bsz=558.844, num_updates=18568, lr=0.000232069, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5551, train_wall=4984 | |
| epoch 003: 860 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52408, ups=3, wpb=15189.710, bsz=558.142, num_updates=18578, lr=0.000232007, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5554, train_wall=4987 | |
| epoch 003: 870 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52408, ups=3, wpb=15187.392, bsz=557.171, num_updates=18588, lr=0.000231944, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5556, train_wall=4989 | |
| epoch 003: 880 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52408, ups=3, wpb=15186.392, bsz=557.022, num_updates=18598, lr=0.000231882, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5559, train_wall=4992 | |
| epoch 003: 890 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52407, ups=3, wpb=15188.090, bsz=558.770, num_updates=18608, lr=0.00023182, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5562, train_wall=4995 | |
| epoch 003: 900 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52415, ups=3, wpb=15188.178, bsz=557.496, num_updates=18618, lr=0.000231757, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5565, train_wall=4997 | |
| epoch 003: 910 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52411, ups=3, wpb=15185.598, bsz=557.260, num_updates=18628, lr=0.000231695, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5568, train_wall=5000 | |
| epoch 003: 920 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52413, ups=3, wpb=15185.034, bsz=557.021, num_updates=18638, lr=0.000231633, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5571, train_wall=5003 | |
| epoch 003: 930 / 8862 loss=4.309, nll_loss=2.675, ppl=6.38, wps=52413, ups=3, wpb=15184.524, bsz=556.365, num_updates=18648, lr=0.000231571, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5574, train_wall=5005 | |
| epoch 003: 940 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52425, ups=3, wpb=15186.442, bsz=555.996, num_updates=18658, lr=0.000231509, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5577, train_wall=5008 | |
| epoch 003: 950 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52434, ups=3, wpb=15188.377, bsz=555.769, num_updates=18668, lr=0.000231447, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5579, train_wall=5011 | |
| epoch 003: 960 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52440, ups=3, wpb=15188.814, bsz=555.555, num_updates=18678, lr=0.000231385, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5582, train_wall=5013 | |
| epoch 003: 970 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52443, ups=3, wpb=15189.488, bsz=556.358, num_updates=18688, lr=0.000231323, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5585, train_wall=5016 | |
| epoch 003: 980 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52444, ups=3, wpb=15189.086, bsz=556.020, num_updates=18698, lr=0.000231261, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5588, train_wall=5019 | |
| epoch 003: 990 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52440, ups=3, wpb=15188.288, bsz=556.642, num_updates=18708, lr=0.000231199, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5591, train_wall=5021 | |
| epoch 003: 1000 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52439, ups=3, wpb=15186.231, bsz=556.124, num_updates=18718, lr=0.000231137, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5594, train_wall=5024 | |
| epoch 003: 1010 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52448, ups=3, wpb=15188.284, bsz=556.993, num_updates=18728, lr=0.000231076, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5597, train_wall=5027 | |
| epoch 003: 1020 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52445, ups=3, wpb=15186.241, bsz=556.709, num_updates=18738, lr=0.000231014, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5600, train_wall=5029 | |
| epoch 003: 1030 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52451, ups=3, wpb=15187.180, bsz=556.361, num_updates=18748, lr=0.000230952, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5602, train_wall=5032 | |
| epoch 003: 1040 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52456, ups=3, wpb=15188.413, bsz=555.704, num_updates=18758, lr=0.000230891, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5605, train_wall=5035 | |
| epoch 003: 1050 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52443, ups=3, wpb=15189.872, bsz=557.062, num_updates=18768, lr=0.000230829, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5608, train_wall=5038 | |
| epoch 003: 1060 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52431, ups=3, wpb=15190.594, bsz=558.658, num_updates=18778, lr=0.000230768, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5611, train_wall=5040 | |
| epoch 003: 1070 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52417, ups=3, wpb=15185.505, bsz=559.365, num_updates=18788, lr=0.000230706, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5614, train_wall=5043 | |
| epoch 003: 1080 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52407, ups=3, wpb=15182.724, bsz=559.563, num_updates=18798, lr=0.000230645, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5617, train_wall=5046 | |
| epoch 003: 1090 / 8862 loss=4.307, nll_loss=2.672, ppl=6.37, wps=52402, ups=3, wpb=15181.573, bsz=559.281, num_updates=18808, lr=0.000230584, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5620, train_wall=5048 | |
| epoch 003: 1100 / 8862 loss=4.307, nll_loss=2.672, ppl=6.38, wps=52400, ups=3, wpb=15181.264, bsz=559.230, num_updates=18818, lr=0.000230522, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5623, train_wall=5051 | |
| epoch 003: 1110 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52401, ups=3, wpb=15181.411, bsz=559.489, num_updates=18828, lr=0.000230461, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5626, train_wall=5054 | |
| epoch 003: 1120 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52404, ups=3, wpb=15181.581, bsz=558.894, num_updates=18838, lr=0.0002304, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5629, train_wall=5056 | |
| epoch 003: 1130 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52401, ups=3, wpb=15181.748, bsz=559.342, num_updates=18848, lr=0.000230339, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5632, train_wall=5059 | |
| epoch 003: 1140 / 8862 loss=4.306, nll_loss=2.671, ppl=6.37, wps=52406, ups=3, wpb=15182.812, bsz=559.509, num_updates=18858, lr=0.000230278, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5635, train_wall=5062 | |
| epoch 003: 1150 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52401, ups=3, wpb=15180.436, bsz=558.923, num_updates=18868, lr=0.000230217, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5637, train_wall=5064 | |
| epoch 003: 1160 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52400, ups=3, wpb=15180.803, bsz=558.470, num_updates=18878, lr=0.000230156, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5640, train_wall=5067 | |
| epoch 003: 1170 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52394, ups=3, wpb=15177.963, bsz=557.868, num_updates=18888, lr=0.000230095, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5643, train_wall=5070 | |
| epoch 003: 1180 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52402, ups=3, wpb=15180.101, bsz=557.514, num_updates=18898, lr=0.000230034, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5646, train_wall=5072 | |
| epoch 003: 1190 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52410, ups=3, wpb=15182.915, bsz=557.837, num_updates=18908, lr=0.000229973, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5649, train_wall=5075 | |
| epoch 003: 1200 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52414, ups=3, wpb=15183.998, bsz=557.782, num_updates=18918, lr=0.000229912, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5652, train_wall=5078 | |
| epoch 003: 1210 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52410, ups=3, wpb=15182.836, bsz=558.368, num_updates=18928, lr=0.000229852, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5655, train_wall=5080 | |
| epoch 003: 1220 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52412, ups=3, wpb=15183.204, bsz=558.034, num_updates=18938, lr=0.000229791, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5658, train_wall=5083 | |
| epoch 003: 1230 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52418, ups=3, wpb=15184.600, bsz=558.167, num_updates=18948, lr=0.00022973, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5661, train_wall=5086 | |
| epoch 003: 1240 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52401, ups=3, wpb=15183.311, bsz=559.143, num_updates=18958, lr=0.00022967, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5664, train_wall=5089 | |
| epoch 003: 1250 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52403, ups=3, wpb=15183.922, bsz=558.843, num_updates=18968, lr=0.000229609, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5666, train_wall=5091 | |
| epoch 003: 1260 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52395, ups=3, wpb=15180.343, bsz=558.306, num_updates=18978, lr=0.000229549, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5669, train_wall=5094 | |
| epoch 003: 1270 / 8862 loss=4.309, nll_loss=2.675, ppl=6.39, wps=52393, ups=3, wpb=15179.869, bsz=557.784, num_updates=18988, lr=0.000229488, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5672, train_wall=5097 | |
| epoch 003: 1280 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52393, ups=3, wpb=15179.158, bsz=557.352, num_updates=18998, lr=0.000229428, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5675, train_wall=5099 | |
| epoch 003: 1290 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52380, ups=3, wpb=15177.517, bsz=558.327, num_updates=19008, lr=0.000229367, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5678, train_wall=5102 | |
| epoch 003: 1300 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52378, ups=3, wpb=15177.328, bsz=557.633, num_updates=19018, lr=0.000229307, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5681, train_wall=5105 | |
| epoch 003: 1310 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52375, ups=3, wpb=15176.934, bsz=557.962, num_updates=19028, lr=0.000229247, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5684, train_wall=5107 | |
| epoch 003: 1320 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52381, ups=3, wpb=15179.621, bsz=557.214, num_updates=19038, lr=0.000229187, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5687, train_wall=5110 | |
| epoch 003: 1330 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52382, ups=3, wpb=15179.415, bsz=557.698, num_updates=19048, lr=0.000229126, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5690, train_wall=5113 | |
| epoch 003: 1340 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52385, ups=3, wpb=15180.765, bsz=557.405, num_updates=19058, lr=0.000229066, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5693, train_wall=5115 | |
| epoch 003: 1350 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52387, ups=3, wpb=15182.377, bsz=557.128, num_updates=19068, lr=0.000229006, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5696, train_wall=5118 | |
| epoch 003: 1360 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52378, ups=3, wpb=15180.209, bsz=557.737, num_updates=19078, lr=0.000228946, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5698, train_wall=5121 | |
| epoch 003: 1370 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52371, ups=3, wpb=15178.059, bsz=557.683, num_updates=19088, lr=0.000228886, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5701, train_wall=5123 | |
| epoch 003: 1380 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52364, ups=3, wpb=15176.615, bsz=557.770, num_updates=19098, lr=0.000228826, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5704, train_wall=5126 | |
| epoch 003: 1390 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52364, ups=3, wpb=15177.324, bsz=558.085, num_updates=19108, lr=0.000228766, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5707, train_wall=5129 | |
| epoch 003: 1400 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52360, ups=3, wpb=15176.617, bsz=558.041, num_updates=19118, lr=0.000228707, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5710, train_wall=5132 | |
| epoch 003: 1410 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52358, ups=3, wpb=15175.036, bsz=557.517, num_updates=19128, lr=0.000228647, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5713, train_wall=5134 | |
| epoch 003: 1420 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52357, ups=3, wpb=15175.588, bsz=557.557, num_updates=19138, lr=0.000228587, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5716, train_wall=5137 | |
| epoch 003: 1430 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52360, ups=3, wpb=15176.381, bsz=557.842, num_updates=19148, lr=0.000228527, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5719, train_wall=5140 | |
| epoch 003: 1440 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52348, ups=3, wpb=15172.470, bsz=557.380, num_updates=19158, lr=0.000228468, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5722, train_wall=5142 | |
| epoch 003: 1450 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52350, ups=3, wpb=15172.480, bsz=556.929, num_updates=19168, lr=0.000228408, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5725, train_wall=5145 | |
| epoch 003: 1460 / 8862 loss=4.312, nll_loss=2.678, ppl=6.40, wps=52351, ups=3, wpb=15172.673, bsz=556.890, num_updates=19178, lr=0.000228349, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5727, train_wall=5148 | |
| epoch 003: 1470 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52353, ups=3, wpb=15173.014, bsz=556.644, num_updates=19188, lr=0.000228289, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5730, train_wall=5150 | |
| epoch 003: 1480 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52355, ups=3, wpb=15174.706, bsz=557.985, num_updates=19198, lr=0.00022823, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5733, train_wall=5153 | |
| epoch 003: 1490 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52353, ups=3, wpb=15173.932, bsz=557.580, num_updates=19208, lr=0.00022817, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5736, train_wall=5156 | |
| epoch 003: 1500 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52350, ups=3, wpb=15173.433, bsz=558.151, num_updates=19218, lr=0.000228111, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5739, train_wall=5158 | |
| epoch 003: 1510 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52349, ups=3, wpb=15172.389, bsz=557.475, num_updates=19228, lr=0.000228052, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5742, train_wall=5161 | |
| epoch 003: 1520 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52356, ups=3, wpb=15174.249, bsz=557.433, num_updates=19238, lr=0.000227992, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5745, train_wall=5164 | |
| epoch 003: 1530 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52358, ups=3, wpb=15175.008, bsz=557.696, num_updates=19248, lr=0.000227933, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5748, train_wall=5166 | |
| epoch 003: 1540 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52365, ups=3, wpb=15176.701, bsz=557.534, num_updates=19258, lr=0.000227874, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5751, train_wall=5169 | |
| epoch 003: 1550 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52368, ups=3, wpb=15177.420, bsz=556.962, num_updates=19268, lr=0.000227815, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5753, train_wall=5172 | |
| epoch 003: 1560 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52361, ups=3, wpb=15175.336, bsz=557.427, num_updates=19278, lr=0.000227756, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5756, train_wall=5174 | |
| epoch 003: 1570 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52360, ups=3, wpb=15174.789, bsz=557.469, num_updates=19288, lr=0.000227697, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5759, train_wall=5177 | |
| epoch 003: 1580 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52360, ups=3, wpb=15175.322, bsz=557.101, num_updates=19298, lr=0.000227638, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5762, train_wall=5180 | |
| epoch 003: 1590 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52358, ups=3, wpb=15174.898, bsz=557.003, num_updates=19308, lr=0.000227579, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5765, train_wall=5182 | |
| epoch 003: 1600 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52358, ups=3, wpb=15174.425, bsz=556.817, num_updates=19318, lr=0.00022752, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5768, train_wall=5185 | |
| epoch 003: 1610 / 8862 loss=4.312, nll_loss=2.678, ppl=6.40, wps=52353, ups=3, wpb=15172.139, bsz=556.583, num_updates=19328, lr=0.000227461, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5771, train_wall=5188 | |
| epoch 003: 1620 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52354, ups=3, wpb=15172.768, bsz=557.024, num_updates=19338, lr=0.000227402, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5774, train_wall=5190 | |
| epoch 003: 1630 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52355, ups=3, wpb=15173.188, bsz=556.934, num_updates=19348, lr=0.000227343, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5777, train_wall=5193 | |
| epoch 003: 1640 / 8862 loss=4.312, nll_loss=2.679, ppl=6.40, wps=52357, ups=3, wpb=15173.971, bsz=557.309, num_updates=19358, lr=0.000227284, gnorm=0.516, clip=0.000, oom=0.000, loss_scale=8.000, wall=5780, train_wall=5196 | |
| epoch 003: 1650 / 8862 loss=4.312, nll_loss=2.679, ppl=6.40, wps=52361, ups=3, wpb=15174.755, bsz=557.320, num_updates=19368, lr=0.000227226, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5782, train_wall=5198 | |
| epoch 003: 1660 / 8862 loss=4.313, nll_loss=2.680, ppl=6.41, wps=52359, ups=3, wpb=15173.335, bsz=556.918, num_updates=19378, lr=0.000227167, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5785, train_wall=5201 | |
| epoch 003: 1670 / 8862 loss=4.313, nll_loss=2.681, ppl=6.41, wps=52357, ups=3, wpb=15172.387, bsz=556.706, num_updates=19388, lr=0.000227109, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5788, train_wall=5204 | |
| epoch 003: 1680 / 8862 loss=4.314, nll_loss=2.681, ppl=6.41, wps=52355, ups=3, wpb=15171.858, bsz=556.302, num_updates=19398, lr=0.00022705, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5791, train_wall=5206 | |
| epoch 003: 1690 / 8862 loss=4.313, nll_loss=2.680, ppl=6.41, wps=52354, ups=3, wpb=15171.885, bsz=556.149, num_updates=19408, lr=0.000226992, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5794, train_wall=5209 | |
| epoch 003: 1700 / 8862 loss=4.313, nll_loss=2.680, ppl=6.41, wps=52358, ups=3, wpb=15173.560, bsz=556.633, num_updates=19418, lr=0.000226933, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5797, train_wall=5212 | |
| epoch 003: 1710 / 8862 loss=4.312, nll_loss=2.679, ppl=6.41, wps=52357, ups=3, wpb=15173.649, bsz=557.227, num_updates=19428, lr=0.000226875, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5800, train_wall=5214 | |
| epoch 003: 1720 / 8862 loss=4.313, nll_loss=2.680, ppl=6.41, wps=52363, ups=3, wpb=15174.485, bsz=556.890, num_updates=19438, lr=0.000226816, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5803, train_wall=5217 | |
| epoch 003: 1730 / 8862 loss=4.313, nll_loss=2.680, ppl=6.41, wps=52366, ups=3, wpb=15175.886, bsz=556.603, num_updates=19448, lr=0.000226758, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5806, train_wall=5220 | |
| epoch 003: 1740 / 8862 loss=4.312, nll_loss=2.679, ppl=6.41, wps=52373, ups=3, wpb=15178.148, bsz=556.834, num_updates=19458, lr=0.0002267, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5809, train_wall=5223 | |
| epoch 003: 1750 / 8862 loss=4.312, nll_loss=2.679, ppl=6.40, wps=52375, ups=3, wpb=15179.037, bsz=556.921, num_updates=19468, lr=0.000226641, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5811, train_wall=5225 | |
| epoch 003: 1760 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52369, ups=3, wpb=15179.306, bsz=557.978, num_updates=19478, lr=0.000226583, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5814, train_wall=5228 | |
| epoch 003: 1770 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52368, ups=3, wpb=15179.170, bsz=558.288, num_updates=19488, lr=0.000226525, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5817, train_wall=5231 | |
| epoch 003: 1780 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52376, ups=3, wpb=15180.587, bsz=558.055, num_updates=19498, lr=0.000226467, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5820, train_wall=5233 | |
| epoch 003: 1790 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52380, ups=3, wpb=15181.585, bsz=557.655, num_updates=19508, lr=0.000226409, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5823, train_wall=5236 | |
| epoch 003: 1800 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52380, ups=3, wpb=15181.208, bsz=557.157, num_updates=19518, lr=0.000226351, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5826, train_wall=5239 | |
| epoch 003: 1810 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52382, ups=3, wpb=15181.644, bsz=556.965, num_updates=19528, lr=0.000226293, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5829, train_wall=5241 | |
| epoch 003: 1820 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52384, ups=3, wpb=15182.338, bsz=557.092, num_updates=19538, lr=0.000226235, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5832, train_wall=5244 | |
| epoch 003: 1830 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52384, ups=3, wpb=15181.830, bsz=556.924, num_updates=19548, lr=0.000226177, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5835, train_wall=5247 | |
| epoch 003: 1840 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52377, ups=3, wpb=15179.361, bsz=556.515, num_updates=19558, lr=0.000226119, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5838, train_wall=5249 | |
| epoch 003: 1850 / 8862 loss=4.311, nll_loss=2.678, ppl=6.40, wps=52372, ups=3, wpb=15178.668, bsz=557.394, num_updates=19568, lr=0.000226062, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5840, train_wall=5252 | |
| epoch 003: 1860 / 8862 loss=4.311, nll_loss=2.677, ppl=6.40, wps=52374, ups=3, wpb=15180.088, bsz=557.515, num_updates=19578, lr=0.000226004, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5843, train_wall=5255 | |
| epoch 003: 1870 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52376, ups=3, wpb=15180.340, bsz=557.375, num_updates=19588, lr=0.000225946, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5846, train_wall=5257 | |
| epoch 003: 1880 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52374, ups=3, wpb=15180.035, bsz=557.635, num_updates=19598, lr=0.000225889, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5849, train_wall=5260 | |
| epoch 003: 1890 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52374, ups=3, wpb=15180.298, bsz=557.754, num_updates=19608, lr=0.000225831, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5852, train_wall=5263 | |
| epoch 003: 1900 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52375, ups=3, wpb=15180.665, bsz=557.694, num_updates=19618, lr=0.000225773, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5855, train_wall=5265 | |
| epoch 003: 1910 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52376, ups=3, wpb=15180.823, bsz=557.597, num_updates=19628, lr=0.000225716, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5858, train_wall=5268 | |
| epoch 003: 1920 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52376, ups=3, wpb=15180.874, bsz=557.772, num_updates=19638, lr=0.000225658, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5861, train_wall=5271 | |
| epoch 003: 1930 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52373, ups=3, wpb=15179.937, bsz=557.527, num_updates=19648, lr=0.000225601, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5864, train_wall=5273 | |
| epoch 003: 1940 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52373, ups=3, wpb=15179.930, bsz=557.577, num_updates=19658, lr=0.000225544, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5867, train_wall=5276 | |
| epoch 003: 1950 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52375, ups=3, wpb=15180.119, bsz=557.519, num_updates=19668, lr=0.000225486, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5869, train_wall=5279 | |
| epoch 003: 1960 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52377, ups=3, wpb=15180.710, bsz=557.642, num_updates=19678, lr=0.000225429, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5872, train_wall=5281 | |
| epoch 003: 1970 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52367, ups=3, wpb=15177.935, bsz=557.475, num_updates=19688, lr=0.000225372, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5875, train_wall=5284 | |
| epoch 003: 1980 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52364, ups=3, wpb=15176.964, bsz=557.424, num_updates=19698, lr=0.000225314, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5878, train_wall=5287 | |
| epoch 003: 1990 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52362, ups=3, wpb=15176.100, bsz=557.139, num_updates=19708, lr=0.000225257, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5881, train_wall=5290 | |
| epoch 003: 2000 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52360, ups=3, wpb=15175.607, bsz=557.025, num_updates=19718, lr=0.0002252, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5884, train_wall=5292 | |
| epoch 003: 2010 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52359, ups=3, wpb=15175.106, bsz=556.933, num_updates=19728, lr=0.000225143, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5887, train_wall=5295 | |
| epoch 003: 2020 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52356, ups=3, wpb=15175.451, bsz=557.281, num_updates=19738, lr=0.000225086, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5890, train_wall=5298 | |
| epoch 003: 2030 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52356, ups=3, wpb=15175.054, bsz=556.947, num_updates=19748, lr=0.000225029, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5893, train_wall=5300 | |
| epoch 003: 2040 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52357, ups=3, wpb=15175.184, bsz=556.864, num_updates=19758, lr=0.000224972, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5896, train_wall=5303 | |
| epoch 003: 2050 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52363, ups=3, wpb=15176.908, bsz=556.805, num_updates=19768, lr=0.000224915, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5898, train_wall=5306 | |
| epoch 003: 2060 / 8862 loss=4.310, nll_loss=2.676, ppl=6.39, wps=52361, ups=3, wpb=15176.037, bsz=556.689, num_updates=19778, lr=0.000224858, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5901, train_wall=5308 | |
| epoch 003: 2070 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52354, ups=3, wpb=15174.029, bsz=556.767, num_updates=19788, lr=0.000224801, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5904, train_wall=5311 | |
| epoch 003: 2080 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52356, ups=3, wpb=15174.487, bsz=556.682, num_updates=19798, lr=0.000224745, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5907, train_wall=5314 | |
| epoch 003: 2090 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52352, ups=3, wpb=15173.886, bsz=557.050, num_updates=19808, lr=0.000224688, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5910, train_wall=5316 | |
| epoch 003: 2100 / 8862 loss=4.310, nll_loss=2.677, ppl=6.40, wps=52356, ups=3, wpb=15174.963, bsz=556.969, num_updates=19818, lr=0.000224631, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=5913, train_wall=5319 | |
| epoch 003: 2110 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52359, ups=3, wpb=15175.934, bsz=557.158, num_updates=19828, lr=0.000224575, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5916, train_wall=5322 | |
| epoch 003: 2120 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52356, ups=3, wpb=15175.087, bsz=557.326, num_updates=19838, lr=0.000224518, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5919, train_wall=5324 | |
| epoch 003: 2130 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52354, ups=3, wpb=15174.194, bsz=557.226, num_updates=19848, lr=0.000224461, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5922, train_wall=5327 | |
| epoch 003: 2140 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52353, ups=3, wpb=15173.538, bsz=557.336, num_updates=19858, lr=0.000224405, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5924, train_wall=5330 | |
| epoch 003: 2150 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52353, ups=3, wpb=15173.325, bsz=557.694, num_updates=19868, lr=0.000224348, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5927, train_wall=5332 | |
| epoch 003: 2160 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52354, ups=3, wpb=15173.638, bsz=557.808, num_updates=19878, lr=0.000224292, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5930, train_wall=5335 | |
| epoch 003: 2170 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52346, ups=3, wpb=15172.239, bsz=557.620, num_updates=19888, lr=0.000224236, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5933, train_wall=5338 | |
| epoch 003: 2180 / 8862 loss=4.310, nll_loss=2.677, ppl=6.39, wps=52346, ups=3, wpb=15173.217, bsz=557.513, num_updates=19898, lr=0.000224179, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5936, train_wall=5340 | |
| epoch 003: 2190 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52345, ups=3, wpb=15173.834, bsz=557.630, num_updates=19908, lr=0.000224123, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5939, train_wall=5343 | |
| epoch 003: 2200 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52347, ups=3, wpb=15173.852, bsz=557.136, num_updates=19918, lr=0.000224067, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5942, train_wall=5346 | |
| epoch 003: 2210 / 8862 loss=4.309, nll_loss=2.676, ppl=6.39, wps=52347, ups=3, wpb=15174.022, bsz=556.972, num_updates=19928, lr=0.00022401, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5945, train_wall=5349 | |
| epoch 003: 2220 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52351, ups=3, wpb=15175.241, bsz=557.100, num_updates=19938, lr=0.000223954, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5948, train_wall=5351 | |
| epoch 003: 2230 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52356, ups=3, wpb=15176.489, bsz=557.099, num_updates=19948, lr=0.000223898, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5951, train_wall=5354 | |
| epoch 003: 2240 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52355, ups=3, wpb=15176.201, bsz=557.398, num_updates=19958, lr=0.000223842, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5954, train_wall=5357 | |
| epoch 003: 2250 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52356, ups=3, wpb=15176.803, bsz=557.462, num_updates=19968, lr=0.000223786, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5956, train_wall=5359 | |
| epoch 003: 2260 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52356, ups=3, wpb=15176.693, bsz=557.587, num_updates=19978, lr=0.00022373, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5959, train_wall=5362 | |
| epoch 003: 2270 / 8862 loss=4.308, nll_loss=2.675, ppl=6.39, wps=52356, ups=3, wpb=15177.339, bsz=557.770, num_updates=19988, lr=0.000223674, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5962, train_wall=5365 | |
| epoch 003: 2280 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52354, ups=3, wpb=15178.099, bsz=558.089, num_updates=19998, lr=0.000223618, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5965, train_wall=5367 | |
| epoch 003: 2290 / 8862 loss=4.308, nll_loss=2.675, ppl=6.38, wps=52354, ups=3, wpb=15177.636, bsz=558.083, num_updates=20008, lr=0.000223562, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5968, train_wall=5370 | |
| epoch 003: 2300 / 8862 loss=4.308, nll_loss=2.675, ppl=6.38, wps=52355, ups=3, wpb=15178.485, bsz=558.067, num_updates=20018, lr=0.000223506, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5971, train_wall=5373 | |
| epoch 003: 2310 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52360, ups=3, wpb=15180.006, bsz=557.892, num_updates=20028, lr=0.00022345, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5974, train_wall=5375 | |
| epoch 003: 2320 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52360, ups=3, wpb=15180.216, bsz=558.039, num_updates=20038, lr=0.000223395, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5977, train_wall=5378 | |
| epoch 003: 2330 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52360, ups=3, wpb=15180.032, bsz=557.766, num_updates=20048, lr=0.000223339, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5980, train_wall=5381 | |
| epoch 003: 2340 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52362, ups=3, wpb=15180.692, bsz=558.025, num_updates=20058, lr=0.000223283, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5983, train_wall=5383 | |
| epoch 003: 2350 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52359, ups=3, wpb=15179.144, bsz=557.795, num_updates=20068, lr=0.000223228, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=5986, train_wall=5386 | |
| epoch 003: 2360 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52361, ups=3, wpb=15179.073, bsz=557.401, num_updates=20078, lr=0.000223172, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5988, train_wall=5389 | |
| epoch 003: 2370 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52368, ups=3, wpb=15180.477, bsz=557.588, num_updates=20088, lr=0.000223116, gnorm=0.514, clip=0.000, oom=0.000, loss_scale=8.000, wall=5991, train_wall=5391 | |
| epoch 003: 2380 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52369, ups=3, wpb=15180.369, bsz=557.581, num_updates=20098, lr=0.000223061, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=5994, train_wall=5394 | |
| epoch 003: 2390 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52364, ups=3, wpb=15178.714, bsz=557.942, num_updates=20108, lr=0.000223005, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=5997, train_wall=5397 | |
| epoch 003: 2400 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52367, ups=3, wpb=15179.146, bsz=557.864, num_updates=20118, lr=0.00022295, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6000, train_wall=5399 | |
| epoch 003: 2410 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52368, ups=3, wpb=15179.055, bsz=557.837, num_updates=20128, lr=0.000222895, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6003, train_wall=5402 | |
| epoch 003: 2420 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52369, ups=3, wpb=15178.701, bsz=557.803, num_updates=20138, lr=0.000222839, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6006, train_wall=5405 | |
| epoch 003: 2430 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52370, ups=3, wpb=15178.309, bsz=557.529, num_updates=20148, lr=0.000222784, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6009, train_wall=5407 | |
| epoch 003: 2440 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52372, ups=3, wpb=15178.223, bsz=557.267, num_updates=20158, lr=0.000222729, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6011, train_wall=5410 | |
| epoch 003: 2450 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52371, ups=3, wpb=15177.137, bsz=556.896, num_updates=20168, lr=0.000222674, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6014, train_wall=5413 | |
| epoch 003: 2460 / 8862 loss=4.308, nll_loss=2.674, ppl=6.38, wps=52375, ups=3, wpb=15177.815, bsz=556.662, num_updates=20178, lr=0.000222618, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6017, train_wall=5415 | |
| epoch 003: 2470 / 8862 loss=4.307, nll_loss=2.674, ppl=6.38, wps=52374, ups=3, wpb=15177.023, bsz=556.707, num_updates=20188, lr=0.000222563, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6020, train_wall=5418 | |
| epoch 003: 2480 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52375, ups=3, wpb=15178.111, bsz=557.482, num_updates=20198, lr=0.000222508, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6023, train_wall=5421 | |
| epoch 003: 2490 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52379, ups=3, wpb=15178.485, bsz=557.212, num_updates=20208, lr=0.000222453, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6026, train_wall=5423 | |
| epoch 003: 2500 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52381, ups=3, wpb=15178.542, bsz=557.185, num_updates=20218, lr=0.000222398, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6029, train_wall=5426 | |
| epoch 003: 2510 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52384, ups=3, wpb=15179.033, bsz=557.107, num_updates=20228, lr=0.000222343, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6032, train_wall=5429 | |
| epoch 003: 2520 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52388, ups=3, wpb=15179.627, bsz=557.223, num_updates=20238, lr=0.000222288, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6034, train_wall=5431 | |
| epoch 003: 2530 / 8862 loss=4.307, nll_loss=2.673, ppl=6.38, wps=52392, ups=3, wpb=15180.286, bsz=557.354, num_updates=20248, lr=0.000222233, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6037, train_wall=5434 | |
| epoch 003: 2540 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52393, ups=3, wpb=15180.471, bsz=557.466, num_updates=20258, lr=0.000222178, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6040, train_wall=5437 | |
| epoch 003: 2550 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52391, ups=3, wpb=15179.249, bsz=557.221, num_updates=20268, lr=0.000222124, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6043, train_wall=5439 | |
| epoch 003: 2560 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52391, ups=3, wpb=15178.648, bsz=557.248, num_updates=20278, lr=0.000222069, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6046, train_wall=5442 | |
| epoch 003: 2570 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52395, ups=3, wpb=15179.671, bsz=557.355, num_updates=20288, lr=0.000222014, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6049, train_wall=5444 | |
| epoch 003: 2580 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52394, ups=3, wpb=15179.104, bsz=557.446, num_updates=20298, lr=0.000221959, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6052, train_wall=5447 | |
| epoch 003: 2590 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52395, ups=3, wpb=15178.856, bsz=557.690, num_updates=20308, lr=0.000221905, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6055, train_wall=5450 | |
| epoch 003: 2600 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52397, ups=3, wpb=15179.275, bsz=557.752, num_updates=20318, lr=0.00022185, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6057, train_wall=5452 | |
| epoch 003: 2610 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52399, ups=3, wpb=15179.722, bsz=557.987, num_updates=20328, lr=0.000221795, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6060, train_wall=5455 | |
| epoch 003: 2620 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52395, ups=3, wpb=15177.975, bsz=557.854, num_updates=20338, lr=0.000221741, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6063, train_wall=5458 | |
| epoch 003: 2630 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52395, ups=3, wpb=15178.269, bsz=557.905, num_updates=20348, lr=0.000221686, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6066, train_wall=5460 | |
| epoch 003: 2640 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52396, ups=3, wpb=15178.885, bsz=558.058, num_updates=20358, lr=0.000221632, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6069, train_wall=5463 | |
| epoch 003: 2650 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52399, ups=3, wpb=15179.819, bsz=557.993, num_updates=20368, lr=0.000221578, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=8.000, wall=6072, train_wall=5466 | |
| WARNING: overflow detected, setting loss scale to: 4.0 | |
| epoch 003: 2660 / 8862 loss=4.306, nll_loss=2.673, ppl=6.38, wps=52377, ups=3, wpb=15178.456, bsz=557.850, num_updates=20377, lr=0.000221529, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6075, train_wall=5469 | |
| epoch 003: 2670 / 8862 loss=4.306, nll_loss=2.672, ppl=6.38, wps=52378, ups=3, wpb=15178.785, bsz=557.708, num_updates=20387, lr=0.000221474, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6078, train_wall=5471 | |
| epoch 003: 2680 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52381, ups=3, wpb=15179.857, bsz=557.934, num_updates=20397, lr=0.00022142, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6081, train_wall=5474 | |
| epoch 003: 2690 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52379, ups=3, wpb=15179.196, bsz=557.984, num_updates=20407, lr=0.000221366, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6084, train_wall=5477 | |
| epoch 003: 2700 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52379, ups=3, wpb=15178.734, bsz=558.059, num_updates=20417, lr=0.000221312, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6086, train_wall=5479 | |
| epoch 003: 2710 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52372, ups=3, wpb=15177.349, bsz=558.430, num_updates=20427, lr=0.000221257, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6089, train_wall=5482 | |
| epoch 003: 2720 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52369, ups=3, wpb=15176.579, bsz=558.441, num_updates=20437, lr=0.000221203, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6092, train_wall=5485 | |
| epoch 003: 2730 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52367, ups=3, wpb=15176.370, bsz=558.139, num_updates=20447, lr=0.000221149, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6095, train_wall=5487 | |
| epoch 003: 2740 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52366, ups=3, wpb=15175.984, bsz=558.041, num_updates=20457, lr=0.000221095, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6098, train_wall=5490 | |
| epoch 003: 2750 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52365, ups=3, wpb=15175.315, bsz=557.894, num_updates=20467, lr=0.000221041, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6101, train_wall=5493 | |
| epoch 003: 2760 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52364, ups=3, wpb=15174.787, bsz=557.945, num_updates=20477, lr=0.000220987, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6104, train_wall=5495 | |
| epoch 003: 2770 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52361, ups=3, wpb=15174.410, bsz=558.085, num_updates=20487, lr=0.000220933, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6107, train_wall=5498 | |
| epoch 003: 2780 / 8862 loss=4.305, nll_loss=2.671, ppl=6.37, wps=52360, ups=3, wpb=15174.641, bsz=558.553, num_updates=20497, lr=0.000220879, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6110, train_wall=5501 | |
| epoch 003: 2790 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52362, ups=3, wpb=15175.145, bsz=558.280, num_updates=20507, lr=0.000220825, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6113, train_wall=5503 | |
| epoch 003: 2800 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52362, ups=3, wpb=15175.403, bsz=558.600, num_updates=20517, lr=0.000220772, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6115, train_wall=5506 | |
| epoch 003: 2810 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52363, ups=3, wpb=15175.966, bsz=558.670, num_updates=20527, lr=0.000220718, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6118, train_wall=5509 | |
| epoch 003: 2820 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52360, ups=3, wpb=15174.894, bsz=558.752, num_updates=20537, lr=0.000220664, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6121, train_wall=5511 | |
| epoch 003: 2830 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52361, ups=3, wpb=15174.673, bsz=558.680, num_updates=20547, lr=0.00022061, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6124, train_wall=5514 | |
| epoch 003: 2840 / 8862 loss=4.306, nll_loss=2.672, ppl=6.37, wps=52361, ups=3, wpb=15175.131, bsz=558.910, num_updates=20557, lr=0.000220557, gnorm=0.513, clip=0.000, oom=0.000, loss_scale=4.000, wall=6127, train_wall=5517 | |
| epoch 003: 2850 / 8862 loss=4.306, nll_loss=2.672, ppl=6.38, wps=52360, ups=3, wpb=15174.962, bsz=558.689, num_updates=20567, lr=0.000220503, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6130, train_wall=5519 | |
| epoch 003: 2860 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52363, ups=3, wpb=15175.494, bsz=559.035, num_updates=20577, lr=0.000220449, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6133, train_wall=5522 | |
| epoch 003: 2870 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52360, ups=3, wpb=15175.140, bsz=559.024, num_updates=20587, lr=0.000220396, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6136, train_wall=5525 | |
| epoch 003: 2880 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52362, ups=3, wpb=15175.278, bsz=558.922, num_updates=20597, lr=0.000220342, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6139, train_wall=5527 | |
| epoch 003: 2890 / 8862 loss=4.305, nll_loss=2.672, ppl=6.37, wps=52360, ups=3, wpb=15175.214, bsz=558.771, num_updates=20607, lr=0.000220289, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6142, train_wall=5530 | |
| epoch 003: 2900 / 8862 loss=4.305, nll_loss=2.671, ppl=6.37, wps=52359, ups=3, wpb=15175.012, bsz=558.742, num_updates=20617, lr=0.000220235, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6144, train_wall=5533 | |
| epoch 003: 2910 / 8862 loss=4.304, nll_loss=2.671, ppl=6.37, wps=52357, ups=3, wpb=15174.631, bsz=558.903, num_updates=20627, lr=0.000220182, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6147, train_wall=5536 | |
| epoch 003: 2920 / 8862 loss=4.304, nll_loss=2.671, ppl=6.37, wps=52360, ups=3, wpb=15175.158, bsz=559.005, num_updates=20637, lr=0.000220129, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6150, train_wall=5538 | |
| epoch 003: 2930 / 8862 loss=4.304, nll_loss=2.670, ppl=6.37, wps=52358, ups=3, wpb=15174.796, bsz=558.894, num_updates=20647, lr=0.000220075, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6153, train_wall=5541 | |
| epoch 003: 2940 / 8862 loss=4.304, nll_loss=2.670, ppl=6.37, wps=52359, ups=3, wpb=15175.002, bsz=558.925, num_updates=20657, lr=0.000220022, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6156, train_wall=5544 | |
| epoch 003: 2950 / 8862 loss=4.303, nll_loss=2.670, ppl=6.36, wps=52359, ups=3, wpb=15174.720, bsz=558.815, num_updates=20667, lr=0.000219969, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6159, train_wall=5546 | |
| epoch 003: 2960 / 8862 loss=4.303, nll_loss=2.670, ppl=6.36, wps=52358, ups=3, wpb=15174.781, bsz=558.832, num_updates=20677, lr=0.000219916, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6162, train_wall=5549 | |
| epoch 003: 2970 / 8862 loss=4.303, nll_loss=2.670, ppl=6.36, wps=52350, ups=3, wpb=15173.046, bsz=559.327, num_updates=20687, lr=0.000219863, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6165, train_wall=5552 | |
| epoch 003: 2980 / 8862 loss=4.303, nll_loss=2.670, ppl=6.36, wps=52344, ups=3, wpb=15172.891, bsz=559.820, num_updates=20697, lr=0.000219809, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6168, train_wall=5554 | |
| epoch 003: 2990 / 8862 loss=4.303, nll_loss=2.670, ppl=6.36, wps=52344, ups=3, wpb=15173.054, bsz=559.936, num_updates=20707, lr=0.000219756, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6171, train_wall=5557 | |
| epoch 003: 3000 / 8862 loss=4.303, nll_loss=2.669, ppl=6.36, wps=52339, ups=3, wpb=15173.013, bsz=560.547, num_updates=20717, lr=0.000219703, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6174, train_wall=5560 | |
| epoch 003: 3010 / 8862 loss=4.303, nll_loss=2.669, ppl=6.36, wps=52339, ups=3, wpb=15172.425, bsz=560.372, num_updates=20727, lr=0.00021965, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6177, train_wall=5562 | |
| epoch 003: 3020 / 8862 loss=4.303, nll_loss=2.669, ppl=6.36, wps=52340, ups=3, wpb=15173.162, bsz=560.403, num_updates=20737, lr=0.000219597, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6179, train_wall=5565 | |
| epoch 003: 3030 / 8862 loss=4.302, nll_loss=2.668, ppl=6.36, wps=52342, ups=3, wpb=15174.106, bsz=560.486, num_updates=20747, lr=0.000219544, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6182, train_wall=5568 | |
| epoch 003: 3040 / 8862 loss=4.302, nll_loss=2.668, ppl=6.36, wps=52343, ups=3, wpb=15174.623, bsz=560.558, num_updates=20757, lr=0.000219492, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6185, train_wall=5571 | |
| epoch 003: 3050 / 8862 loss=4.302, nll_loss=2.668, ppl=6.36, wps=52343, ups=3, wpb=15174.496, bsz=560.470, num_updates=20767, lr=0.000219439, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6188, train_wall=5573 | |
| epoch 003: 3060 / 8862 loss=4.302, nll_loss=2.668, ppl=6.36, wps=52336, ups=3, wpb=15172.944, bsz=560.617, num_updates=20777, lr=0.000219386, gnorm=0.512, clip=0.000, oom=0.000, loss_scale=4.000, wall=6191, train_wall=5576 | |
| epoch 003: 3070 / 8862 loss=4.302, nll_loss=2.668, ppl=6.35, wps=52339, ups=3, wpb=15174.127, bsz=560.750, num_updates=20787, lr=0.000219333, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6194, train_wall=5579 | |
| epoch 003: 3080 / 8862 loss=4.301, nll_loss=2.668, ppl=6.35, wps=52339, ups=3, wpb=15173.990, bsz=560.683, num_updates=20797, lr=0.00021928, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6197, train_wall=5581 | |
| epoch 003: 3090 / 8862 loss=4.301, nll_loss=2.667, ppl=6.35, wps=52342, ups=3, wpb=15174.723, bsz=560.694, num_updates=20807, lr=0.000219228, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6200, train_wall=5584 | |
| epoch 003: 3100 / 8862 loss=4.301, nll_loss=2.667, ppl=6.35, wps=52342, ups=3, wpb=15174.709, bsz=560.583, num_updates=20817, lr=0.000219175, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6203, train_wall=5587 | |
| epoch 003: 3110 / 8862 loss=4.301, nll_loss=2.667, ppl=6.35, wps=52346, ups=3, wpb=15175.901, bsz=560.885, num_updates=20827, lr=0.000219122, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6206, train_wall=5589 | |
| epoch 003: 3120 / 8862 loss=4.301, nll_loss=2.667, ppl=6.35, wps=52344, ups=3, wpb=15175.879, bsz=560.664, num_updates=20837, lr=0.00021907, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6209, train_wall=5592 | |
| epoch 003: 3130 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52347, ups=3, wpb=15176.566, bsz=560.644, num_updates=20847, lr=0.000219017, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6211, train_wall=5595 | |
| epoch 003: 3140 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52347, ups=3, wpb=15176.878, bsz=560.538, num_updates=20857, lr=0.000218965, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6214, train_wall=5597 | |
| epoch 003: 3150 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52348, ups=3, wpb=15177.663, bsz=560.510, num_updates=20867, lr=0.000218912, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6217, train_wall=5600 | |
| epoch 003: 3160 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52348, ups=3, wpb=15177.797, bsz=560.709, num_updates=20877, lr=0.00021886, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6220, train_wall=5603 | |
| epoch 003: 3170 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52352, ups=3, wpb=15179.210, bsz=560.719, num_updates=20887, lr=0.000218807, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6223, train_wall=5605 | |
| epoch 003: 3180 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52352, ups=3, wpb=15178.923, bsz=560.747, num_updates=20897, lr=0.000218755, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6226, train_wall=5608 | |
| epoch 003: 3190 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52351, ups=3, wpb=15179.227, bsz=561.061, num_updates=20907, lr=0.000218703, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6229, train_wall=5611 | |
| epoch 003: 3200 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52348, ups=3, wpb=15178.441, bsz=561.058, num_updates=20917, lr=0.00021865, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6232, train_wall=5614 | |
| epoch 003: 3210 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52349, ups=3, wpb=15178.978, bsz=561.089, num_updates=20927, lr=0.000218598, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6235, train_wall=5616 | |
| epoch 003: 3220 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52349, ups=3, wpb=15178.864, bsz=561.220, num_updates=20937, lr=0.000218546, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6238, train_wall=5619 | |
| epoch 003: 3230 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52351, ups=3, wpb=15179.558, bsz=561.070, num_updates=20947, lr=0.000218494, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6241, train_wall=5622 | |
| epoch 003: 3240 / 8862 loss=4.300, nll_loss=2.666, ppl=6.35, wps=52349, ups=3, wpb=15179.460, bsz=561.491, num_updates=20957, lr=0.000218442, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6243, train_wall=5624 | |
| epoch 003: 3250 / 8862 loss=4.299, nll_loss=2.665, ppl=6.34, wps=52346, ups=3, wpb=15178.967, bsz=561.735, num_updates=20967, lr=0.00021839, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6246, train_wall=5627 | |
| epoch 003: 3260 / 8862 loss=4.299, nll_loss=2.665, ppl=6.34, wps=52343, ups=3, wpb=15178.504, bsz=561.568, num_updates=20977, lr=0.000218337, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6249, train_wall=5630 | |
| epoch 003: 3270 / 8862 loss=4.299, nll_loss=2.665, ppl=6.34, wps=52346, ups=3, wpb=15179.181, bsz=561.556, num_updates=20987, lr=0.000218285, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6252, train_wall=5632 | |
| epoch 003: 3280 / 8862 loss=4.299, nll_loss=2.665, ppl=6.34, wps=52346, ups=3, wpb=15179.059, bsz=561.466, num_updates=20997, lr=0.000218233, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6255, train_wall=5635 | |
| epoch 003: 3290 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52349, ups=3, wpb=15180.383, bsz=561.773, num_updates=21007, lr=0.000218182, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6258, train_wall=5638 | |
| epoch 003: 3300 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52352, ups=3, wpb=15180.702, bsz=561.678, num_updates=21017, lr=0.00021813, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6261, train_wall=5640 | |
| epoch 003: 3310 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52351, ups=3, wpb=15180.298, bsz=561.774, num_updates=21027, lr=0.000218078, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6264, train_wall=5643 | |
| epoch 003: 3320 / 8862 loss=4.299, nll_loss=2.665, ppl=6.34, wps=52351, ups=3, wpb=15179.808, bsz=561.747, num_updates=21037, lr=0.000218026, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6267, train_wall=5646 | |
| epoch 003: 3330 / 8862 loss=4.298, nll_loss=2.665, ppl=6.34, wps=52348, ups=3, wpb=15179.311, bsz=561.641, num_updates=21047, lr=0.000217974, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6270, train_wall=5648 | |
| epoch 003: 3340 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52349, ups=3, wpb=15179.456, bsz=561.832, num_updates=21057, lr=0.000217922, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6272, train_wall=5651 | |
| epoch 003: 3350 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52348, ups=3, wpb=15178.964, bsz=561.922, num_updates=21067, lr=0.000217871, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6275, train_wall=5654 | |
| epoch 003: 3360 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52348, ups=3, wpb=15179.150, bsz=561.948, num_updates=21077, lr=0.000217819, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6278, train_wall=5656 | |
| epoch 003: 3370 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52346, ups=3, wpb=15178.471, bsz=561.871, num_updates=21087, lr=0.000217767, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6281, train_wall=5659 | |
| epoch 003: 3380 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52345, ups=3, wpb=15177.958, bsz=561.844, num_updates=21097, lr=0.000217716, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6284, train_wall=5662 | |
| epoch 003: 3390 / 8862 loss=4.297, nll_loss=2.663, ppl=6.34, wps=52344, ups=3, wpb=15177.700, bsz=561.784, num_updates=21107, lr=0.000217664, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6287, train_wall=5665 | |
| epoch 003: 3400 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52344, ups=3, wpb=15178.192, bsz=562.179, num_updates=21117, lr=0.000217613, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6290, train_wall=5667 | |
| epoch 003: 3410 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52345, ups=3, wpb=15178.270, bsz=562.013, num_updates=21127, lr=0.000217561, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6293, train_wall=5670 | |
| epoch 003: 3420 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52345, ups=3, wpb=15178.126, bsz=561.913, num_updates=21137, lr=0.00021751, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6296, train_wall=5673 | |
| epoch 003: 3430 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52343, ups=3, wpb=15177.710, bsz=562.031, num_updates=21147, lr=0.000217458, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6299, train_wall=5675 | |
| epoch 003: 3440 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52344, ups=3, wpb=15177.784, bsz=562.307, num_updates=21157, lr=0.000217407, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6301, train_wall=5678 | |
| epoch 003: 3450 / 8862 loss=4.297, nll_loss=2.663, ppl=6.34, wps=52343, ups=3, wpb=15177.342, bsz=562.191, num_updates=21167, lr=0.000217355, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6304, train_wall=5681 | |
| epoch 003: 3460 / 8862 loss=4.297, nll_loss=2.663, ppl=6.34, wps=52344, ups=3, wpb=15177.473, bsz=562.150, num_updates=21177, lr=0.000217304, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6307, train_wall=5683 | |
| epoch 003: 3470 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52344, ups=3, wpb=15177.402, bsz=562.086, num_updates=21187, lr=0.000217253, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6310, train_wall=5686 | |
| epoch 003: 3480 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52345, ups=3, wpb=15177.987, bsz=562.179, num_updates=21197, lr=0.000217201, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6313, train_wall=5689 | |
| epoch 003: 3490 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52347, ups=3, wpb=15178.275, bsz=562.033, num_updates=21207, lr=0.00021715, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6316, train_wall=5691 | |
| epoch 003: 3500 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52348, ups=3, wpb=15178.360, bsz=561.931, num_updates=21217, lr=0.000217099, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6319, train_wall=5694 | |
| epoch 003: 3510 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52347, ups=3, wpb=15178.369, bsz=561.887, num_updates=21227, lr=0.000217048, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6322, train_wall=5697 | |
| epoch 003: 3520 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52347, ups=3, wpb=15178.016, bsz=561.734, num_updates=21237, lr=0.000216997, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6325, train_wall=5699 | |
| epoch 003: 3530 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52347, ups=3, wpb=15177.523, bsz=561.686, num_updates=21247, lr=0.000216946, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6327, train_wall=5702 | |
| epoch 003: 3540 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52348, ups=3, wpb=15177.770, bsz=561.501, num_updates=21257, lr=0.000216895, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6330, train_wall=5705 | |
| epoch 003: 3550 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52351, ups=3, wpb=15178.543, bsz=561.305, num_updates=21267, lr=0.000216844, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6333, train_wall=5707 | |
| epoch 003: 3560 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52353, ups=3, wpb=15179.001, bsz=561.200, num_updates=21277, lr=0.000216793, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6336, train_wall=5710 | |
| epoch 003: 3570 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52353, ups=3, wpb=15178.810, bsz=561.266, num_updates=21287, lr=0.000216742, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6339, train_wall=5713 | |
| epoch 003: 3580 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52352, ups=3, wpb=15178.916, bsz=561.647, num_updates=21297, lr=0.000216691, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6342, train_wall=5715 | |
| epoch 003: 3590 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52348, ups=3, wpb=15177.408, bsz=561.571, num_updates=21307, lr=0.00021664, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6345, train_wall=5718 | |
| epoch 003: 3600 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52346, ups=3, wpb=15176.840, bsz=561.680, num_updates=21317, lr=0.000216589, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6348, train_wall=5721 | |
| epoch 003: 3610 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52349, ups=3, wpb=15177.418, bsz=561.498, num_updates=21327, lr=0.000216538, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6351, train_wall=5723 | |
| epoch 003: 3620 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52351, ups=3, wpb=15177.687, bsz=561.313, num_updates=21337, lr=0.000216488, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6353, train_wall=5726 | |
| epoch 003: 3630 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52353, ups=3, wpb=15178.397, bsz=561.289, num_updates=21347, lr=0.000216437, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6356, train_wall=5729 | |
| epoch 003: 3640 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52355, ups=3, wpb=15178.789, bsz=561.275, num_updates=21357, lr=0.000216386, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6359, train_wall=5731 | |
| epoch 003: 3650 / 8862 loss=4.297, nll_loss=2.664, ppl=6.34, wps=52357, ups=3, wpb=15179.018, bsz=560.949, num_updates=21367, lr=0.000216336, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6362, train_wall=5734 | |
| epoch 003: 3660 / 8862 loss=4.297, nll_loss=2.664, ppl=6.34, wps=52358, ups=3, wpb=15179.093, bsz=560.789, num_updates=21377, lr=0.000216285, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6365, train_wall=5737 | |
| epoch 003: 3670 / 8862 loss=4.297, nll_loss=2.663, ppl=6.34, wps=52360, ups=3, wpb=15179.399, bsz=560.604, num_updates=21387, lr=0.000216235, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6368, train_wall=5739 | |
| epoch 003: 3680 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52364, ups=3, wpb=15180.339, bsz=560.643, num_updates=21397, lr=0.000216184, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6371, train_wall=5742 | |
| epoch 003: 3690 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52362, ups=3, wpb=15179.776, bsz=560.356, num_updates=21407, lr=0.000216134, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6374, train_wall=5745 | |
| epoch 003: 3700 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52361, ups=3, wpb=15179.321, bsz=560.130, num_updates=21417, lr=0.000216083, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6377, train_wall=5747 | |
| epoch 003: 3710 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52362, ups=3, wpb=15179.554, bsz=560.108, num_updates=21427, lr=0.000216033, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6379, train_wall=5750 | |
| epoch 003: 3720 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52360, ups=3, wpb=15178.541, bsz=560.030, num_updates=21437, lr=0.000215982, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6382, train_wall=5753 | |
| epoch 003: 3730 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52359, ups=3, wpb=15178.592, bsz=559.957, num_updates=21447, lr=0.000215932, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6385, train_wall=5755 | |
| epoch 003: 3740 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52357, ups=3, wpb=15178.873, bsz=560.329, num_updates=21457, lr=0.000215882, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6388, train_wall=5758 | |
| epoch 003: 3750 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52356, ups=3, wpb=15178.336, bsz=560.286, num_updates=21467, lr=0.000215831, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6391, train_wall=5761 | |
| epoch 003: 3760 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52361, ups=3, wpb=15179.227, bsz=560.200, num_updates=21477, lr=0.000215781, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6394, train_wall=5763 | |
| epoch 003: 3770 / 8862 loss=4.297, nll_loss=2.664, ppl=6.34, wps=52360, ups=3, wpb=15178.915, bsz=560.074, num_updates=21487, lr=0.000215731, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6397, train_wall=5766 | |
| epoch 003: 3780 / 8862 loss=4.298, nll_loss=2.664, ppl=6.34, wps=52363, ups=3, wpb=15179.170, bsz=559.951, num_updates=21497, lr=0.000215681, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6400, train_wall=5769 | |
| epoch 003: 3790 / 8862 loss=4.297, nll_loss=2.664, ppl=6.34, wps=52365, ups=3, wpb=15179.680, bsz=559.983, num_updates=21507, lr=0.00021563, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6403, train_wall=5771 | |
| epoch 003: 3800 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52366, ups=3, wpb=15179.961, bsz=560.131, num_updates=21517, lr=0.00021558, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6406, train_wall=5774 | |
| epoch 003: 3810 / 8862 loss=4.297, nll_loss=2.664, ppl=6.34, wps=52367, ups=3, wpb=15179.674, bsz=560.143, num_updates=21527, lr=0.00021553, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6408, train_wall=5777 | |
| epoch 003: 3820 / 8862 loss=4.297, nll_loss=2.663, ppl=6.34, wps=52362, ups=3, wpb=15179.231, bsz=560.482, num_updates=21537, lr=0.00021548, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6411, train_wall=5779 | |
| epoch 003: 3830 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52363, ups=3, wpb=15179.506, bsz=560.411, num_updates=21547, lr=0.00021543, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6414, train_wall=5782 | |
| epoch 003: 3840 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52363, ups=3, wpb=15179.102, bsz=560.435, num_updates=21557, lr=0.00021538, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6417, train_wall=5785 | |
| epoch 003: 3850 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52364, ups=3, wpb=15179.596, bsz=560.553, num_updates=21567, lr=0.00021533, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6420, train_wall=5787 | |
| epoch 003: 3860 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52363, ups=3, wpb=15178.899, bsz=560.591, num_updates=21577, lr=0.00021528, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6423, train_wall=5790 | |
| epoch 003: 3870 / 8862 loss=4.296, nll_loss=2.663, ppl=6.33, wps=52363, ups=3, wpb=15179.147, bsz=560.922, num_updates=21587, lr=0.000215231, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6426, train_wall=5793 | |
| epoch 003: 3880 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52366, ups=3, wpb=15180.271, bsz=561.115, num_updates=21597, lr=0.000215181, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6429, train_wall=5795 | |
| epoch 003: 3890 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52368, ups=3, wpb=15180.881, bsz=561.135, num_updates=21607, lr=0.000215131, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6432, train_wall=5798 | |
| epoch 003: 3900 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52367, ups=3, wpb=15180.039, bsz=560.978, num_updates=21617, lr=0.000215081, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6435, train_wall=5801 | |
| epoch 003: 3910 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52369, ups=3, wpb=15180.598, bsz=560.939, num_updates=21627, lr=0.000215031, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6437, train_wall=5803 | |
| epoch 003: 3920 / 8862 loss=4.297, nll_loss=2.663, ppl=6.33, wps=52367, ups=3, wpb=15179.797, bsz=560.931, num_updates=21637, lr=0.000214982, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6440, train_wall=5806 | |
| epoch 003: 3930 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52369, ups=3, wpb=15180.121, bsz=560.824, num_updates=21647, lr=0.000214932, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6443, train_wall=5809 | |
| epoch 003: 3940 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52369, ups=3, wpb=15179.824, bsz=560.774, num_updates=21657, lr=0.000214882, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6446, train_wall=5811 | |
| epoch 003: 3950 / 8862 loss=4.296, nll_loss=2.663, ppl=6.33, wps=52367, ups=3, wpb=15178.912, bsz=560.747, num_updates=21667, lr=0.000214833, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6449, train_wall=5814 | |
| epoch 003: 3960 / 8862 loss=4.296, nll_loss=2.663, ppl=6.33, wps=52364, ups=3, wpb=15178.224, bsz=561.002, num_updates=21677, lr=0.000214783, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6452, train_wall=5817 | |
| epoch 003: 3970 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52365, ups=3, wpb=15178.374, bsz=560.955, num_updates=21687, lr=0.000214734, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6455, train_wall=5819 | |
| epoch 003: 3980 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52364, ups=3, wpb=15177.710, bsz=560.699, num_updates=21697, lr=0.000214684, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6458, train_wall=5822 | |
| epoch 003: 3990 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52364, ups=3, wpb=15177.869, bsz=560.794, num_updates=21707, lr=0.000214635, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6460, train_wall=5825 | |
| epoch 003: 4000 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52363, ups=3, wpb=15177.371, bsz=560.900, num_updates=21717, lr=0.000214585, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6463, train_wall=5827 | |
| epoch 003: 4010 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52365, ups=3, wpb=15177.580, bsz=560.886, num_updates=21727, lr=0.000214536, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6466, train_wall=5830 | |
| epoch 003: 4020 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52366, ups=3, wpb=15177.711, bsz=560.756, num_updates=21737, lr=0.000214487, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6469, train_wall=5833 | |
| epoch 003: 4030 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52364, ups=3, wpb=15176.858, bsz=560.786, num_updates=21747, lr=0.000214437, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6472, train_wall=5835 | |
| epoch 003: 4040 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52363, ups=3, wpb=15175.975, bsz=560.549, num_updates=21757, lr=0.000214388, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6475, train_wall=5838 | |
| epoch 003: 4050 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52359, ups=3, wpb=15176.296, bsz=561.215, num_updates=21767, lr=0.000214339, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6478, train_wall=5841 | |
| epoch 003: 4060 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52359, ups=3, wpb=15176.540, bsz=561.332, num_updates=21777, lr=0.00021429, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6481, train_wall=5844 | |
| epoch 003: 4070 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52360, ups=3, wpb=15176.688, bsz=561.236, num_updates=21787, lr=0.00021424, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6484, train_wall=5846 | |
| epoch 003: 4080 / 8862 loss=4.295, nll_loss=2.662, ppl=6.33, wps=52359, ups=3, wpb=15176.524, bsz=561.529, num_updates=21797, lr=0.000214191, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6487, train_wall=5849 | |
| epoch 003: 4090 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52359, ups=3, wpb=15176.642, bsz=561.577, num_updates=21807, lr=0.000214142, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6489, train_wall=5852 | |
| epoch 003: 4100 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52360, ups=3, wpb=15176.347, bsz=561.532, num_updates=21817, lr=0.000214093, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6492, train_wall=5854 | |
| epoch 003: 4110 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52356, ups=3, wpb=15175.500, bsz=561.777, num_updates=21827, lr=0.000214044, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6495, train_wall=5857 | |
| epoch 003: 4120 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52356, ups=3, wpb=15175.025, bsz=561.755, num_updates=21837, lr=0.000213995, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6498, train_wall=5860 | |
| epoch 003: 4130 / 8862 loss=4.295, nll_loss=2.662, ppl=6.33, wps=52355, ups=3, wpb=15174.716, bsz=561.885, num_updates=21847, lr=0.000213946, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6501, train_wall=5862 | |
| epoch 003: 4140 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52358, ups=3, wpb=15175.279, bsz=561.820, num_updates=21857, lr=0.000213897, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6504, train_wall=5865 | |
| epoch 003: 4150 / 8862 loss=4.295, nll_loss=2.662, ppl=6.33, wps=52360, ups=3, wpb=15175.286, bsz=561.625, num_updates=21867, lr=0.000213848, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6507, train_wall=5868 | |
| epoch 003: 4160 / 8862 loss=4.296, nll_loss=2.662, ppl=6.33, wps=52361, ups=3, wpb=15175.180, bsz=561.413, num_updates=21877, lr=0.000213799, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6510, train_wall=5870 | |
| epoch 003: 4170 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52358, ups=3, wpb=15174.486, bsz=561.667, num_updates=21887, lr=0.00021375, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6513, train_wall=5873 | |
| epoch 003: 4180 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52361, ups=3, wpb=15175.056, bsz=561.743, num_updates=21897, lr=0.000213702, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6515, train_wall=5876 | |
| epoch 003: 4190 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52361, ups=3, wpb=15174.825, bsz=561.577, num_updates=21907, lr=0.000213653, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6518, train_wall=5878 | |
| epoch 003: 4200 / 8862 loss=4.295, nll_loss=2.662, ppl=6.33, wps=52363, ups=3, wpb=15175.209, bsz=561.571, num_updates=21917, lr=0.000213604, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6521, train_wall=5881 | |
| epoch 003: 4210 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52367, ups=3, wpb=15175.971, bsz=561.520, num_updates=21927, lr=0.000213555, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6524, train_wall=5883 | |
| epoch 003: 4220 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52365, ups=3, wpb=15175.473, bsz=561.666, num_updates=21937, lr=0.000213507, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6527, train_wall=5886 | |
| epoch 003: 4230 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52363, ups=3, wpb=15174.577, bsz=561.685, num_updates=21947, lr=0.000213458, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6530, train_wall=5889 | |
| epoch 003: 4240 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52366, ups=3, wpb=15175.279, bsz=561.701, num_updates=21957, lr=0.000213409, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6533, train_wall=5891 | |
| epoch 003: 4250 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52367, ups=3, wpb=15175.378, bsz=561.760, num_updates=21967, lr=0.000213361, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6536, train_wall=5894 | |
| epoch 003: 4260 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52366, ups=3, wpb=15174.992, bsz=561.767, num_updates=21977, lr=0.000213312, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6538, train_wall=5897 | |
| epoch 003: 4270 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52369, ups=3, wpb=15175.403, bsz=561.555, num_updates=21987, lr=0.000213264, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6541, train_wall=5899 | |
| epoch 003: 4280 / 8862 loss=4.295, nll_loss=2.661, ppl=6.33, wps=52370, ups=3, wpb=15175.600, bsz=561.493, num_updates=21997, lr=0.000213215, gnorm=0.511, clip=0.000, oom=0.000, loss_scale=4.000, wall=6544, train_wall=5902 | |
| epoch 003: 4290 / 8862 loss=4.295, nll_loss=2.661, ppl=6.32, wps=52371, ups=3, wpb=15175.838, bsz=561.396, num_updates=22007, lr=0.000213167, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6547, train_wall=5905 | |
| epoch 003: 4300 / 8862 loss=4.295, nll_loss=2.661, ppl=6.32, wps=52371, ups=3, wpb=15175.746, bsz=561.387, num_updates=22017, lr=0.000213118, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6550, train_wall=5907 | |
| epoch 003: 4310 / 8862 loss=4.295, nll_loss=2.661, ppl=6.32, wps=52369, ups=3, wpb=15175.155, bsz=561.622, num_updates=22027, lr=0.00021307, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6553, train_wall=5910 | |
| epoch 003: 4320 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52370, ups=3, wpb=15175.708, bsz=561.655, num_updates=22037, lr=0.000213022, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6556, train_wall=5913 | |
| epoch 003: 4330 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52371, ups=3, wpb=15175.787, bsz=561.808, num_updates=22047, lr=0.000212973, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6559, train_wall=5916 | |
| epoch 003: 4340 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52373, ups=3, wpb=15175.710, bsz=561.598, num_updates=22057, lr=0.000212925, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6562, train_wall=5918 | |
| epoch 003: 4350 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52377, ups=3, wpb=15176.577, bsz=561.486, num_updates=22067, lr=0.000212877, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6564, train_wall=5921 | |
| epoch 003: 4360 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52381, ups=3, wpb=15177.649, bsz=561.324, num_updates=22077, lr=0.000212829, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6567, train_wall=5923 | |
| epoch 003: 4370 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52382, ups=3, wpb=15177.662, bsz=561.180, num_updates=22087, lr=0.00021278, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6570, train_wall=5926 | |
| epoch 003: 4380 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52381, ups=3, wpb=15177.360, bsz=561.205, num_updates=22097, lr=0.000212732, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6573, train_wall=5929 | |
| epoch 003: 4390 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52381, ups=3, wpb=15177.202, bsz=561.133, num_updates=22107, lr=0.000212684, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6576, train_wall=5931 | |
| epoch 003: 4400 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52380, ups=3, wpb=15176.783, bsz=561.065, num_updates=22117, lr=0.000212636, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6579, train_wall=5934 | |
| epoch 003: 4410 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52383, ups=3, wpb=15177.656, bsz=561.043, num_updates=22127, lr=0.000212588, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6582, train_wall=5937 | |
| epoch 003: 4420 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52385, ups=3, wpb=15177.960, bsz=561.167, num_updates=22137, lr=0.00021254, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6585, train_wall=5939 | |
| epoch 003: 4430 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52386, ups=3, wpb=15178.157, bsz=561.152, num_updates=22147, lr=0.000212492, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6588, train_wall=5942 | |
| epoch 003: 4440 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52385, ups=3, wpb=15177.633, bsz=561.025, num_updates=22157, lr=0.000212444, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6590, train_wall=5945 | |
| epoch 003: 4450 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52383, ups=3, wpb=15176.808, bsz=560.970, num_updates=22167, lr=0.000212396, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6593, train_wall=5947 | |
| epoch 003: 4460 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52383, ups=3, wpb=15176.822, bsz=561.047, num_updates=22177, lr=0.000212348, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6596, train_wall=5950 | |
| epoch 003: 4470 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52385, ups=3, wpb=15177.169, bsz=561.016, num_updates=22187, lr=0.0002123, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6599, train_wall=5953 | |
| epoch 003: 4480 / 8862 loss=4.293, nll_loss=2.660, ppl=6.32, wps=52385, ups=3, wpb=15177.300, bsz=561.032, num_updates=22197, lr=0.000212253, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6602, train_wall=5955 | |
| epoch 003: 4490 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52387, ups=3, wpb=15177.597, bsz=561.022, num_updates=22207, lr=0.000212205, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6605, train_wall=5958 | |
| epoch 003: 4500 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52388, ups=3, wpb=15177.469, bsz=560.862, num_updates=22217, lr=0.000212157, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6608, train_wall=5961 | |
| epoch 003: 4510 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52390, ups=3, wpb=15177.678, bsz=561.025, num_updates=22227, lr=0.000212109, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6611, train_wall=5963 | |
| epoch 003: 4520 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52391, ups=3, wpb=15177.676, bsz=560.851, num_updates=22237, lr=0.000212062, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6613, train_wall=5966 | |
| epoch 003: 4530 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52392, ups=3, wpb=15177.531, bsz=560.637, num_updates=22247, lr=0.000212014, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6616, train_wall=5969 | |
| epoch 003: 4540 / 8862 loss=4.294, nll_loss=2.660, ppl=6.32, wps=52392, ups=3, wpb=15177.419, bsz=560.551, num_updates=22257, lr=0.000211966, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6619, train_wall=5971 | |
| epoch 003: 4550 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52391, ups=3, wpb=15177.908, bsz=560.916, num_updates=22267, lr=0.000211919, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6622, train_wall=5974 | |
| epoch 003: 4560 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52392, ups=3, wpb=15178.338, bsz=561.145, num_updates=22277, lr=0.000211871, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6625, train_wall=5977 | |
| epoch 003: 4570 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52393, ups=3, wpb=15178.350, bsz=561.011, num_updates=22287, lr=0.000211824, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6628, train_wall=5979 | |
| epoch 003: 4580 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52392, ups=3, wpb=15177.584, bsz=560.772, num_updates=22297, lr=0.000211776, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6631, train_wall=5982 | |
| epoch 003: 4590 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52392, ups=3, wpb=15177.276, bsz=560.657, num_updates=22307, lr=0.000211729, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6634, train_wall=5985 | |
| epoch 003: 4600 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52393, ups=3, wpb=15177.790, bsz=560.998, num_updates=22317, lr=0.000211681, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6637, train_wall=5987 | |
| epoch 003: 4610 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52394, ups=3, wpb=15177.900, bsz=560.787, num_updates=22327, lr=0.000211634, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6639, train_wall=5990 | |
| epoch 003: 4620 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52394, ups=3, wpb=15177.848, bsz=560.767, num_updates=22337, lr=0.000211586, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6642, train_wall=5993 | |
| epoch 003: 4630 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52394, ups=3, wpb=15177.792, bsz=560.705, num_updates=22347, lr=0.000211539, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6645, train_wall=5995 | |
| epoch 003: 4640 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52393, ups=3, wpb=15177.081, bsz=560.543, num_updates=22357, lr=0.000211492, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6648, train_wall=5998 | |
| epoch 003: 4650 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52394, ups=3, wpb=15177.197, bsz=560.679, num_updates=22367, lr=0.000211444, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6651, train_wall=6001 | |
| epoch 003: 4660 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52394, ups=3, wpb=15177.148, bsz=560.808, num_updates=22377, lr=0.000211397, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6654, train_wall=6003 | |
| epoch 003: 4670 / 8862 loss=4.293, nll_loss=2.659, ppl=6.31, wps=52396, ups=3, wpb=15177.594, bsz=560.757, num_updates=22387, lr=0.00021135, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6657, train_wall=6006 | |
| epoch 003: 4680 / 8862 loss=4.293, nll_loss=2.659, ppl=6.32, wps=52396, ups=3, wpb=15176.990, bsz=560.810, num_updates=22397, lr=0.000211303, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6660, train_wall=6009 | |
| epoch 003: 4690 / 8862 loss=4.293, nll_loss=2.659, ppl=6.31, wps=52395, ups=3, wpb=15176.567, bsz=560.692, num_updates=22407, lr=0.000211256, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6662, train_wall=6011 | |
| epoch 003: 4700 / 8862 loss=4.293, nll_loss=2.659, ppl=6.31, wps=52396, ups=3, wpb=15176.609, bsz=560.599, num_updates=22417, lr=0.000211208, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6665, train_wall=6014 | |
| epoch 003: 4710 / 8862 loss=4.293, nll_loss=2.659, ppl=6.31, wps=52396, ups=3, wpb=15176.717, bsz=560.794, num_updates=22427, lr=0.000211161, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6668, train_wall=6017 | |
| epoch 003: 4720 / 8862 loss=4.293, nll_loss=2.659, ppl=6.31, wps=52398, ups=3, wpb=15177.096, bsz=560.784, num_updates=22437, lr=0.000211114, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6671, train_wall=6019 | |
| epoch 003: 4730 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52398, ups=3, wpb=15177.057, bsz=560.751, num_updates=22447, lr=0.000211067, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6674, train_wall=6022 | |
| epoch 003: 4740 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52398, ups=3, wpb=15176.713, bsz=560.570, num_updates=22457, lr=0.00021102, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6677, train_wall=6025 | |
| epoch 003: 4750 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52398, ups=3, wpb=15176.672, bsz=560.579, num_updates=22467, lr=0.000210973, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6680, train_wall=6027 | |
| epoch 003: 4760 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52397, ups=3, wpb=15176.530, bsz=560.561, num_updates=22477, lr=0.000210926, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6683, train_wall=6030 | |
| epoch 003: 4770 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52399, ups=3, wpb=15177.015, bsz=560.464, num_updates=22487, lr=0.000210879, gnorm=0.510, clip=0.000, oom=0.000, loss_scale=4.000, wall=6686, train_wall=6033 | |
| epoch 003: 4780 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52399, ups=3, wpb=15176.678, bsz=560.308, num_updates=22497, lr=0.000210833, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6688, train_wall=6035 | |
| epoch 003: 4790 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52403, ups=3, wpb=15177.407, bsz=560.198, num_updates=22507, lr=0.000210786, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6691, train_wall=6038 | |
| epoch 003: 4800 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52403, ups=3, wpb=15177.289, bsz=560.128, num_updates=22517, lr=0.000210739, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6694, train_wall=6041 | |
| epoch 003: 4810 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52403, ups=3, wpb=15177.474, bsz=560.168, num_updates=22527, lr=0.000210692, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6697, train_wall=6043 | |
| epoch 003: 4820 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52402, ups=3, wpb=15177.232, bsz=560.111, num_updates=22537, lr=0.000210645, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6700, train_wall=6046 | |
| epoch 003: 4830 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52403, ups=3, wpb=15177.479, bsz=559.970, num_updates=22547, lr=0.000210599, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6703, train_wall=6049 | |
| epoch 003: 4840 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52403, ups=3, wpb=15177.747, bsz=559.833, num_updates=22557, lr=0.000210552, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6706, train_wall=6051 | |
| epoch 003: 4850 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52404, ups=3, wpb=15178.020, bsz=559.864, num_updates=22567, lr=0.000210505, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6709, train_wall=6054 | |
| epoch 003: 4860 / 8862 loss=4.292, nll_loss=2.658, ppl=6.31, wps=52404, ups=3, wpb=15177.789, bsz=559.744, num_updates=22577, lr=0.000210459, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6712, train_wall=6057 | |
| epoch 003: 4870 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52404, ups=3, wpb=15177.915, bsz=559.518, num_updates=22587, lr=0.000210412, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6714, train_wall=6059 | |
| epoch 003: 4880 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52404, ups=3, wpb=15177.901, bsz=559.686, num_updates=22597, lr=0.000210366, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6717, train_wall=6062 | |
| epoch 003: 4890 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52406, ups=3, wpb=15178.452, bsz=559.800, num_updates=22607, lr=0.000210319, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6720, train_wall=6065 | |
| epoch 003: 4900 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52404, ups=3, wpb=15178.723, bsz=560.187, num_updates=22617, lr=0.000210273, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6723, train_wall=6068 | |
| epoch 003: 4910 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52401, ups=3, wpb=15178.090, bsz=560.283, num_updates=22627, lr=0.000210226, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6726, train_wall=6070 | |
| epoch 003: 4920 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52401, ups=3, wpb=15178.071, bsz=560.253, num_updates=22637, lr=0.00021018, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6729, train_wall=6073 | |
| epoch 003: 4930 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52403, ups=3, wpb=15178.605, bsz=560.173, num_updates=22647, lr=0.000210133, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6732, train_wall=6076 | |
| epoch 003: 4940 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52403, ups=3, wpb=15178.337, bsz=560.105, num_updates=22657, lr=0.000210087, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6735, train_wall=6078 | |
| epoch 003: 4950 / 8862 loss=4.291, nll_loss=2.656, ppl=6.30, wps=52405, ups=3, wpb=15178.687, bsz=560.255, num_updates=22667, lr=0.00021004, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6738, train_wall=6081 | |
| epoch 003: 4960 / 8862 loss=4.291, nll_loss=2.656, ppl=6.30, wps=52405, ups=3, wpb=15178.567, bsz=560.251, num_updates=22677, lr=0.000209994, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6741, train_wall=6084 | |
| epoch 003: 4970 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52404, ups=3, wpb=15178.489, bsz=560.281, num_updates=22687, lr=0.000209948, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6743, train_wall=6086 | |
| epoch 003: 4980 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52404, ups=3, wpb=15178.239, bsz=560.244, num_updates=22697, lr=0.000209902, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6746, train_wall=6089 | |
| epoch 003: 4990 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52406, ups=3, wpb=15178.514, bsz=560.131, num_updates=22707, lr=0.000209855, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6749, train_wall=6092 | |
| epoch 003: 5000 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52407, ups=3, wpb=15178.693, bsz=560.062, num_updates=22717, lr=0.000209809, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6752, train_wall=6094 | |
| epoch 003: 5010 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52408, ups=3, wpb=15178.623, bsz=560.027, num_updates=22727, lr=0.000209763, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6755, train_wall=6097 | |
| epoch 003: 5020 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52408, ups=3, wpb=15178.471, bsz=560.070, num_updates=22737, lr=0.000209717, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6758, train_wall=6100 | |
| epoch 003: 5030 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52406, ups=3, wpb=15178.022, bsz=560.157, num_updates=22747, lr=0.000209671, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6761, train_wall=6102 | |
| epoch 003: 5040 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52406, ups=3, wpb=15177.894, bsz=560.088, num_updates=22757, lr=0.000209625, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6764, train_wall=6105 | |
| epoch 003: 5050 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52407, ups=3, wpb=15177.998, bsz=559.938, num_updates=22767, lr=0.000209579, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6767, train_wall=6108 | |
| epoch 003: 5060 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52406, ups=3, wpb=15177.397, bsz=559.791, num_updates=22777, lr=0.000209533, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6769, train_wall=6110 | |
| epoch 003: 5070 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52406, ups=3, wpb=15176.834, bsz=559.583, num_updates=22787, lr=0.000209487, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6772, train_wall=6113 | |
| epoch 003: 5080 / 8862 loss=4.291, nll_loss=2.657, ppl=6.31, wps=52405, ups=3, wpb=15176.455, bsz=559.670, num_updates=22797, lr=0.000209441, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6775, train_wall=6115 | |
| epoch 003: 5090 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52406, ups=3, wpb=15176.829, bsz=559.899, num_updates=22807, lr=0.000209395, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6778, train_wall=6118 | |
| epoch 003: 5100 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52407, ups=3, wpb=15176.992, bsz=559.860, num_updates=22817, lr=0.000209349, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6781, train_wall=6121 | |
| epoch 003: 5110 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52408, ups=3, wpb=15177.390, bsz=559.924, num_updates=22827, lr=0.000209303, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6784, train_wall=6123 | |
| epoch 003: 5120 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52410, ups=3, wpb=15177.530, bsz=559.795, num_updates=22837, lr=0.000209257, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6787, train_wall=6126 | |
| epoch 003: 5130 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52411, ups=3, wpb=15177.696, bsz=559.719, num_updates=22847, lr=0.000209211, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6790, train_wall=6129 | |
| epoch 003: 5140 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52413, ups=3, wpb=15177.868, bsz=559.625, num_updates=22857, lr=0.000209166, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6792, train_wall=6131 | |
| epoch 003: 5150 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52413, ups=3, wpb=15178.287, bsz=559.799, num_updates=22867, lr=0.00020912, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6795, train_wall=6134 | |
| epoch 003: 5160 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52413, ups=3, wpb=15178.102, bsz=559.821, num_updates=22877, lr=0.000209074, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6798, train_wall=6137 | |
| epoch 003: 5170 / 8862 loss=4.290, nll_loss=2.656, ppl=6.30, wps=52411, ups=3, wpb=15177.606, bsz=559.924, num_updates=22887, lr=0.000209029, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6801, train_wall=6139 | |
| epoch 003: 5180 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52412, ups=3, wpb=15178.023, bsz=559.950, num_updates=22897, lr=0.000208983, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6804, train_wall=6142 | |
| epoch 003: 5190 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52412, ups=3, wpb=15178.036, bsz=559.818, num_updates=22907, lr=0.000208937, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6807, train_wall=6145 | |
| epoch 003: 5200 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52413, ups=3, wpb=15178.380, bsz=559.858, num_updates=22917, lr=0.000208892, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6810, train_wall=6148 | |
| epoch 003: 5210 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52413, ups=3, wpb=15178.434, bsz=559.998, num_updates=22927, lr=0.000208846, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6813, train_wall=6150 | |
| epoch 003: 5220 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52414, ups=3, wpb=15178.795, bsz=559.960, num_updates=22937, lr=0.000208801, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6816, train_wall=6153 | |
| epoch 003: 5230 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52413, ups=3, wpb=15178.358, bsz=560.068, num_updates=22947, lr=0.000208755, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6819, train_wall=6156 | |
| epoch 003: 5240 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52414, ups=3, wpb=15178.228, bsz=560.012, num_updates=22957, lr=0.00020871, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6821, train_wall=6158 | |
| epoch 003: 5250 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52414, ups=3, wpb=15177.837, bsz=559.930, num_updates=22967, lr=0.000208664, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6824, train_wall=6161 | |
| epoch 003: 5260 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52413, ups=3, wpb=15177.387, bsz=559.873, num_updates=22977, lr=0.000208619, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6827, train_wall=6163 | |
| epoch 003: 5270 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52415, ups=3, wpb=15177.735, bsz=559.915, num_updates=22987, lr=0.000208573, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6830, train_wall=6166 | |
| epoch 003: 5280 / 8862 loss=4.289, nll_loss=2.655, ppl=6.30, wps=52414, ups=3, wpb=15177.290, bsz=559.912, num_updates=22997, lr=0.000208528, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6833, train_wall=6169 | |
| epoch 003: 5290 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52415, ups=3, wpb=15177.671, bsz=560.006, num_updates=23007, lr=0.000208483, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6836, train_wall=6171 | |
| epoch 003: 5300 / 8862 loss=4.288, nll_loss=2.654, ppl=6.30, wps=52416, ups=3, wpb=15177.849, bsz=559.951, num_updates=23017, lr=0.000208437, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6839, train_wall=6174 | |
| epoch 003: 5310 / 8862 loss=4.289, nll_loss=2.654, ppl=6.30, wps=52417, ups=3, wpb=15177.897, bsz=559.908, num_updates=23027, lr=0.000208392, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6842, train_wall=6177 | |
| epoch 003: 5320 / 8862 loss=4.289, nll_loss=2.654, ppl=6.30, wps=52418, ups=3, wpb=15178.211, bsz=559.982, num_updates=23037, lr=0.000208347, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6844, train_wall=6179 | |
| epoch 003: 5330 / 8862 loss=4.288, nll_loss=2.654, ppl=6.30, wps=52419, ups=3, wpb=15178.204, bsz=559.869, num_updates=23047, lr=0.000208302, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6847, train_wall=6182 | |
| epoch 003: 5340 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52420, ups=3, wpb=15178.223, bsz=559.862, num_updates=23057, lr=0.000208257, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6850, train_wall=6185 | |
| epoch 003: 5350 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15178.293, bsz=559.681, num_updates=23067, lr=0.000208211, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6853, train_wall=6187 | |
| epoch 003: 5360 / 8862 loss=4.288, nll_loss=2.654, ppl=6.30, wps=52423, ups=3, wpb=15178.651, bsz=559.634, num_updates=23077, lr=0.000208166, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6856, train_wall=6190 | |
| epoch 003: 5370 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52424, ups=3, wpb=15178.843, bsz=559.661, num_updates=23087, lr=0.000208121, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6859, train_wall=6193 | |
| epoch 003: 5380 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52423, ups=3, wpb=15178.558, bsz=559.613, num_updates=23097, lr=0.000208076, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6862, train_wall=6195 | |
| epoch 003: 5390 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15178.191, bsz=559.830, num_updates=23107, lr=0.000208031, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6865, train_wall=6198 | |
| epoch 003: 5400 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52420, ups=3, wpb=15177.545, bsz=559.780, num_updates=23117, lr=0.000207986, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6867, train_wall=6201 | |
| epoch 003: 5410 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52422, ups=3, wpb=15177.708, bsz=559.803, num_updates=23127, lr=0.000207941, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6870, train_wall=6203 | |
| epoch 003: 5420 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52422, ups=3, wpb=15177.814, bsz=559.918, num_updates=23137, lr=0.000207896, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6873, train_wall=6206 | |
| epoch 003: 5430 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52422, ups=3, wpb=15177.613, bsz=559.854, num_updates=23147, lr=0.000207851, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6876, train_wall=6209 | |
| epoch 003: 5440 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52423, ups=3, wpb=15177.560, bsz=559.797, num_updates=23157, lr=0.000207806, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6879, train_wall=6211 | |
| epoch 003: 5450 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52423, ups=3, wpb=15177.262, bsz=559.663, num_updates=23167, lr=0.000207762, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6882, train_wall=6214 | |
| epoch 003: 5460 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15176.522, bsz=559.673, num_updates=23177, lr=0.000207717, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6885, train_wall=6217 | |
| epoch 003: 5470 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52420, ups=3, wpb=15176.017, bsz=559.543, num_updates=23187, lr=0.000207672, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6888, train_wall=6219 | |
| epoch 003: 5480 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15176.243, bsz=559.633, num_updates=23197, lr=0.000207627, gnorm=0.509, clip=0.000, oom=0.000, loss_scale=4.000, wall=6890, train_wall=6222 | |
| epoch 003: 5490 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15176.223, bsz=559.704, num_updates=23207, lr=0.000207582, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6893, train_wall=6225 | |
| epoch 003: 5500 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52421, ups=3, wpb=15176.024, bsz=559.793, num_updates=23217, lr=0.000207538, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6896, train_wall=6227 | |
| epoch 003: 5510 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52420, ups=3, wpb=15175.679, bsz=559.764, num_updates=23227, lr=0.000207493, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6899, train_wall=6230 | |
| epoch 003: 5520 / 8862 loss=4.288, nll_loss=2.654, ppl=6.29, wps=52423, ups=3, wpb=15176.279, bsz=559.727, num_updates=23237, lr=0.000207448, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6902, train_wall=6233 | |
| epoch 003: 5530 / 8862 loss=4.288, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15176.400, bsz=559.835, num_updates=23247, lr=0.000207404, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6905, train_wall=6235 | |
| epoch 003: 5540 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15176.363, bsz=559.847, num_updates=23257, lr=0.000207359, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6908, train_wall=6238 | |
| epoch 003: 5550 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15176.529, bsz=559.815, num_updates=23267, lr=0.000207315, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6911, train_wall=6241 | |
| epoch 003: 5560 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52427, ups=3, wpb=15177.215, bsz=559.755, num_updates=23277, lr=0.00020727, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6914, train_wall=6243 | |
| epoch 003: 5570 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52426, ups=3, wpb=15176.492, bsz=559.603, num_updates=23287, lr=0.000207226, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6916, train_wall=6246 | |
| epoch 003: 5580 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15175.527, bsz=559.505, num_updates=23297, lr=0.000207181, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6919, train_wall=6249 | |
| epoch 003: 5590 / 8862 loss=4.288, nll_loss=2.653, ppl=6.29, wps=52422, ups=3, wpb=15174.577, bsz=559.434, num_updates=23307, lr=0.000207137, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6922, train_wall=6251 | |
| epoch 003: 5600 / 8862 loss=4.288, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15174.972, bsz=559.498, num_updates=23317, lr=0.000207092, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6925, train_wall=6254 | |
| epoch 003: 5610 / 8862 loss=4.288, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15174.927, bsz=559.432, num_updates=23327, lr=0.000207048, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6928, train_wall=6257 | |
| epoch 003: 5620 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52426, ups=3, wpb=15175.558, bsz=559.472, num_updates=23337, lr=0.000207003, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6931, train_wall=6259 | |
| epoch 003: 5630 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15174.565, bsz=559.519, num_updates=23347, lr=0.000206959, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6934, train_wall=6262 | |
| epoch 003: 5640 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15174.824, bsz=559.673, num_updates=23357, lr=0.000206915, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6937, train_wall=6265 | |
| epoch 003: 5650 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15174.527, bsz=559.610, num_updates=23367, lr=0.00020687, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6939, train_wall=6267 | |
| epoch 003: 5660 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52423, ups=3, wpb=15174.329, bsz=559.676, num_updates=23377, lr=0.000206826, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6942, train_wall=6270 | |
| epoch 003: 5670 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15174.354, bsz=559.760, num_updates=23387, lr=0.000206782, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6945, train_wall=6272 | |
| epoch 003: 5680 / 8862 loss=4.287, nll_loss=2.653, ppl=6.29, wps=52424, ups=3, wpb=15174.261, bsz=559.825, num_updates=23397, lr=0.000206738, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6948, train_wall=6275 | |
| epoch 003: 5690 / 8862 loss=4.287, nll_loss=2.652, ppl=6.29, wps=52424, ups=3, wpb=15174.376, bsz=559.955, num_updates=23407, lr=0.000206694, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6951, train_wall=6278 | |
| epoch 003: 5700 / 8862 loss=4.286, nll_loss=2.652, ppl=6.29, wps=52425, ups=3, wpb=15174.800, bsz=560.113, num_updates=23417, lr=0.00020665, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6954, train_wall=6281 | |
| epoch 003: 5710 / 8862 loss=4.286, nll_loss=2.652, ppl=6.28, wps=52424, ups=3, wpb=15174.615, bsz=560.256, num_updates=23427, lr=0.000206605, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6957, train_wall=6283 | |
| epoch 003: 5720 / 8862 loss=4.286, nll_loss=2.652, ppl=6.29, wps=52424, ups=3, wpb=15174.536, bsz=560.064, num_updates=23437, lr=0.000206561, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6960, train_wall=6286 | |
| epoch 003: 5730 / 8862 loss=4.286, nll_loss=2.652, ppl=6.29, wps=52426, ups=3, wpb=15174.785, bsz=560.055, num_updates=23447, lr=0.000206517, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6963, train_wall=6289 | |
| epoch 003: 5740 / 8862 loss=4.286, nll_loss=2.652, ppl=6.28, wps=52427, ups=3, wpb=15175.235, bsz=560.179, num_updates=23457, lr=0.000206473, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6965, train_wall=6291 | |
| epoch 003: 5750 / 8862 loss=4.286, nll_loss=2.651, ppl=6.28, wps=52425, ups=3, wpb=15175.177, bsz=560.463, num_updates=23467, lr=0.000206429, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6968, train_wall=6294 | |
| epoch 003: 5760 / 8862 loss=4.286, nll_loss=2.651, ppl=6.28, wps=52423, ups=3, wpb=15174.985, bsz=560.516, num_updates=23477, lr=0.000206385, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6971, train_wall=6297 | |
| epoch 003: 5770 / 8862 loss=4.286, nll_loss=2.651, ppl=6.28, wps=52423, ups=3, wpb=15174.819, bsz=560.551, num_updates=23487, lr=0.000206341, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6974, train_wall=6299 | |
| epoch 003: 5780 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52424, ups=3, wpb=15175.193, bsz=560.580, num_updates=23497, lr=0.000206297, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6977, train_wall=6302 | |
| epoch 003: 5790 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52425, ups=3, wpb=15175.473, bsz=560.478, num_updates=23507, lr=0.000206254, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6980, train_wall=6305 | |
| epoch 003: 5800 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52426, ups=3, wpb=15175.717, bsz=560.597, num_updates=23517, lr=0.00020621, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6983, train_wall=6307 | |
| epoch 003: 5810 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52425, ups=3, wpb=15175.312, bsz=560.501, num_updates=23527, lr=0.000206166, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6986, train_wall=6310 | |
| epoch 003: 5820 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52425, ups=3, wpb=15175.065, bsz=560.452, num_updates=23537, lr=0.000206122, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6989, train_wall=6313 | |
| epoch 003: 5830 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52426, ups=3, wpb=15175.538, bsz=560.446, num_updates=23547, lr=0.000206078, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6992, train_wall=6315 | |
| epoch 003: 5840 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52426, ups=3, wpb=15175.428, bsz=560.527, num_updates=23557, lr=0.000206035, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6994, train_wall=6318 | |
| epoch 003: 5850 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52427, ups=3, wpb=15175.754, bsz=560.502, num_updates=23567, lr=0.000205991, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=6997, train_wall=6321 | |
| epoch 003: 5860 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52427, ups=3, wpb=15175.844, bsz=560.588, num_updates=23577, lr=0.000205947, gnorm=0.508, clip=0.000, oom=0.000, loss_scale=4.000, wall=7000, train_wall=6323 | |
| epoch 003: 5870 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52428, ups=3, wpb=15175.993, bsz=560.537, num_updates=23587, lr=0.000205903, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7003, train_wall=6326 | |
| epoch 003: 5880 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52428, ups=3, wpb=15176.092, bsz=560.484, num_updates=23597, lr=0.00020586, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7006, train_wall=6329 | |
| epoch 003: 5890 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52425, ups=3, wpb=15175.345, bsz=560.362, num_updates=23607, lr=0.000205816, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7009, train_wall=6331 | |
| epoch 003: 5900 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52426, ups=3, wpb=15175.371, bsz=560.379, num_updates=23617, lr=0.000205773, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7012, train_wall=6334 | |
| epoch 003: 5910 / 8862 loss=4.285, nll_loss=2.651, ppl=6.28, wps=52425, ups=3, wpb=15175.208, bsz=560.334, num_updates=23627, lr=0.000205729, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7015, train_wall=6337 | |
| epoch 003: 5920 / 8862 loss=4.285, nll_loss=2.650, ppl=6.28, wps=52421, ups=3, wpb=15174.813, bsz=560.597, num_updates=23637, lr=0.000205686, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7018, train_wall=6340 | |
| epoch 003: 5930 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52420, ups=3, wpb=15175.136, bsz=560.877, num_updates=23647, lr=0.000205642, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7021, train_wall=6342 | |
| epoch 003: 5940 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52422, ups=3, wpb=15175.597, bsz=560.981, num_updates=23657, lr=0.000205599, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7024, train_wall=6345 | |
| epoch 003: 5950 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52423, ups=3, wpb=15175.666, bsz=560.898, num_updates=23667, lr=0.000205555, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7026, train_wall=6348 | |
| epoch 003: 5960 / 8862 loss=4.284, nll_loss=2.650, ppl=6.28, wps=52422, ups=3, wpb=15175.203, bsz=560.871, num_updates=23677, lr=0.000205512, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7029, train_wall=6350 | |
| epoch 003: 5970 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.356, bsz=560.801, num_updates=23687, lr=0.000205468, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7032, train_wall=6353 | |
| epoch 003: 5980 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52422, ups=3, wpb=15175.411, bsz=560.962, num_updates=23697, lr=0.000205425, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7035, train_wall=6356 | |
| epoch 003: 5990 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52422, ups=3, wpb=15175.247, bsz=560.829, num_updates=23707, lr=0.000205382, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7038, train_wall=6358 | |
| epoch 003: 6000 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.366, bsz=560.777, num_updates=23717, lr=0.000205338, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7041, train_wall=6361 | |
| epoch 003: 6010 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52424, ups=3, wpb=15175.560, bsz=560.845, num_updates=23727, lr=0.000205295, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7044, train_wall=6364 | |
| epoch 003: 6020 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.283, bsz=560.758, num_updates=23737, lr=0.000205252, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7047, train_wall=6366 | |
| epoch 003: 6030 / 8862 loss=4.284, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.313, bsz=560.684, num_updates=23747, lr=0.000205209, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7050, train_wall=6369 | |
| epoch 003: 6040 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.636, bsz=560.927, num_updates=23757, lr=0.000205165, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7052, train_wall=6372 | |
| epoch 003: 6050 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.579, bsz=560.849, num_updates=23767, lr=0.000205122, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7055, train_wall=6374 | |
| epoch 003: 6060 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52423, ups=3, wpb=15175.805, bsz=560.781, num_updates=23777, lr=0.000205079, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7058, train_wall=6377 | |
| epoch 003: 6070 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52422, ups=3, wpb=15175.741, bsz=560.800, num_updates=23787, lr=0.000205036, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7061, train_wall=6380 | |
| epoch 003: 6080 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52420, ups=3, wpb=15175.420, bsz=560.887, num_updates=23797, lr=0.000204993, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7064, train_wall=6382 | |
| epoch 003: 6090 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52421, ups=3, wpb=15175.569, bsz=560.792, num_updates=23807, lr=0.00020495, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7067, train_wall=6385 | |
| epoch 003: 6100 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52422, ups=3, wpb=15175.679, bsz=560.632, num_updates=23817, lr=0.000204907, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7070, train_wall=6388 | |
| epoch 003: 6110 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52419, ups=3, wpb=15174.903, bsz=560.620, num_updates=23827, lr=0.000204864, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7073, train_wall=6390 | |
| epoch 003: 6120 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52418, ups=3, wpb=15174.969, bsz=560.798, num_updates=23837, lr=0.000204821, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7076, train_wall=6393 | |
| epoch 003: 6130 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52418, ups=3, wpb=15174.946, bsz=560.737, num_updates=23847, lr=0.000204778, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7079, train_wall=6396 | |
| epoch 003: 6140 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52419, ups=3, wpb=15175.436, bsz=560.646, num_updates=23857, lr=0.000204735, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7082, train_wall=6399 | |
| epoch 003: 6150 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52419, ups=3, wpb=15175.907, bsz=560.807, num_updates=23867, lr=0.000204692, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7084, train_wall=6401 | |
| epoch 003: 6160 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52418, ups=3, wpb=15176.029, bsz=560.788, num_updates=23877, lr=0.000204649, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7087, train_wall=6404 | |
| epoch 003: 6170 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52418, ups=3, wpb=15176.007, bsz=560.717, num_updates=23887, lr=0.000204606, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7090, train_wall=6407 | |
| epoch 003: 6180 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52415, ups=3, wpb=15175.044, bsz=560.672, num_updates=23897, lr=0.000204564, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7093, train_wall=6409 | |
| epoch 003: 6190 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52414, ups=3, wpb=15174.878, bsz=560.620, num_updates=23907, lr=0.000204521, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7096, train_wall=6412 | |
| epoch 003: 6200 / 8862 loss=4.283, nll_loss=2.649, ppl=6.27, wps=52413, ups=3, wpb=15175.034, bsz=560.766, num_updates=23917, lr=0.000204478, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7099, train_wall=6415 | |
| epoch 003: 6210 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52413, ups=3, wpb=15175.083, bsz=560.815, num_updates=23927, lr=0.000204435, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7102, train_wall=6417 | |
| epoch 003: 6220 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52415, ups=3, wpb=15175.708, bsz=560.909, num_updates=23937, lr=0.000204393, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7105, train_wall=6420 | |
| epoch 003: 6230 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52414, ups=3, wpb=15175.603, bsz=560.900, num_updates=23947, lr=0.00020435, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7108, train_wall=6423 | |
| epoch 003: 6240 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52416, ups=3, wpb=15176.324, bsz=560.854, num_updates=23957, lr=0.000204307, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7111, train_wall=6426 | |
| epoch 003: 6250 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52414, ups=3, wpb=15175.577, bsz=560.759, num_updates=23967, lr=0.000204265, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7114, train_wall=6428 | |
| epoch 003: 6260 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52410, ups=3, wpb=15175.399, bsz=560.625, num_updates=23977, lr=0.000204222, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7117, train_wall=6431 | |
| epoch 003: 6270 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52411, ups=3, wpb=15175.541, bsz=560.511, num_updates=23987, lr=0.000204179, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7119, train_wall=6434 | |
| epoch 003: 6280 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52411, ups=3, wpb=15175.322, bsz=560.463, num_updates=23997, lr=0.000204137, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7122, train_wall=6436 | |
| epoch 003: 6290 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52410, ups=3, wpb=15175.052, bsz=560.377, num_updates=24007, lr=0.000204094, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7125, train_wall=6439 | |
| epoch 003: 6300 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52410, ups=3, wpb=15175.105, bsz=560.285, num_updates=24017, lr=0.000204052, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7128, train_wall=6442 | |
| epoch 003: 6310 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52410, ups=3, wpb=15175.068, bsz=560.284, num_updates=24027, lr=0.000204009, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7131, train_wall=6444 | |
| epoch 003: 6320 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52411, ups=3, wpb=15175.506, bsz=560.171, num_updates=24037, lr=0.000203967, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7134, train_wall=6447 | |
| epoch 003: 6330 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52412, ups=3, wpb=15176.002, bsz=560.269, num_updates=24047, lr=0.000203925, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7137, train_wall=6450 | |
| epoch 003: 6340 / 8862 loss=4.283, nll_loss=2.648, ppl=6.27, wps=52411, ups=3, wpb=15176.081, bsz=560.276, num_updates=24057, lr=0.000203882, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7140, train_wall=6452 | |
| epoch 003: 6350 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52409, ups=3, wpb=15175.998, bsz=560.335, num_updates=24067, lr=0.00020384, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7143, train_wall=6455 | |
| epoch 003: 6360 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52409, ups=3, wpb=15175.819, bsz=560.393, num_updates=24077, lr=0.000203797, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7146, train_wall=6458 | |
| epoch 003: 6370 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52409, ups=3, wpb=15176.145, bsz=560.309, num_updates=24087, lr=0.000203755, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7149, train_wall=6461 | |
| epoch 003: 6380 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52406, ups=3, wpb=15175.552, bsz=560.214, num_updates=24097, lr=0.000203713, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7151, train_wall=6463 | |
| epoch 003: 6390 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52407, ups=3, wpb=15175.487, bsz=560.139, num_updates=24107, lr=0.000203671, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7154, train_wall=6466 | |
| epoch 003: 6400 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52406, ups=3, wpb=15175.394, bsz=560.048, num_updates=24117, lr=0.000203628, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7157, train_wall=6469 | |
| epoch 003: 6410 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52407, ups=3, wpb=15175.914, bsz=560.007, num_updates=24127, lr=0.000203586, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7160, train_wall=6471 | |
| epoch 003: 6420 / 8862 loss=4.282, nll_loss=2.647, ppl=6.27, wps=52408, ups=3, wpb=15176.324, bsz=559.990, num_updates=24137, lr=0.000203544, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7163, train_wall=6474 | |
| epoch 003: 6430 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52409, ups=3, wpb=15176.829, bsz=559.850, num_updates=24147, lr=0.000203502, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7166, train_wall=6477 | |
| epoch 003: 6440 / 8862 loss=4.282, nll_loss=2.647, ppl=6.27, wps=52407, ups=3, wpb=15176.689, bsz=559.918, num_updates=24157, lr=0.00020346, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7169, train_wall=6479 | |
| epoch 003: 6450 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52405, ups=3, wpb=15176.228, bsz=559.815, num_updates=24167, lr=0.000203418, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7172, train_wall=6482 | |
| epoch 003: 6460 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52406, ups=3, wpb=15176.389, bsz=559.726, num_updates=24177, lr=0.000203376, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7175, train_wall=6485 | |
| epoch 003: 6470 / 8862 loss=4.282, nll_loss=2.648, ppl=6.27, wps=52405, ups=3, wpb=15176.428, bsz=559.652, num_updates=24187, lr=0.000203334, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7178, train_wall=6488 | |
| epoch 003: 6480 / 8862 loss=4.282, nll_loss=2.647, ppl=6.26, wps=52405, ups=3, wpb=15176.581, bsz=559.727, num_updates=24197, lr=0.000203292, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7181, train_wall=6490 | |
| epoch 003: 6490 / 8862 loss=4.282, nll_loss=2.647, ppl=6.26, wps=52407, ups=3, wpb=15177.323, bsz=559.666, num_updates=24207, lr=0.00020325, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7184, train_wall=6493 | |
| epoch 003: 6500 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52406, ups=3, wpb=15177.375, bsz=559.805, num_updates=24217, lr=0.000203208, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7186, train_wall=6496 | |
| epoch 003: 6510 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52406, ups=3, wpb=15177.361, bsz=559.632, num_updates=24227, lr=0.000203166, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7189, train_wall=6498 | |
| epoch 003: 6520 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52405, ups=3, wpb=15177.240, bsz=559.676, num_updates=24237, lr=0.000203124, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7192, train_wall=6501 | |
| epoch 003: 6530 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52403, ups=3, wpb=15176.492, bsz=559.648, num_updates=24247, lr=0.000203082, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7195, train_wall=6504 | |
| epoch 003: 6540 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52402, ups=3, wpb=15176.299, bsz=559.654, num_updates=24257, lr=0.00020304, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7198, train_wall=6506 | |
| epoch 003: 6550 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52401, ups=3, wpb=15175.854, bsz=559.635, num_updates=24267, lr=0.000202998, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7201, train_wall=6509 | |
| epoch 003: 6560 / 8862 loss=4.281, nll_loss=2.647, ppl=6.26, wps=52402, ups=3, wpb=15176.296, bsz=559.533, num_updates=24277, lr=0.000202956, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7204, train_wall=6512 | |
| epoch 003: 6570 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52404, ups=3, wpb=15176.664, bsz=559.516, num_updates=24287, lr=0.000202914, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7207, train_wall=6514 | |
| epoch 003: 6580 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52402, ups=3, wpb=15176.372, bsz=559.477, num_updates=24297, lr=0.000202873, gnorm=0.507, clip=0.000, oom=0.000, loss_scale=4.000, wall=7210, train_wall=6517 | |
| epoch 003: 6590 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52403, ups=3, wpb=15176.642, bsz=559.463, num_updates=24307, lr=0.000202831, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7213, train_wall=6520 | |
| epoch 003: 6600 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52403, ups=3, wpb=15176.735, bsz=559.548, num_updates=24317, lr=0.000202789, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7215, train_wall=6522 | |
| epoch 003: 6610 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52402, ups=3, wpb=15177.040, bsz=559.684, num_updates=24327, lr=0.000202748, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7218, train_wall=6525 | |
| epoch 003: 6620 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52404, ups=3, wpb=15177.430, bsz=559.702, num_updates=24337, lr=0.000202706, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7221, train_wall=6528 | |
| epoch 003: 6630 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52403, ups=3, wpb=15177.244, bsz=559.643, num_updates=24347, lr=0.000202664, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7224, train_wall=6531 | |
| epoch 003: 6640 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52404, ups=3, wpb=15177.452, bsz=559.629, num_updates=24357, lr=0.000202623, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7227, train_wall=6533 | |
| epoch 003: 6650 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52403, ups=3, wpb=15177.230, bsz=559.563, num_updates=24367, lr=0.000202581, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7230, train_wall=6536 | |
| epoch 003: 6660 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52403, ups=3, wpb=15177.061, bsz=559.571, num_updates=24377, lr=0.00020254, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7233, train_wall=6539 | |
| epoch 003: 6670 / 8862 loss=4.281, nll_loss=2.646, ppl=6.26, wps=52402, ups=3, wpb=15176.739, bsz=559.726, num_updates=24387, lr=0.000202498, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7236, train_wall=6541 | |
| epoch 003: 6680 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52401, ups=3, wpb=15176.641, bsz=559.710, num_updates=24397, lr=0.000202457, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7239, train_wall=6544 | |
| epoch 003: 6690 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52399, ups=3, wpb=15176.707, bsz=560.158, num_updates=24407, lr=0.000202415, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7242, train_wall=6547 | |
| epoch 003: 6700 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52397, ups=3, wpb=15176.202, bsz=560.067, num_updates=24417, lr=0.000202374, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7245, train_wall=6549 | |
| epoch 003: 6710 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52395, ups=3, wpb=15175.645, bsz=560.057, num_updates=24427, lr=0.000202332, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7247, train_wall=6552 | |
| epoch 003: 6720 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52394, ups=3, wpb=15175.445, bsz=560.047, num_updates=24437, lr=0.000202291, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7250, train_wall=6555 | |
| epoch 003: 6730 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52393, ups=3, wpb=15175.336, bsz=559.977, num_updates=24447, lr=0.000202249, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7253, train_wall=6557 | |
| epoch 003: 6740 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52392, ups=3, wpb=15175.151, bsz=559.924, num_updates=24457, lr=0.000202208, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7256, train_wall=6560 | |
| epoch 003: 6750 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52390, ups=3, wpb=15174.713, bsz=559.913, num_updates=24467, lr=0.000202167, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=4.000, wall=7259, train_wall=6563 | |
| epoch 003: 6760 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52389, ups=3, wpb=15174.625, bsz=559.769, num_updates=24477, lr=0.000202125, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7262, train_wall=6566 | |
| epoch 003: 6770 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52390, ups=3, wpb=15175.180, bsz=559.763, num_updates=24487, lr=0.000202084, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7265, train_wall=6568 | |
| epoch 003: 6780 / 8862 loss=4.280, nll_loss=2.646, ppl=6.26, wps=52388, ups=3, wpb=15175.017, bsz=559.794, num_updates=24497, lr=0.000202043, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7268, train_wall=6571 | |
| epoch 003: 6790 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52388, ups=3, wpb=15175.223, bsz=559.930, num_updates=24507, lr=0.000202002, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7271, train_wall=6574 | |
| epoch 003: 6800 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52389, ups=3, wpb=15176.070, bsz=559.931, num_updates=24517, lr=0.00020196, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7274, train_wall=6576 | |
| epoch 003: 6810 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52388, ups=3, wpb=15175.948, bsz=559.879, num_updates=24527, lr=0.000201919, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7277, train_wall=6579 | |
| epoch 003: 6820 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52386, ups=3, wpb=15175.738, bsz=559.885, num_updates=24537, lr=0.000201878, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7280, train_wall=6582 | |
| epoch 003: 6830 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52381, ups=3, wpb=15174.931, bsz=560.048, num_updates=24547, lr=0.000201837, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7283, train_wall=6585 | |
| epoch 003: 6840 / 8862 loss=4.280, nll_loss=2.645, ppl=6.26, wps=52380, ups=3, wpb=15174.901, bsz=559.863, num_updates=24557, lr=0.000201796, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7286, train_wall=6587 | |
| epoch 003: 6850 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52380, ups=3, wpb=15175.100, bsz=559.976, num_updates=24567, lr=0.000201755, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7289, train_wall=6590 | |
| epoch 003: 6860 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52378, ups=3, wpb=15175.126, bsz=559.943, num_updates=24577, lr=0.000201714, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7291, train_wall=6593 | |
| epoch 003: 6870 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52379, ups=3, wpb=15175.426, bsz=559.816, num_updates=24587, lr=0.000201673, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7294, train_wall=6596 | |
| epoch 003: 6880 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52378, ups=3, wpb=15175.573, bsz=559.752, num_updates=24597, lr=0.000201632, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7297, train_wall=6598 | |
| epoch 003: 6890 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52377, ups=3, wpb=15175.426, bsz=559.797, num_updates=24607, lr=0.000201591, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7300, train_wall=6601 | |
| epoch 003: 6900 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52376, ups=3, wpb=15175.500, bsz=559.768, num_updates=24617, lr=0.00020155, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7303, train_wall=6604 | |
| epoch 003: 6910 / 8862 loss=4.279, nll_loss=2.645, ppl=6.25, wps=52374, ups=3, wpb=15174.996, bsz=559.770, num_updates=24627, lr=0.000201509, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7306, train_wall=6606 | |
| epoch 003: 6920 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52373, ups=3, wpb=15175.177, bsz=559.815, num_updates=24637, lr=0.000201468, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7309, train_wall=6609 | |
| epoch 003: 6930 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52373, ups=3, wpb=15175.501, bsz=559.792, num_updates=24647, lr=0.000201427, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7312, train_wall=6612 | |
| epoch 003: 6940 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52371, ups=3, wpb=15175.417, bsz=559.803, num_updates=24657, lr=0.000201386, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7315, train_wall=6615 | |
| epoch 003: 6950 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52369, ups=3, wpb=15175.009, bsz=559.707, num_updates=24667, lr=0.000201345, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7318, train_wall=6617 | |
| epoch 003: 6960 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52368, ups=3, wpb=15174.837, bsz=559.643, num_updates=24677, lr=0.000201305, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7321, train_wall=6620 | |
| epoch 003: 6970 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52367, ups=3, wpb=15175.060, bsz=559.830, num_updates=24687, lr=0.000201264, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7324, train_wall=6623 | |
| epoch 003: 6980 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52364, ups=3, wpb=15174.462, bsz=559.898, num_updates=24697, lr=0.000201223, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7327, train_wall=6625 | |
| epoch 003: 6990 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52362, ups=3, wpb=15174.135, bsz=559.822, num_updates=24707, lr=0.000201182, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7330, train_wall=6628 | |
| epoch 003: 7000 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52361, ups=3, wpb=15174.276, bsz=559.741, num_updates=24717, lr=0.000201142, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7333, train_wall=6631 | |
| epoch 003: 7010 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52361, ups=3, wpb=15174.519, bsz=559.650, num_updates=24727, lr=0.000201101, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7336, train_wall=6634 | |
| epoch 003: 7020 / 8862 loss=4.278, nll_loss=2.644, ppl=6.25, wps=52361, ups=3, wpb=15174.923, bsz=559.584, num_updates=24737, lr=0.00020106, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7338, train_wall=6636 | |
| epoch 003: 7030 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52359, ups=3, wpb=15174.660, bsz=559.496, num_updates=24747, lr=0.00020102, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7341, train_wall=6639 | |
| epoch 003: 7040 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52359, ups=3, wpb=15174.815, bsz=559.491, num_updates=24757, lr=0.000200979, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7344, train_wall=6642 | |
| epoch 003: 7050 / 8862 loss=4.279, nll_loss=2.644, ppl=6.25, wps=52357, ups=3, wpb=15174.806, bsz=559.461, num_updates=24767, lr=0.000200939, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7347, train_wall=6644 | |
| epoch 003: 7060 / 8862 loss=4.278, nll_loss=2.644, ppl=6.25, wps=52357, ups=3, wpb=15175.277, bsz=559.643, num_updates=24777, lr=0.000200898, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7350, train_wall=6647 | |
| epoch 003: 7070 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52357, ups=3, wpb=15175.744, bsz=559.618, num_updates=24787, lr=0.000200857, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7353, train_wall=6650 | |
| epoch 003: 7080 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52356, ups=3, wpb=15175.839, bsz=559.591, num_updates=24797, lr=0.000200817, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7356, train_wall=6653 | |
| epoch 003: 7090 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52355, ups=3, wpb=15175.776, bsz=559.606, num_updates=24807, lr=0.000200776, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7359, train_wall=6655 | |
| epoch 003: 7100 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52354, ups=3, wpb=15175.863, bsz=559.596, num_updates=24817, lr=0.000200736, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7362, train_wall=6658 | |
| epoch 003: 7110 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52354, ups=3, wpb=15175.854, bsz=559.477, num_updates=24827, lr=0.000200696, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7365, train_wall=6661 | |
| epoch 003: 7120 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52352, ups=3, wpb=15175.994, bsz=559.681, num_updates=24837, lr=0.000200655, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7368, train_wall=6664 | |
| epoch 003: 7130 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52351, ups=3, wpb=15176.080, bsz=559.864, num_updates=24847, lr=0.000200615, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7371, train_wall=6666 | |
| epoch 003: 7140 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52350, ups=3, wpb=15176.400, bsz=559.907, num_updates=24857, lr=0.000200574, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7374, train_wall=6669 | |
| epoch 003: 7150 / 8862 loss=4.278, nll_loss=2.643, ppl=6.25, wps=52347, ups=3, wpb=15175.842, bsz=560.039, num_updates=24867, lr=0.000200534, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7377, train_wall=6672 | |
| epoch 003: 7160 / 8862 loss=4.277, nll_loss=2.643, ppl=6.24, wps=52346, ups=3, wpb=15175.792, bsz=559.962, num_updates=24877, lr=0.000200494, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7380, train_wall=6675 | |
| epoch 003: 7170 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52346, ups=3, wpb=15175.691, bsz=559.833, num_updates=24887, lr=0.000200454, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7383, train_wall=6677 | |
| epoch 003: 7180 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52345, ups=3, wpb=15175.978, bsz=559.996, num_updates=24897, lr=0.000200413, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7386, train_wall=6680 | |
| epoch 003: 7190 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52344, ups=3, wpb=15175.908, bsz=559.932, num_updates=24907, lr=0.000200373, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7389, train_wall=6683 | |
| epoch 003: 7200 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52342, ups=3, wpb=15175.734, bsz=559.860, num_updates=24917, lr=0.000200333, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7391, train_wall=6685 | |
| epoch 003: 7210 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52341, ups=3, wpb=15175.761, bsz=559.878, num_updates=24927, lr=0.000200293, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7394, train_wall=6688 | |
| epoch 003: 7220 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52340, ups=3, wpb=15175.471, bsz=559.820, num_updates=24937, lr=0.000200252, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7397, train_wall=6691 | |
| epoch 003: 7230 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52338, ups=3, wpb=15175.350, bsz=559.824, num_updates=24947, lr=0.000200212, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7400, train_wall=6694 | |
| epoch 003: 7240 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52335, ups=3, wpb=15175.036, bsz=559.988, num_updates=24957, lr=0.000200172, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7403, train_wall=6696 | |
| epoch 003: 7250 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52332, ups=3, wpb=15174.668, bsz=560.036, num_updates=24967, lr=0.000200132, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7406, train_wall=6699 | |
| epoch 003: 7260 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52330, ups=3, wpb=15174.414, bsz=559.980, num_updates=24977, lr=0.000200092, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7409, train_wall=6702 | |
| epoch 003: 7270 / 8862 loss=4.277, nll_loss=2.642, ppl=6.24, wps=52329, ups=3, wpb=15174.518, bsz=560.009, num_updates=24987, lr=0.000200052, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7412, train_wall=6705 | |
| epoch 003: 7280 / 8862 loss=4.276, nll_loss=2.642, ppl=6.24, wps=52329, ups=3, wpb=15174.909, bsz=560.014, num_updates=24997, lr=0.000200012, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7415, train_wall=6707 | |
| epoch 003: 7290 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52327, ups=3, wpb=15174.683, bsz=560.064, num_updates=25007, lr=0.000199972, gnorm=0.506, clip=0.000, oom=0.000, loss_scale=8.000, wall=7418, train_wall=6710 | |
| epoch 003: 7300 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52327, ups=3, wpb=15175.060, bsz=560.098, num_updates=25017, lr=0.000199932, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7421, train_wall=6713 | |
| epoch 003: 7310 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52325, ups=3, wpb=15174.983, bsz=560.049, num_updates=25027, lr=0.000199892, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7424, train_wall=6715 | |
| epoch 003: 7320 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52325, ups=3, wpb=15175.191, bsz=560.066, num_updates=25037, lr=0.000199852, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7427, train_wall=6718 | |
| epoch 003: 7330 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52323, ups=3, wpb=15175.017, bsz=560.082, num_updates=25047, lr=0.000199812, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7430, train_wall=6721 | |
| epoch 003: 7340 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52323, ups=3, wpb=15175.417, bsz=560.140, num_updates=25057, lr=0.000199772, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7433, train_wall=6724 | |
| epoch 003: 7350 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52322, ups=3, wpb=15175.715, bsz=560.253, num_updates=25067, lr=0.000199733, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7436, train_wall=6726 | |
| epoch 003: 7360 / 8862 loss=4.276, nll_loss=2.641, ppl=6.24, wps=52322, ups=3, wpb=15175.974, bsz=560.200, num_updates=25077, lr=0.000199693, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7439, train_wall=6729 | |
| epoch 003: 7370 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52321, ups=3, wpb=15176.170, bsz=560.211, num_updates=25087, lr=0.000199653, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7442, train_wall=6732 | |
| epoch 003: 7380 / 8862 loss=4.275, nll_loss=2.640, ppl=6.24, wps=52320, ups=3, wpb=15175.974, bsz=560.163, num_updates=25097, lr=0.000199613, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7445, train_wall=6735 | |
| epoch 003: 7390 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52319, ups=3, wpb=15176.138, bsz=560.156, num_updates=25107, lr=0.000199573, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7448, train_wall=6737 | |
| epoch 003: 7400 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52319, ups=3, wpb=15176.373, bsz=560.136, num_updates=25117, lr=0.000199534, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7451, train_wall=6740 | |
| epoch 003: 7410 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52320, ups=3, wpb=15177.079, bsz=560.140, num_updates=25127, lr=0.000199494, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7453, train_wall=6743 | |
| epoch 003: 7420 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52318, ups=3, wpb=15176.841, bsz=560.098, num_updates=25137, lr=0.000199454, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7456, train_wall=6746 | |
| epoch 003: 7430 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52316, ups=3, wpb=15176.924, bsz=560.194, num_updates=25147, lr=0.000199415, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7459, train_wall=6748 | |
| epoch 003: 7440 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52314, ups=3, wpb=15176.712, bsz=560.113, num_updates=25157, lr=0.000199375, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7462, train_wall=6751 | |
| epoch 003: 7450 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52314, ups=3, wpb=15176.794, bsz=560.154, num_updates=25167, lr=0.000199335, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7465, train_wall=6754 | |
| epoch 003: 7460 / 8862 loss=4.275, nll_loss=2.640, ppl=6.23, wps=52311, ups=3, wpb=15176.226, bsz=560.139, num_updates=25177, lr=0.000199296, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7468, train_wall=6756 | |
| epoch 003: 7470 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52310, ups=3, wpb=15176.615, bsz=560.152, num_updates=25187, lr=0.000199256, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7471, train_wall=6759 | |
| epoch 003: 7480 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52309, ups=3, wpb=15176.463, bsz=560.053, num_updates=25197, lr=0.000199217, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7474, train_wall=6762 | |
| epoch 003: 7490 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52310, ups=3, wpb=15177.112, bsz=559.990, num_updates=25207, lr=0.000199177, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7477, train_wall=6765 | |
| epoch 003: 7500 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52308, ups=3, wpb=15176.777, bsz=559.979, num_updates=25217, lr=0.000199138, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7480, train_wall=6767 | |
| epoch 003: 7510 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52308, ups=3, wpb=15176.936, bsz=559.846, num_updates=25227, lr=0.000199098, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7483, train_wall=6770 | |
| epoch 003: 7520 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52307, ups=3, wpb=15176.928, bsz=559.858, num_updates=25237, lr=0.000199059, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7486, train_wall=6773 | |
| epoch 003: 7530 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52305, ups=3, wpb=15176.613, bsz=559.764, num_updates=25247, lr=0.000199019, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7489, train_wall=6776 | |
| epoch 003: 7540 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52304, ups=3, wpb=15176.514, bsz=559.722, num_updates=25257, lr=0.00019898, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7492, train_wall=6778 | |
| epoch 003: 7550 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52304, ups=3, wpb=15176.545, bsz=559.609, num_updates=25267, lr=0.00019894, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7495, train_wall=6781 | |
| epoch 003: 7560 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52301, ups=3, wpb=15176.043, bsz=559.557, num_updates=25277, lr=0.000198901, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7498, train_wall=6784 | |
| epoch 003: 7570 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52300, ups=3, wpb=15175.964, bsz=559.508, num_updates=25287, lr=0.000198862, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7501, train_wall=6786 | |
| epoch 003: 7580 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52299, ups=3, wpb=15175.928, bsz=559.601, num_updates=25297, lr=0.000198822, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7504, train_wall=6789 | |
| epoch 003: 7590 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52299, ups=3, wpb=15176.039, bsz=559.649, num_updates=25307, lr=0.000198783, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7506, train_wall=6792 | |
| epoch 003: 7600 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52299, ups=3, wpb=15176.384, bsz=559.640, num_updates=25317, lr=0.000198744, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7509, train_wall=6795 | |
| epoch 003: 7610 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52296, ups=3, wpb=15175.717, bsz=559.555, num_updates=25327, lr=0.000198705, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7512, train_wall=6797 | |
| epoch 003: 7620 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52294, ups=3, wpb=15175.526, bsz=559.451, num_updates=25337, lr=0.000198665, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7515, train_wall=6800 | |
| epoch 003: 7630 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52294, ups=3, wpb=15175.722, bsz=559.374, num_updates=25347, lr=0.000198626, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7518, train_wall=6803 | |
| epoch 003: 7640 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52293, ups=3, wpb=15175.738, bsz=559.353, num_updates=25357, lr=0.000198587, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7521, train_wall=6805 | |
| epoch 003: 7650 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52293, ups=3, wpb=15176.079, bsz=559.347, num_updates=25367, lr=0.000198548, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7524, train_wall=6808 | |
| epoch 003: 7660 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52291, ups=3, wpb=15176.005, bsz=559.270, num_updates=25377, lr=0.000198509, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7527, train_wall=6811 | |
| epoch 003: 7670 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52291, ups=3, wpb=15176.171, bsz=559.331, num_updates=25387, lr=0.00019847, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7530, train_wall=6814 | |
| epoch 003: 7680 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52290, ups=3, wpb=15176.415, bsz=559.335, num_updates=25397, lr=0.000198431, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7533, train_wall=6816 | |
| epoch 003: 7690 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52289, ups=3, wpb=15176.500, bsz=559.425, num_updates=25407, lr=0.000198392, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7536, train_wall=6819 | |
| epoch 003: 7700 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52289, ups=3, wpb=15176.844, bsz=559.335, num_updates=25417, lr=0.000198353, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7539, train_wall=6822 | |
| epoch 003: 7710 / 8862 loss=4.274, nll_loss=2.638, ppl=6.23, wps=52288, ups=3, wpb=15176.941, bsz=559.310, num_updates=25427, lr=0.000198314, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7542, train_wall=6825 | |
| epoch 003: 7720 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52286, ups=3, wpb=15176.645, bsz=559.270, num_updates=25437, lr=0.000198275, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7545, train_wall=6827 | |
| epoch 003: 7730 / 8862 loss=4.274, nll_loss=2.639, ppl=6.23, wps=52285, ups=3, wpb=15176.713, bsz=559.191, num_updates=25447, lr=0.000198236, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7548, train_wall=6830 | |
| epoch 003: 7740 / 8862 loss=4.273, nll_loss=2.638, ppl=6.23, wps=52284, ups=3, wpb=15176.639, bsz=559.214, num_updates=25457, lr=0.000198197, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7551, train_wall=6833 | |
| epoch 003: 7750 / 8862 loss=4.273, nll_loss=2.638, ppl=6.23, wps=52283, ups=3, wpb=15176.836, bsz=559.191, num_updates=25467, lr=0.000198158, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7554, train_wall=6835 | |
| epoch 003: 7760 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52283, ups=3, wpb=15177.258, bsz=559.353, num_updates=25477, lr=0.000198119, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7557, train_wall=6838 | |
| epoch 003: 7770 / 8862 loss=4.273, nll_loss=2.638, ppl=6.23, wps=52281, ups=3, wpb=15176.967, bsz=559.292, num_updates=25487, lr=0.00019808, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7560, train_wall=6841 | |
| epoch 003: 7780 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52281, ups=3, wpb=15177.106, bsz=559.281, num_updates=25497, lr=0.000198041, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7562, train_wall=6844 | |
| epoch 003: 7790 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52280, ups=3, wpb=15176.977, bsz=559.283, num_updates=25507, lr=0.000198002, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7565, train_wall=6846 | |
| epoch 003: 7800 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52281, ups=3, wpb=15177.460, bsz=559.400, num_updates=25517, lr=0.000197964, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7568, train_wall=6849 | |
| epoch 003: 7810 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52280, ups=3, wpb=15177.339, bsz=559.276, num_updates=25527, lr=0.000197925, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7571, train_wall=6852 | |
| epoch 003: 7820 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52278, ups=3, wpb=15176.954, bsz=559.198, num_updates=25537, lr=0.000197886, gnorm=0.505, clip=0.000, oom=0.000, loss_scale=8.000, wall=7574, train_wall=6855 | |
| epoch 003: 7830 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52277, ups=3, wpb=15177.004, bsz=559.261, num_updates=25547, lr=0.000197847, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7577, train_wall=6857 | |
| epoch 003: 7840 / 8862 loss=4.273, nll_loss=2.638, ppl=6.22, wps=52276, ups=3, wpb=15176.780, bsz=559.171, num_updates=25557, lr=0.000197809, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7580, train_wall=6860 | |
| epoch 003: 7850 / 8862 loss=4.273, nll_loss=2.637, ppl=6.22, wps=52273, ups=3, wpb=15176.651, bsz=559.317, num_updates=25567, lr=0.00019777, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7583, train_wall=6863 | |
| epoch 003: 7860 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52273, ups=3, wpb=15176.808, bsz=559.356, num_updates=25577, lr=0.000197731, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7586, train_wall=6865 | |
| epoch 003: 7870 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52272, ups=3, wpb=15176.795, bsz=559.350, num_updates=25587, lr=0.000197693, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7589, train_wall=6868 | |
| epoch 003: 7880 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52271, ups=3, wpb=15176.816, bsz=559.313, num_updates=25597, lr=0.000197654, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7592, train_wall=6871 | |
| epoch 003: 7890 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52269, ups=3, wpb=15176.561, bsz=559.335, num_updates=25607, lr=0.000197615, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7595, train_wall=6874 | |
| epoch 003: 7900 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52269, ups=3, wpb=15176.895, bsz=559.301, num_updates=25617, lr=0.000197577, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7598, train_wall=6876 | |
| epoch 003: 7910 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52267, ups=3, wpb=15176.862, bsz=559.398, num_updates=25627, lr=0.000197538, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7601, train_wall=6879 | |
| epoch 003: 7920 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52266, ups=3, wpb=15176.796, bsz=559.384, num_updates=25637, lr=0.0001975, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7604, train_wall=6882 | |
| epoch 003: 7930 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52264, ups=3, wpb=15176.114, bsz=559.301, num_updates=25647, lr=0.000197461, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7607, train_wall=6885 | |
| epoch 003: 7940 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52262, ups=3, wpb=15176.094, bsz=559.375, num_updates=25657, lr=0.000197423, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7610, train_wall=6887 | |
| epoch 003: 7950 / 8862 loss=4.272, nll_loss=2.637, ppl=6.22, wps=52263, ups=3, wpb=15176.563, bsz=559.375, num_updates=25667, lr=0.000197384, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7613, train_wall=6890 | |
| epoch 003: 7960 / 8862 loss=4.272, nll_loss=2.636, ppl=6.22, wps=52262, ups=3, wpb=15176.817, bsz=559.323, num_updates=25677, lr=0.000197346, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7616, train_wall=6893 | |
| epoch 003: 7970 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52262, ups=3, wpb=15176.910, bsz=559.286, num_updates=25687, lr=0.000197307, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7618, train_wall=6895 | |
| epoch 003: 7980 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52261, ups=3, wpb=15177.029, bsz=559.247, num_updates=25697, lr=0.000197269, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7621, train_wall=6898 | |
| epoch 003: 7990 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52260, ups=3, wpb=15177.083, bsz=559.285, num_updates=25707, lr=0.000197231, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7624, train_wall=6901 | |
| epoch 003: 8000 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52260, ups=3, wpb=15177.239, bsz=559.265, num_updates=25717, lr=0.000197192, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7627, train_wall=6904 | |
| epoch 003: 8010 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52261, ups=3, wpb=15177.644, bsz=559.186, num_updates=25727, lr=0.000197154, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7630, train_wall=6906 | |
| epoch 003: 8020 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52260, ups=3, wpb=15177.608, bsz=559.207, num_updates=25737, lr=0.000197116, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7633, train_wall=6909 | |
| epoch 003: 8030 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52258, ups=3, wpb=15177.400, bsz=559.167, num_updates=25747, lr=0.000197077, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7636, train_wall=6912 | |
| epoch 003: 8040 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52257, ups=3, wpb=15177.506, bsz=559.143, num_updates=25757, lr=0.000197039, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7639, train_wall=6915 | |
| epoch 003: 8050 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52256, ups=3, wpb=15177.309, bsz=559.066, num_updates=25767, lr=0.000197001, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7642, train_wall=6917 | |
| epoch 003: 8060 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52255, ups=3, wpb=15177.408, bsz=559.096, num_updates=25777, lr=0.000196963, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7645, train_wall=6920 | |
| epoch 003: 8070 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52255, ups=3, wpb=15177.568, bsz=559.016, num_updates=25787, lr=0.000196924, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7648, train_wall=6923 | |
| epoch 003: 8080 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52254, ups=3, wpb=15177.661, bsz=559.068, num_updates=25797, lr=0.000196886, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7651, train_wall=6925 | |
| epoch 003: 8090 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52253, ups=3, wpb=15177.700, bsz=559.087, num_updates=25807, lr=0.000196848, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7654, train_wall=6928 | |
| epoch 003: 8100 / 8862 loss=4.271, nll_loss=2.636, ppl=6.22, wps=52252, ups=3, wpb=15177.821, bsz=559.118, num_updates=25817, lr=0.00019681, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7657, train_wall=6931 | |
| epoch 003: 8110 / 8862 loss=4.271, nll_loss=2.636, ppl=6.21, wps=52252, ups=3, wpb=15178.066, bsz=559.190, num_updates=25827, lr=0.000196772, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7660, train_wall=6934 | |
| epoch 003: 8120 / 8862 loss=4.271, nll_loss=2.636, ppl=6.21, wps=52252, ups=3, wpb=15178.400, bsz=559.098, num_updates=25837, lr=0.000196734, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7663, train_wall=6936 | |
| epoch 003: 8130 / 8862 loss=4.271, nll_loss=2.636, ppl=6.21, wps=52250, ups=3, wpb=15178.759, bsz=559.387, num_updates=25847, lr=0.000196696, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7666, train_wall=6939 | |
| epoch 003: 8140 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52249, ups=3, wpb=15178.807, bsz=559.453, num_updates=25857, lr=0.000196658, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7669, train_wall=6942 | |
| epoch 003: 8150 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52248, ups=3, wpb=15178.873, bsz=559.373, num_updates=25867, lr=0.00019662, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7672, train_wall=6945 | |
| epoch 003: 8160 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52247, ups=3, wpb=15178.816, bsz=559.414, num_updates=25877, lr=0.000196582, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7675, train_wall=6947 | |
| epoch 003: 8170 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52246, ups=3, wpb=15178.737, bsz=559.570, num_updates=25887, lr=0.000196544, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7678, train_wall=6950 | |
| epoch 003: 8180 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52245, ups=3, wpb=15178.643, bsz=559.433, num_updates=25897, lr=0.000196506, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7680, train_wall=6953 | |
| epoch 003: 8190 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52242, ups=3, wpb=15177.987, bsz=559.394, num_updates=25907, lr=0.000196468, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7683, train_wall=6956 | |
| epoch 003: 8200 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52242, ups=3, wpb=15178.240, bsz=559.370, num_updates=25917, lr=0.00019643, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7686, train_wall=6958 | |
| epoch 003: 8210 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52238, ups=3, wpb=15177.255, bsz=559.296, num_updates=25927, lr=0.000196392, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7689, train_wall=6961 | |
| epoch 003: 8220 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52238, ups=3, wpb=15177.176, bsz=559.235, num_updates=25937, lr=0.000196354, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7692, train_wall=6964 | |
| epoch 003: 8230 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52237, ups=3, wpb=15177.284, bsz=559.225, num_updates=25947, lr=0.000196316, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7695, train_wall=6966 | |
| epoch 003: 8240 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52236, ups=3, wpb=15177.344, bsz=559.222, num_updates=25957, lr=0.000196279, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7698, train_wall=6969 | |
| epoch 003: 8250 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52236, ups=3, wpb=15177.419, bsz=559.219, num_updates=25967, lr=0.000196241, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7701, train_wall=6972 | |
| epoch 003: 8260 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52236, ups=3, wpb=15177.605, bsz=559.170, num_updates=25977, lr=0.000196203, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7704, train_wall=6975 | |
| epoch 003: 8270 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52234, ups=3, wpb=15177.437, bsz=559.108, num_updates=25987, lr=0.000196165, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7707, train_wall=6977 | |
| epoch 003: 8280 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52232, ups=3, wpb=15177.231, bsz=559.004, num_updates=25997, lr=0.000196127, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7710, train_wall=6980 | |
| epoch 003: 8290 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52231, ups=3, wpb=15177.469, bsz=558.973, num_updates=26007, lr=0.00019609, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7713, train_wall=6983 | |
| epoch 003: 8300 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52230, ups=3, wpb=15177.508, bsz=558.866, num_updates=26017, lr=0.000196052, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7716, train_wall=6986 | |
| epoch 003: 8310 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52229, ups=3, wpb=15177.446, bsz=558.915, num_updates=26027, lr=0.000196014, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7719, train_wall=6988 | |
| epoch 003: 8320 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52227, ups=3, wpb=15177.312, bsz=558.859, num_updates=26037, lr=0.000195977, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7722, train_wall=6991 | |
| epoch 003: 8330 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52227, ups=3, wpb=15177.432, bsz=558.792, num_updates=26047, lr=0.000195939, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7725, train_wall=6994 | |
| epoch 003: 8340 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52226, ups=3, wpb=15177.513, bsz=558.692, num_updates=26057, lr=0.000195902, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7728, train_wall=6997 | |
| epoch 003: 8350 / 8862 loss=4.271, nll_loss=2.635, ppl=6.21, wps=52225, ups=3, wpb=15177.242, bsz=558.666, num_updates=26067, lr=0.000195864, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7731, train_wall=6999 | |
| epoch 003: 8360 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52223, ups=3, wpb=15177.233, bsz=558.802, num_updates=26077, lr=0.000195826, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7734, train_wall=7002 | |
| epoch 003: 8370 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52221, ups=3, wpb=15176.950, bsz=558.755, num_updates=26087, lr=0.000195789, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7737, train_wall=7005 | |
| epoch 003: 8380 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52219, ups=3, wpb=15176.717, bsz=558.706, num_updates=26097, lr=0.000195751, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7739, train_wall=7008 | |
| epoch 003: 8390 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52219, ups=3, wpb=15177.083, bsz=558.764, num_updates=26107, lr=0.000195714, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7742, train_wall=7010 | |
| epoch 003: 8400 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52218, ups=3, wpb=15176.936, bsz=558.703, num_updates=26117, lr=0.000195676, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7745, train_wall=7013 | |
| epoch 003: 8410 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52216, ups=3, wpb=15176.874, bsz=558.763, num_updates=26127, lr=0.000195639, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7748, train_wall=7016 | |
| epoch 003: 8420 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52215, ups=3, wpb=15176.883, bsz=558.831, num_updates=26137, lr=0.000195601, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7751, train_wall=7019 | |
| epoch 003: 8430 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52215, ups=3, wpb=15177.025, bsz=558.882, num_updates=26147, lr=0.000195564, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7754, train_wall=7021 | |
| epoch 003: 8440 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52214, ups=3, wpb=15176.893, bsz=558.949, num_updates=26157, lr=0.000195527, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7757, train_wall=7024 | |
| epoch 003: 8450 / 8862 loss=4.270, nll_loss=2.635, ppl=6.21, wps=52213, ups=3, wpb=15176.664, bsz=558.966, num_updates=26167, lr=0.000195489, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7760, train_wall=7027 | |
| epoch 003: 8460 / 8862 loss=4.270, nll_loss=2.634, ppl=6.21, wps=52213, ups=3, wpb=15176.677, bsz=558.996, num_updates=26177, lr=0.000195452, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7763, train_wall=7029 | |
| epoch 003: 8470 / 8862 loss=4.270, nll_loss=2.634, ppl=6.21, wps=52213, ups=3, wpb=15176.806, bsz=558.953, num_updates=26187, lr=0.000195415, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7766, train_wall=7032 | |
| epoch 003: 8480 / 8862 loss=4.269, nll_loss=2.634, ppl=6.21, wps=52212, ups=3, wpb=15176.459, bsz=559.019, num_updates=26197, lr=0.000195377, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7769, train_wall=7035 | |
| epoch 003: 8490 / 8862 loss=4.269, nll_loss=2.634, ppl=6.21, wps=52212, ups=3, wpb=15176.522, bsz=558.931, num_updates=26207, lr=0.00019534, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7772, train_wall=7037 | |
| epoch 003: 8500 / 8862 loss=4.269, nll_loss=2.634, ppl=6.21, wps=52212, ups=3, wpb=15176.515, bsz=558.877, num_updates=26217, lr=0.000195303, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7775, train_wall=7040 | |
| epoch 003: 8510 / 8862 loss=4.269, nll_loss=2.634, ppl=6.21, wps=52213, ups=3, wpb=15176.747, bsz=558.839, num_updates=26227, lr=0.000195266, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7778, train_wall=7043 | |
| epoch 003: 8520 / 8862 loss=4.269, nll_loss=2.634, ppl=6.21, wps=52213, ups=3, wpb=15176.856, bsz=558.897, num_updates=26237, lr=0.000195228, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7780, train_wall=7045 | |
| epoch 003: 8530 / 8862 loss=4.269, nll_loss=2.633, ppl=6.21, wps=52212, ups=3, wpb=15176.703, bsz=558.864, num_updates=26247, lr=0.000195191, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7783, train_wall=7048 | |
| epoch 003: 8540 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.866, bsz=558.861, num_updates=26257, lr=0.000195154, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7786, train_wall=7051 | |
| epoch 003: 8550 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.920, bsz=558.922, num_updates=26267, lr=0.000195117, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7789, train_wall=7054 | |
| epoch 003: 8560 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.826, bsz=558.902, num_updates=26277, lr=0.00019508, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7792, train_wall=7056 | |
| epoch 003: 8570 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.889, bsz=558.794, num_updates=26287, lr=0.000195043, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7795, train_wall=7059 | |
| epoch 003: 8580 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.032, bsz=558.725, num_updates=26297, lr=0.000195006, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7798, train_wall=7062 | |
| epoch 003: 8590 / 8862 loss=4.269, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.143, bsz=558.678, num_updates=26307, lr=0.000194968, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7801, train_wall=7064 | |
| epoch 003: 8600 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52215, ups=3, wpb=15177.583, bsz=558.696, num_updates=26317, lr=0.000194931, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7804, train_wall=7067 | |
| epoch 003: 8610 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52215, ups=3, wpb=15177.488, bsz=558.605, num_updates=26327, lr=0.000194894, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7807, train_wall=7070 | |
| epoch 003: 8620 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52216, ups=3, wpb=15177.566, bsz=558.507, num_updates=26337, lr=0.000194857, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7810, train_wall=7072 | |
| epoch 003: 8630 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52215, ups=3, wpb=15177.194, bsz=558.498, num_updates=26347, lr=0.00019482, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7812, train_wall=7075 | |
| epoch 003: 8640 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.693, bsz=558.463, num_updates=26357, lr=0.000194783, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7815, train_wall=7078 | |
| epoch 003: 8650 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52213, ups=3, wpb=15176.737, bsz=558.407, num_updates=26367, lr=0.000194746, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7818, train_wall=7080 | |
| epoch 003: 8660 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15176.861, bsz=558.432, num_updates=26377, lr=0.00019471, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7821, train_wall=7083 | |
| epoch 003: 8670 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52215, ups=3, wpb=15177.273, bsz=558.403, num_updates=26387, lr=0.000194673, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7824, train_wall=7086 | |
| epoch 003: 8680 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52215, ups=3, wpb=15177.357, bsz=558.409, num_updates=26397, lr=0.000194636, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7827, train_wall=7088 | |
| epoch 003: 8690 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.259, bsz=558.423, num_updates=26407, lr=0.000194599, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7830, train_wall=7091 | |
| epoch 003: 8700 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.426, bsz=558.478, num_updates=26417, lr=0.000194562, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7833, train_wall=7094 | |
| epoch 003: 8710 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.674, bsz=558.452, num_updates=26427, lr=0.000194525, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7836, train_wall=7097 | |
| epoch 003: 8720 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15177.648, bsz=558.453, num_updates=26437, lr=0.000194488, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7839, train_wall=7099 | |
| epoch 003: 8730 / 8862 loss=4.268, nll_loss=2.633, ppl=6.20, wps=52214, ups=3, wpb=15178.066, bsz=558.499, num_updates=26447, lr=0.000194452, gnorm=0.504, clip=0.000, oom=0.000, loss_scale=8.000, wall=7842, train_wall=7102 | |
| epoch 003: 8740 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52214, ups=3, wpb=15178.246, bsz=558.522, num_updates=26457, lr=0.000194415, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7845, train_wall=7105 | |
| epoch 003: 8750 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52214, ups=3, wpb=15178.523, bsz=558.487, num_updates=26467, lr=0.000194378, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7848, train_wall=7108 | |
| epoch 003: 8760 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52213, ups=3, wpb=15178.413, bsz=558.505, num_updates=26477, lr=0.000194342, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7851, train_wall=7110 | |
| epoch 003: 8770 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52212, ups=3, wpb=15178.545, bsz=558.447, num_updates=26487, lr=0.000194305, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7853, train_wall=7113 | |
| epoch 003: 8780 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52212, ups=3, wpb=15178.825, bsz=558.460, num_updates=26497, lr=0.000194268, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7856, train_wall=7116 | |
| epoch 003: 8790 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52211, ups=3, wpb=15178.658, bsz=558.449, num_updates=26507, lr=0.000194232, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7859, train_wall=7118 | |
| epoch 003: 8800 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52208, ups=3, wpb=15177.954, bsz=558.443, num_updates=26517, lr=0.000194195, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7862, train_wall=7121 | |
| epoch 003: 8810 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52207, ups=3, wpb=15177.406, bsz=558.349, num_updates=26527, lr=0.000194158, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7865, train_wall=7124 | |
| epoch 003: 8820 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52206, ups=3, wpb=15177.415, bsz=558.255, num_updates=26537, lr=0.000194122, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7868, train_wall=7127 | |
| epoch 003: 8830 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52206, ups=3, wpb=15177.664, bsz=558.233, num_updates=26547, lr=0.000194085, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7871, train_wall=7129 | |
| epoch 003: 8840 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52206, ups=3, wpb=15177.908, bsz=558.190, num_updates=26557, lr=0.000194049, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7874, train_wall=7132 | |
| epoch 003: 8850 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52206, ups=3, wpb=15177.940, bsz=558.145, num_updates=26567, lr=0.000194012, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7877, train_wall=7135 | |
| epoch 003: 8860 / 8862 loss=4.268, nll_loss=2.632, ppl=6.20, wps=52206, ups=3, wpb=15178.191, bsz=558.088, num_updates=26577, lr=0.000193976, gnorm=0.503, clip=0.000, oom=0.000, loss_scale=8.000, wall=7880, train_wall=7137 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 003 | loss 4.268 | nll_loss 2.632 | ppl 6.20 | wps 52199 | ups 3 | wpb 15177.818 | bsz 558.064 | num_updates 26578 | lr 0.000193972 | gnorm 0.503 | clip 0.000 | oom 0.000 | loss_scale 8.000 | wall 7880 | train_wall 7138 | |
| WARNING: 2459 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[34935, 29199, 25522, 50610, 31640, 50522, 29514, 23772, 21318, 30173] | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 003 | valid on 'valid' subset | loss 4.157 | nll_loss 2.394 | ppl 5.25 | num_updates 26578 | best_loss 4.15746 | |
| epoch 004: 10 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51242, ups=0, wpb=15124.455, bsz=506.182, num_updates=26589, lr=0.000193932, gnorm=0.515, clip=0.000, oom=0.000, loss_scale=8.000, wall=7930, train_wall=7144 | |
| epoch 004: 20 / 8862 loss=4.158, nll_loss=2.511, ppl=5.70, wps=51126, ups=0, wpb=15050.429, bsz=580.571, num_updates=26599, lr=0.000193895, gnorm=0.498, clip=0.000, oom=0.000, loss_scale=8.000, wall=7933, train_wall=7146 | |
| epoch 004: 30 / 8862 loss=4.197, nll_loss=2.555, ppl=5.88, wps=51030, ups=1, wpb=15060.000, bsz=548.903, num_updates=26609, lr=0.000193859, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=7936, train_wall=7149 | |
| epoch 004: 40 / 8862 loss=4.214, nll_loss=2.574, ppl=5.96, wps=51010, ups=1, wpb=15047.122, bsz=547.707, num_updates=26619, lr=0.000193822, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=7939, train_wall=7152 | |
| epoch 004: 50 / 8862 loss=4.192, nll_loss=2.550, ppl=5.86, wps=51061, ups=1, wpb=15063.667, bsz=558.275, num_updates=26629, lr=0.000193786, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=7941, train_wall=7154 | |
| epoch 004: 60 / 8862 loss=4.182, nll_loss=2.538, ppl=5.81, wps=51124, ups=1, wpb=15058.344, bsz=547.934, num_updates=26639, lr=0.00019375, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=7944, train_wall=7157 | |
| epoch 004: 70 / 8862 loss=4.176, nll_loss=2.532, ppl=5.78, wps=51152, ups=1, wpb=15062.606, bsz=551.887, num_updates=26649, lr=0.000193713, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=8.000, wall=7947, train_wall=7160 | |
| epoch 004: 80 / 8862 loss=4.179, nll_loss=2.534, ppl=5.79, wps=51264, ups=1, wpb=15093.568, bsz=553.284, num_updates=26659, lr=0.000193677, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=8.000, wall=7950, train_wall=7163 | |
| epoch 004: 90 / 8862 loss=4.165, nll_loss=2.519, ppl=5.73, wps=51297, ups=1, wpb=15116.604, bsz=564.484, num_updates=26669, lr=0.000193641, gnorm=0.483, clip=0.000, oom=0.000, loss_scale=8.000, wall=7953, train_wall=7165 | |
| epoch 004: 100 / 8862 loss=4.163, nll_loss=2.516, ppl=5.72, wps=51361, ups=1, wpb=15131.931, bsz=563.485, num_updates=26679, lr=0.000193604, gnorm=0.484, clip=0.000, oom=0.000, loss_scale=8.000, wall=7956, train_wall=7168 | |
| epoch 004: 110 / 8862 loss=4.176, nll_loss=2.532, ppl=5.78, wps=51385, ups=1, wpb=15128.252, bsz=553.586, num_updates=26689, lr=0.000193568, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=8.000, wall=7959, train_wall=7171 | |
| epoch 004: 120 / 8862 loss=4.181, nll_loss=2.537, ppl=5.80, wps=51527, ups=1, wpb=15157.876, bsz=549.421, num_updates=26699, lr=0.000193532, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=8.000, wall=7962, train_wall=7173 | |
| epoch 004: 130 / 8862 loss=4.178, nll_loss=2.534, ppl=5.79, wps=51535, ups=2, wpb=15148.015, bsz=555.908, num_updates=26709, lr=0.000193496, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=7965, train_wall=7176 | |
| epoch 004: 140 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51644, ups=2, wpb=15166.965, bsz=554.270, num_updates=26719, lr=0.000193459, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=7968, train_wall=7179 | |
| epoch 004: 150 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51700, ups=2, wpb=15174.238, bsz=555.762, num_updates=26729, lr=0.000193423, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=7971, train_wall=7182 | |
| epoch 004: 160 / 8862 loss=4.164, nll_loss=2.517, ppl=5.73, wps=51696, ups=2, wpb=15174.621, bsz=564.373, num_updates=26739, lr=0.000193387, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=7974, train_wall=7184 | |
| epoch 004: 170 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51730, ups=2, wpb=15176.146, bsz=562.667, num_updates=26749, lr=0.000193351, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=7977, train_wall=7187 | |
| epoch 004: 180 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51793, ups=2, wpb=15184.536, bsz=558.983, num_updates=26759, lr=0.000193315, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=7979, train_wall=7190 | |
| epoch 004: 190 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51855, ups=2, wpb=15204.686, bsz=563.560, num_updates=26769, lr=0.000193279, gnorm=0.497, clip=0.000, oom=0.000, loss_scale=8.000, wall=7982, train_wall=7192 | |
| epoch 004: 200 / 8862 loss=4.173, nll_loss=2.527, ppl=5.77, wps=51848, ups=2, wpb=15195.478, bsz=562.269, num_updates=26779, lr=0.000193243, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=7985, train_wall=7195 | |
| epoch 004: 210 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51891, ups=2, wpb=15201.118, bsz=558.028, num_updates=26789, lr=0.000193207, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=7988, train_wall=7198 | |
| epoch 004: 220 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=51869, ups=2, wpb=15187.855, bsz=556.706, num_updates=26799, lr=0.00019317, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=7991, train_wall=7200 | |
| epoch 004: 230 / 8862 loss=4.163, nll_loss=2.516, ppl=5.72, wps=51778, ups=2, wpb=15185.251, bsz=569.905, num_updates=26809, lr=0.000193134, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=7994, train_wall=7203 | |
| epoch 004: 240 / 8862 loss=4.162, nll_loss=2.515, ppl=5.72, wps=51803, ups=2, wpb=15187.494, bsz=571.917, num_updates=26819, lr=0.000193098, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=7997, train_wall=7206 | |
| epoch 004: 250 / 8862 loss=4.164, nll_loss=2.518, ppl=5.73, wps=51795, ups=2, wpb=15179.685, bsz=569.275, num_updates=26829, lr=0.000193062, gnorm=0.496, clip=0.000, oom=0.000, loss_scale=8.000, wall=8000, train_wall=7209 | |
| epoch 004: 260 / 8862 loss=4.164, nll_loss=2.518, ppl=5.73, wps=51804, ups=2, wpb=15175.563, bsz=569.226, num_updates=26839, lr=0.000193026, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8003, train_wall=7211 | |
| epoch 004: 270 / 8862 loss=4.164, nll_loss=2.518, ppl=5.73, wps=51836, ups=2, wpb=15180.923, bsz=569.801, num_updates=26849, lr=0.000192991, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8006, train_wall=7214 | |
| epoch 004: 280 / 8862 loss=4.160, nll_loss=2.513, ppl=5.71, wps=51872, ups=2, wpb=15186.249, bsz=568.399, num_updates=26859, lr=0.000192955, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8009, train_wall=7217 | |
| epoch 004: 290 / 8862 loss=4.160, nll_loss=2.513, ppl=5.71, wps=51907, ups=2, wpb=15189.162, bsz=566.378, num_updates=26869, lr=0.000192919, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8012, train_wall=7219 | |
| epoch 004: 300 / 8862 loss=4.157, nll_loss=2.510, ppl=5.70, wps=51881, ups=2, wpb=15181.870, bsz=566.113, num_updates=26879, lr=0.000192883, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8014, train_wall=7222 | |
| epoch 004: 310 / 8862 loss=4.162, nll_loss=2.515, ppl=5.72, wps=51828, ups=2, wpb=15161.839, bsz=567.871, num_updates=26889, lr=0.000192847, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8017, train_wall=7225 | |
| epoch 004: 320 / 8862 loss=4.160, nll_loss=2.513, ppl=5.71, wps=51859, ups=2, wpb=15169.340, bsz=567.153, num_updates=26899, lr=0.000192811, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8020, train_wall=7227 | |
| epoch 004: 330 / 8862 loss=4.157, nll_loss=2.510, ppl=5.70, wps=51885, ups=2, wpb=15176.453, bsz=569.523, num_updates=26909, lr=0.000192775, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8023, train_wall=7230 | |
| epoch 004: 340 / 8862 loss=4.158, nll_loss=2.511, ppl=5.70, wps=51876, ups=2, wpb=15168.850, bsz=568.352, num_updates=26919, lr=0.000192739, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8026, train_wall=7233 | |
| epoch 004: 350 / 8862 loss=4.157, nll_loss=2.510, ppl=5.70, wps=51889, ups=2, wpb=15169.077, bsz=568.729, num_updates=26929, lr=0.000192704, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8029, train_wall=7235 | |
| epoch 004: 360 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51856, ups=2, wpb=15160.003, bsz=567.601, num_updates=26939, lr=0.000192668, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8032, train_wall=7238 | |
| epoch 004: 370 / 8862 loss=4.156, nll_loss=2.509, ppl=5.69, wps=51857, ups=2, wpb=15165.199, bsz=570.544, num_updates=26949, lr=0.000192632, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8035, train_wall=7241 | |
| epoch 004: 380 / 8862 loss=4.155, nll_loss=2.508, ppl=5.69, wps=51844, ups=2, wpb=15157.798, bsz=569.449, num_updates=26959, lr=0.000192596, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8038, train_wall=7244 | |
| epoch 004: 390 / 8862 loss=4.156, nll_loss=2.509, ppl=5.69, wps=51847, ups=2, wpb=15155.432, bsz=567.877, num_updates=26969, lr=0.000192561, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8041, train_wall=7246 | |
| epoch 004: 400 / 8862 loss=4.157, nll_loss=2.510, ppl=5.70, wps=51856, ups=2, wpb=15154.369, bsz=568.219, num_updates=26979, lr=0.000192525, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8044, train_wall=7249 | |
| epoch 004: 410 / 8862 loss=4.156, nll_loss=2.509, ppl=5.69, wps=51848, ups=2, wpb=15150.389, bsz=567.280, num_updates=26989, lr=0.000192489, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8047, train_wall=7252 | |
| epoch 004: 420 / 8862 loss=4.157, nll_loss=2.510, ppl=5.69, wps=51851, ups=2, wpb=15151.758, bsz=567.582, num_updates=26999, lr=0.000192454, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8049, train_wall=7254 | |
| epoch 004: 430 / 8862 loss=4.158, nll_loss=2.511, ppl=5.70, wps=51898, ups=3, wpb=15159.770, bsz=565.309, num_updates=27009, lr=0.000192418, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8052, train_wall=7257 | |
| epoch 004: 440 / 8862 loss=4.160, nll_loss=2.513, ppl=5.71, wps=51906, ups=3, wpb=15160.342, bsz=563.737, num_updates=27019, lr=0.000192382, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8055, train_wall=7260 | |
| epoch 004: 450 / 8862 loss=4.160, nll_loss=2.513, ppl=5.71, wps=51925, ups=3, wpb=15166.204, bsz=562.678, num_updates=27029, lr=0.000192347, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8058, train_wall=7262 | |
| epoch 004: 460 / 8862 loss=4.161, nll_loss=2.514, ppl=5.71, wps=51944, ups=3, wpb=15168.555, bsz=561.735, num_updates=27039, lr=0.000192311, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8061, train_wall=7265 | |
| epoch 004: 470 / 8862 loss=4.163, nll_loss=2.516, ppl=5.72, wps=51949, ups=3, wpb=15167.811, bsz=560.679, num_updates=27049, lr=0.000192276, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8064, train_wall=7268 | |
| epoch 004: 480 / 8862 loss=4.162, nll_loss=2.515, ppl=5.72, wps=51983, ups=3, wpb=15176.225, bsz=562.012, num_updates=27059, lr=0.00019224, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8067, train_wall=7270 | |
| epoch 004: 490 / 8862 loss=4.161, nll_loss=2.514, ppl=5.71, wps=51992, ups=3, wpb=15179.248, bsz=560.978, num_updates=27069, lr=0.000192205, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8070, train_wall=7273 | |
| epoch 004: 500 / 8862 loss=4.161, nll_loss=2.514, ppl=5.71, wps=52010, ups=3, wpb=15182.936, bsz=559.888, num_updates=27079, lr=0.000192169, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8073, train_wall=7276 | |
| epoch 004: 510 / 8862 loss=4.162, nll_loss=2.515, ppl=5.72, wps=52007, ups=3, wpb=15181.172, bsz=559.656, num_updates=27089, lr=0.000192134, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8076, train_wall=7278 | |
| epoch 004: 520 / 8862 loss=4.163, nll_loss=2.517, ppl=5.72, wps=52034, ups=3, wpb=15187.983, bsz=558.188, num_updates=27099, lr=0.000192098, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8078, train_wall=7281 | |
| epoch 004: 530 / 8862 loss=4.165, nll_loss=2.519, ppl=5.73, wps=52022, ups=3, wpb=15182.452, bsz=558.102, num_updates=27109, lr=0.000192063, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8081, train_wall=7284 | |
| epoch 004: 540 / 8862 loss=4.166, nll_loss=2.520, ppl=5.74, wps=52025, ups=3, wpb=15180.850, bsz=557.516, num_updates=27119, lr=0.000192027, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8084, train_wall=7287 | |
| epoch 004: 550 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=52040, ups=3, wpb=15182.806, bsz=557.067, num_updates=27129, lr=0.000191992, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8087, train_wall=7289 | |
| epoch 004: 560 / 8862 loss=4.168, nll_loss=2.522, ppl=5.74, wps=52030, ups=3, wpb=15177.451, bsz=558.146, num_updates=27139, lr=0.000191957, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8090, train_wall=7292 | |
| epoch 004: 570 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=52033, ups=3, wpb=15174.963, bsz=557.562, num_updates=27149, lr=0.000191921, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8093, train_wall=7295 | |
| epoch 004: 580 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52029, ups=3, wpb=15174.193, bsz=556.489, num_updates=27159, lr=0.000191886, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8096, train_wall=7297 | |
| epoch 004: 590 / 8862 loss=4.169, nll_loss=2.523, ppl=5.75, wps=52016, ups=3, wpb=15176.355, bsz=557.171, num_updates=27169, lr=0.000191851, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8099, train_wall=7300 | |
| epoch 004: 600 / 8862 loss=4.169, nll_loss=2.523, ppl=5.75, wps=52002, ups=3, wpb=15173.075, bsz=556.326, num_updates=27179, lr=0.000191815, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8102, train_wall=7303 | |
| epoch 004: 610 / 8862 loss=4.168, nll_loss=2.522, ppl=5.74, wps=52006, ups=3, wpb=15173.542, bsz=557.329, num_updates=27189, lr=0.00019178, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8105, train_wall=7305 | |
| epoch 004: 620 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=52011, ups=3, wpb=15173.253, bsz=557.101, num_updates=27199, lr=0.000191745, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8108, train_wall=7308 | |
| epoch 004: 630 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=52010, ups=3, wpb=15171.079, bsz=557.033, num_updates=27209, lr=0.00019171, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8110, train_wall=7311 | |
| epoch 004: 640 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52037, ups=3, wpb=15177.696, bsz=556.406, num_updates=27219, lr=0.000191674, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8113, train_wall=7313 | |
| epoch 004: 650 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=52047, ups=3, wpb=15179.545, bsz=555.871, num_updates=27229, lr=0.000191639, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8116, train_wall=7316 | |
| epoch 004: 660 / 8862 loss=4.171, nll_loss=2.525, ppl=5.76, wps=52052, ups=3, wpb=15180.204, bsz=555.498, num_updates=27239, lr=0.000191604, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8119, train_wall=7319 | |
| epoch 004: 670 / 8862 loss=4.169, nll_loss=2.523, ppl=5.75, wps=52066, ups=3, wpb=15186.306, bsz=556.686, num_updates=27249, lr=0.000191569, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8122, train_wall=7322 | |
| epoch 004: 680 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=52063, ups=3, wpb=15184.065, bsz=556.488, num_updates=27259, lr=0.000191534, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8125, train_wall=7324 | |
| epoch 004: 690 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52052, ups=3, wpb=15181.860, bsz=556.515, num_updates=27269, lr=0.000191499, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8128, train_wall=7327 | |
| epoch 004: 700 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52047, ups=3, wpb=15177.970, bsz=555.960, num_updates=27279, lr=0.000191463, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8131, train_wall=7330 | |
| epoch 004: 710 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52040, ups=3, wpb=15174.730, bsz=556.231, num_updates=27289, lr=0.000191428, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8134, train_wall=7332 | |
| epoch 004: 720 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=52053, ups=3, wpb=15177.954, bsz=556.327, num_updates=27299, lr=0.000191393, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8137, train_wall=7335 | |
| epoch 004: 730 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=52066, ups=3, wpb=15182.603, bsz=557.133, num_updates=27309, lr=0.000191358, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8140, train_wall=7338 | |
| epoch 004: 740 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=52060, ups=3, wpb=15178.719, bsz=556.707, num_updates=27319, lr=0.000191323, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8142, train_wall=7340 | |
| epoch 004: 750 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52059, ups=3, wpb=15176.883, bsz=556.197, num_updates=27329, lr=0.000191288, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8145, train_wall=7343 | |
| epoch 004: 760 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52064, ups=3, wpb=15176.388, bsz=556.026, num_updates=27339, lr=0.000191253, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8148, train_wall=7346 | |
| epoch 004: 770 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52063, ups=3, wpb=15176.821, bsz=555.071, num_updates=27349, lr=0.000191218, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8151, train_wall=7348 | |
| epoch 004: 780 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52069, ups=3, wpb=15176.988, bsz=553.362, num_updates=27359, lr=0.000191183, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8154, train_wall=7351 | |
| epoch 004: 790 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52085, ups=3, wpb=15181.556, bsz=553.153, num_updates=27369, lr=0.000191148, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8157, train_wall=7354 | |
| epoch 004: 800 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52087, ups=3, wpb=15181.527, bsz=552.739, num_updates=27379, lr=0.000191113, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8160, train_wall=7356 | |
| epoch 004: 810 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52082, ups=3, wpb=15178.531, bsz=552.888, num_updates=27389, lr=0.000191079, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8163, train_wall=7359 | |
| epoch 004: 820 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52068, ups=3, wpb=15174.993, bsz=553.140, num_updates=27399, lr=0.000191044, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8166, train_wall=7362 | |
| epoch 004: 830 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52052, ups=3, wpb=15173.176, bsz=554.965, num_updates=27409, lr=0.000191009, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8169, train_wall=7365 | |
| epoch 004: 840 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52049, ups=3, wpb=15170.898, bsz=554.112, num_updates=27419, lr=0.000190974, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8172, train_wall=7367 | |
| epoch 004: 850 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52053, ups=3, wpb=15171.806, bsz=553.908, num_updates=27429, lr=0.000190939, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8174, train_wall=7370 | |
| epoch 004: 860 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52053, ups=3, wpb=15171.576, bsz=554.258, num_updates=27439, lr=0.000190904, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8177, train_wall=7373 | |
| epoch 004: 870 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52059, ups=3, wpb=15172.230, bsz=553.984, num_updates=27449, lr=0.00019087, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8180, train_wall=7375 | |
| epoch 004: 880 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52074, ups=3, wpb=15175.501, bsz=553.471, num_updates=27459, lr=0.000190835, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8183, train_wall=7378 | |
| epoch 004: 890 / 8862 loss=4.173, nll_loss=2.527, ppl=5.77, wps=52080, ups=3, wpb=15177.257, bsz=554.604, num_updates=27469, lr=0.0001908, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8186, train_wall=7381 | |
| epoch 004: 900 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52067, ups=3, wpb=15175.275, bsz=555.898, num_updates=27479, lr=0.000190765, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8189, train_wall=7383 | |
| epoch 004: 910 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52067, ups=3, wpb=15173.752, bsz=555.232, num_updates=27489, lr=0.000190731, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8192, train_wall=7386 | |
| epoch 004: 920 / 8862 loss=4.173, nll_loss=2.527, ppl=5.76, wps=52075, ups=3, wpb=15175.446, bsz=555.084, num_updates=27499, lr=0.000190696, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8195, train_wall=7389 | |
| epoch 004: 930 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52075, ups=3, wpb=15174.077, bsz=554.569, num_updates=27509, lr=0.000190661, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8198, train_wall=7391 | |
| epoch 004: 940 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52070, ups=3, wpb=15174.209, bsz=555.579, num_updates=27519, lr=0.000190627, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8201, train_wall=7394 | |
| epoch 004: 950 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52077, ups=3, wpb=15175.858, bsz=554.885, num_updates=27529, lr=0.000190592, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8204, train_wall=7397 | |
| epoch 004: 960 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52080, ups=3, wpb=15175.273, bsz=554.114, num_updates=27539, lr=0.000190557, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8206, train_wall=7399 | |
| epoch 004: 970 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52075, ups=3, wpb=15174.842, bsz=554.142, num_updates=27549, lr=0.000190523, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8209, train_wall=7402 | |
| epoch 004: 980 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52075, ups=3, wpb=15173.702, bsz=554.879, num_updates=27559, lr=0.000190488, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8212, train_wall=7405 | |
| epoch 004: 990 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=52072, ups=3, wpb=15172.410, bsz=555.043, num_updates=27569, lr=0.000190454, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8215, train_wall=7408 | |
| epoch 004: 1000 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=52080, ups=3, wpb=15175.089, bsz=555.077, num_updates=27579, lr=0.000190419, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8218, train_wall=7410 | |
| epoch 004: 1010 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=52066, ups=3, wpb=15169.812, bsz=554.651, num_updates=27589, lr=0.000190385, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8221, train_wall=7413 | |
| epoch 004: 1020 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52066, ups=3, wpb=15168.228, bsz=554.805, num_updates=27599, lr=0.00019035, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8224, train_wall=7416 | |
| epoch 004: 1030 / 8862 loss=4.173, nll_loss=2.527, ppl=5.77, wps=52067, ups=3, wpb=15169.279, bsz=554.468, num_updates=27609, lr=0.000190316, gnorm=0.495, clip=0.000, oom=0.000, loss_scale=8.000, wall=8227, train_wall=7418 | |
| epoch 004: 1040 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52067, ups=3, wpb=15169.012, bsz=554.144, num_updates=27619, lr=0.000190281, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8230, train_wall=7421 | |
| epoch 004: 1050 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52063, ups=3, wpb=15171.231, bsz=554.428, num_updates=27629, lr=0.000190247, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8233, train_wall=7424 | |
| epoch 004: 1060 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52067, ups=3, wpb=15173.416, bsz=554.066, num_updates=27639, lr=0.000190212, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8236, train_wall=7426 | |
| epoch 004: 1070 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52057, ups=3, wpb=15171.744, bsz=553.337, num_updates=27649, lr=0.000190178, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8239, train_wall=7429 | |
| epoch 004: 1080 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=52051, ups=3, wpb=15171.038, bsz=553.532, num_updates=27659, lr=0.000190144, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8241, train_wall=7432 | |
| epoch 004: 1090 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=52044, ups=3, wpb=15168.752, bsz=552.785, num_updates=27669, lr=0.000190109, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8244, train_wall=7435 | |
| epoch 004: 1100 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=52037, ups=3, wpb=15169.550, bsz=553.446, num_updates=27679, lr=0.000190075, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8247, train_wall=7437 | |
| epoch 004: 1110 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=52034, ups=3, wpb=15169.156, bsz=552.857, num_updates=27689, lr=0.000190041, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8250, train_wall=7440 | |
| epoch 004: 1120 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=52032, ups=3, wpb=15170.394, bsz=553.563, num_updates=27699, lr=0.000190006, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8253, train_wall=7443 | |
| epoch 004: 1130 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=52022, ups=3, wpb=15167.215, bsz=553.245, num_updates=27709, lr=0.000189972, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8256, train_wall=7445 | |
| epoch 004: 1140 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=52017, ups=3, wpb=15166.046, bsz=552.862, num_updates=27719, lr=0.000189938, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8259, train_wall=7448 | |
| epoch 004: 1150 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=52008, ups=3, wpb=15164.385, bsz=552.744, num_updates=27729, lr=0.000189903, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8262, train_wall=7451 | |
| epoch 004: 1160 / 8862 loss=4.176, nll_loss=2.531, ppl=5.78, wps=52004, ups=3, wpb=15162.711, bsz=552.675, num_updates=27739, lr=0.000189869, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8265, train_wall=7454 | |
| epoch 004: 1170 / 8862 loss=4.176, nll_loss=2.531, ppl=5.78, wps=51994, ups=3, wpb=15161.754, bsz=553.004, num_updates=27749, lr=0.000189835, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8268, train_wall=7456 | |
| epoch 004: 1180 / 8862 loss=4.176, nll_loss=2.531, ppl=5.78, wps=51999, ups=3, wpb=15162.383, bsz=552.603, num_updates=27759, lr=0.000189801, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8271, train_wall=7459 | |
| epoch 004: 1190 / 8862 loss=4.175, nll_loss=2.531, ppl=5.78, wps=51995, ups=3, wpb=15162.107, bsz=553.263, num_updates=27769, lr=0.000189767, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8274, train_wall=7462 | |
| epoch 004: 1200 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51976, ups=3, wpb=15161.026, bsz=555.271, num_updates=27779, lr=0.000189732, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8277, train_wall=7465 | |
| epoch 004: 1210 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51965, ups=3, wpb=15158.985, bsz=556.010, num_updates=27789, lr=0.000189698, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8280, train_wall=7467 | |
| epoch 004: 1220 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51963, ups=3, wpb=15158.145, bsz=556.390, num_updates=27799, lr=0.000189664, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8283, train_wall=7470 | |
| epoch 004: 1230 / 8862 loss=4.175, nll_loss=2.531, ppl=5.78, wps=51957, ups=3, wpb=15156.680, bsz=556.744, num_updates=27809, lr=0.00018963, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8286, train_wall=7473 | |
| epoch 004: 1240 / 8862 loss=4.175, nll_loss=2.531, ppl=5.78, wps=51956, ups=3, wpb=15157.515, bsz=556.519, num_updates=27819, lr=0.000189596, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8288, train_wall=7475 | |
| epoch 004: 1250 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51960, ups=3, wpb=15159.990, bsz=557.340, num_updates=27829, lr=0.000189562, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8291, train_wall=7478 | |
| epoch 004: 1260 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51956, ups=3, wpb=15159.780, bsz=556.980, num_updates=27839, lr=0.000189528, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8294, train_wall=7481 | |
| epoch 004: 1270 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51950, ups=3, wpb=15159.355, bsz=557.489, num_updates=27849, lr=0.000189494, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8297, train_wall=7484 | |
| epoch 004: 1280 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51948, ups=3, wpb=15160.241, bsz=558.027, num_updates=27859, lr=0.00018946, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8300, train_wall=7486 | |
| epoch 004: 1290 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51947, ups=3, wpb=15161.465, bsz=557.757, num_updates=27869, lr=0.000189426, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8303, train_wall=7489 | |
| epoch 004: 1300 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51940, ups=3, wpb=15160.590, bsz=557.442, num_updates=27879, lr=0.000189392, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8306, train_wall=7492 | |
| epoch 004: 1310 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51943, ups=3, wpb=15160.314, bsz=556.784, num_updates=27889, lr=0.000189358, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8309, train_wall=7494 | |
| epoch 004: 1320 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51944, ups=3, wpb=15162.480, bsz=557.075, num_updates=27899, lr=0.000189324, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8312, train_wall=7497 | |
| epoch 004: 1330 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51941, ups=3, wpb=15161.367, bsz=557.283, num_updates=27909, lr=0.00018929, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8315, train_wall=7500 | |
| epoch 004: 1340 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51933, ups=3, wpb=15161.375, bsz=556.862, num_updates=27919, lr=0.000189256, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8318, train_wall=7503 | |
| epoch 004: 1350 / 8862 loss=4.175, nll_loss=2.530, ppl=5.78, wps=51929, ups=3, wpb=15159.045, bsz=556.021, num_updates=27929, lr=0.000189222, gnorm=0.494, clip=0.000, oom=0.000, loss_scale=8.000, wall=8321, train_wall=7505 | |
| epoch 004: 1360 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51928, ups=3, wpb=15159.669, bsz=556.191, num_updates=27939, lr=0.000189188, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8324, train_wall=7508 | |
| epoch 004: 1370 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51926, ups=3, wpb=15159.531, bsz=556.254, num_updates=27949, lr=0.000189155, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8327, train_wall=7511 | |
| epoch 004: 1380 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=51928, ups=3, wpb=15160.755, bsz=556.495, num_updates=27959, lr=0.000189121, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8330, train_wall=7513 | |
| epoch 004: 1390 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=51925, ups=3, wpb=15161.474, bsz=556.664, num_updates=27969, lr=0.000189087, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8333, train_wall=7516 | |
| epoch 004: 1400 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=51922, ups=3, wpb=15160.755, bsz=556.397, num_updates=27979, lr=0.000189053, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8335, train_wall=7519 | |
| epoch 004: 1410 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51920, ups=3, wpb=15159.821, bsz=556.184, num_updates=27989, lr=0.000189019, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8338, train_wall=7522 | |
| epoch 004: 1420 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51914, ups=3, wpb=15158.327, bsz=555.901, num_updates=27999, lr=0.000188986, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8341, train_wall=7524 | |
| epoch 004: 1430 / 8862 loss=4.174, nll_loss=2.529, ppl=5.77, wps=51921, ups=3, wpb=15161.396, bsz=555.885, num_updates=28009, lr=0.000188952, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8344, train_wall=7527 | |
| epoch 004: 1440 / 8862 loss=4.173, nll_loss=2.529, ppl=5.77, wps=51922, ups=3, wpb=15162.286, bsz=555.381, num_updates=28019, lr=0.000188918, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8347, train_wall=7530 | |
| epoch 004: 1450 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=51925, ups=3, wpb=15164.271, bsz=555.088, num_updates=28029, lr=0.000188884, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8350, train_wall=7532 | |
| epoch 004: 1460 / 8862 loss=4.172, nll_loss=2.527, ppl=5.77, wps=51924, ups=3, wpb=15164.584, bsz=555.034, num_updates=28039, lr=0.000188851, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8353, train_wall=7535 | |
| epoch 004: 1470 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51919, ups=3, wpb=15164.611, bsz=554.692, num_updates=28049, lr=0.000188817, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8356, train_wall=7538 | |
| epoch 004: 1480 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51919, ups=3, wpb=15164.980, bsz=554.555, num_updates=28059, lr=0.000188783, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8359, train_wall=7541 | |
| epoch 004: 1490 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51917, ups=3, wpb=15164.533, bsz=554.586, num_updates=28069, lr=0.00018875, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8362, train_wall=7543 | |
| epoch 004: 1500 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51908, ups=3, wpb=15162.891, bsz=554.564, num_updates=28079, lr=0.000188716, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8365, train_wall=7546 | |
| epoch 004: 1510 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51912, ups=3, wpb=15164.248, bsz=554.525, num_updates=28089, lr=0.000188683, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8368, train_wall=7549 | |
| epoch 004: 1520 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51909, ups=3, wpb=15164.029, bsz=553.978, num_updates=28099, lr=0.000188649, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8371, train_wall=7552 | |
| epoch 004: 1530 / 8862 loss=4.172, nll_loss=2.527, ppl=5.77, wps=51904, ups=3, wpb=15162.863, bsz=553.902, num_updates=28109, lr=0.000188615, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8374, train_wall=7554 | |
| epoch 004: 1540 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51912, ups=3, wpb=15165.555, bsz=553.786, num_updates=28119, lr=0.000188582, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8377, train_wall=7557 | |
| epoch 004: 1550 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51907, ups=3, wpb=15164.453, bsz=553.609, num_updates=28129, lr=0.000188548, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8380, train_wall=7560 | |
| epoch 004: 1560 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51909, ups=3, wpb=15164.241, bsz=553.461, num_updates=28139, lr=0.000188515, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8382, train_wall=7562 | |
| epoch 004: 1570 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51901, ups=3, wpb=15164.232, bsz=553.349, num_updates=28149, lr=0.000188481, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8385, train_wall=7565 | |
| epoch 004: 1580 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51900, ups=3, wpb=15164.033, bsz=553.230, num_updates=28159, lr=0.000188448, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8388, train_wall=7568 | |
| epoch 004: 1590 / 8862 loss=4.173, nll_loss=2.528, ppl=5.77, wps=51899, ups=3, wpb=15162.828, bsz=552.593, num_updates=28169, lr=0.000188414, gnorm=0.493, clip=0.000, oom=0.000, loss_scale=8.000, wall=8391, train_wall=7570 | |
| epoch 004: 1600 / 8862 loss=4.172, nll_loss=2.527, ppl=5.77, wps=51896, ups=3, wpb=15163.683, bsz=553.329, num_updates=28179, lr=0.000188381, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8394, train_wall=7573 | |
| epoch 004: 1610 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51891, ups=3, wpb=15162.073, bsz=553.197, num_updates=28189, lr=0.000188348, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8397, train_wall=7576 | |
| epoch 004: 1620 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51887, ups=3, wpb=15161.448, bsz=553.634, num_updates=28199, lr=0.000188314, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8400, train_wall=7579 | |
| epoch 004: 1630 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=51888, ups=3, wpb=15162.028, bsz=553.359, num_updates=28209, lr=0.000188281, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8403, train_wall=7581 | |
| epoch 004: 1640 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51890, ups=3, wpb=15162.820, bsz=553.082, num_updates=28219, lr=0.000188247, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8406, train_wall=7584 | |
| epoch 004: 1650 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51883, ups=3, wpb=15162.227, bsz=552.979, num_updates=28229, lr=0.000188214, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8409, train_wall=7587 | |
| epoch 004: 1660 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51884, ups=3, wpb=15161.968, bsz=552.722, num_updates=28239, lr=0.000188181, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8412, train_wall=7590 | |
| epoch 004: 1670 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51893, ups=3, wpb=15164.653, bsz=552.306, num_updates=28249, lr=0.000188148, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8415, train_wall=7592 | |
| epoch 004: 1680 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51893, ups=3, wpb=15165.300, bsz=551.753, num_updates=28259, lr=0.000188114, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8418, train_wall=7595 | |
| epoch 004: 1690 / 8862 loss=4.172, nll_loss=2.526, ppl=5.76, wps=51894, ups=3, wpb=15167.152, bsz=552.222, num_updates=28269, lr=0.000188081, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8421, train_wall=7598 | |
| epoch 004: 1700 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51891, ups=3, wpb=15167.174, bsz=552.677, num_updates=28279, lr=0.000188048, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8424, train_wall=7600 | |
| epoch 004: 1710 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51885, ups=3, wpb=15166.246, bsz=552.781, num_updates=28289, lr=0.000188014, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8427, train_wall=7603 | |
| epoch 004: 1720 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51882, ups=3, wpb=15165.229, bsz=552.581, num_updates=28299, lr=0.000187981, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8429, train_wall=7606 | |
| epoch 004: 1730 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51882, ups=3, wpb=15164.676, bsz=552.143, num_updates=28309, lr=0.000187948, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8432, train_wall=7609 | |
| epoch 004: 1740 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51880, ups=3, wpb=15165.862, bsz=551.986, num_updates=28319, lr=0.000187915, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8435, train_wall=7611 | |
| epoch 004: 1750 / 8862 loss=4.171, nll_loss=2.525, ppl=5.76, wps=51875, ups=3, wpb=15165.874, bsz=552.279, num_updates=28329, lr=0.000187882, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8438, train_wall=7614 | |
| epoch 004: 1760 / 8862 loss=4.171, nll_loss=2.525, ppl=5.76, wps=51876, ups=3, wpb=15165.809, bsz=552.413, num_updates=28339, lr=0.000187849, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8441, train_wall=7617 | |
| epoch 004: 1770 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51871, ups=3, wpb=15165.338, bsz=552.714, num_updates=28349, lr=0.000187815, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8444, train_wall=7619 | |
| epoch 004: 1780 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51868, ups=3, wpb=15166.061, bsz=553.464, num_updates=28359, lr=0.000187782, gnorm=0.492, clip=0.000, oom=0.000, loss_scale=8.000, wall=8447, train_wall=7622 | |
| epoch 004: 1790 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51868, ups=3, wpb=15166.968, bsz=553.635, num_updates=28369, lr=0.000187749, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8450, train_wall=7625 | |
| epoch 004: 1800 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51869, ups=3, wpb=15167.225, bsz=553.275, num_updates=28379, lr=0.000187716, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8453, train_wall=7628 | |
| epoch 004: 1810 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51877, ups=3, wpb=15169.023, bsz=553.109, num_updates=28389, lr=0.000187683, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8456, train_wall=7630 | |
| epoch 004: 1820 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51868, ups=3, wpb=15168.749, bsz=553.063, num_updates=28399, lr=0.00018765, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8459, train_wall=7633 | |
| epoch 004: 1830 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51866, ups=3, wpb=15168.396, bsz=552.839, num_updates=28409, lr=0.000187617, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8462, train_wall=7636 | |
| epoch 004: 1840 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51867, ups=3, wpb=15168.778, bsz=552.643, num_updates=28419, lr=0.000187584, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8465, train_wall=7639 | |
| epoch 004: 1850 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51865, ups=3, wpb=15170.212, bsz=553.413, num_updates=28429, lr=0.000187551, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8468, train_wall=7641 | |
| epoch 004: 1860 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51858, ups=3, wpb=15169.539, bsz=554.072, num_updates=28439, lr=0.000187518, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8471, train_wall=7644 | |
| epoch 004: 1870 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51859, ups=3, wpb=15170.172, bsz=554.386, num_updates=28449, lr=0.000187485, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8474, train_wall=7647 | |
| epoch 004: 1880 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51854, ups=3, wpb=15169.620, bsz=554.526, num_updates=28459, lr=0.000187452, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8477, train_wall=7650 | |
| epoch 004: 1890 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51846, ups=3, wpb=15169.198, bsz=555.431, num_updates=28469, lr=0.000187419, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8480, train_wall=7652 | |
| epoch 004: 1900 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51845, ups=3, wpb=15169.846, bsz=555.089, num_updates=28479, lr=0.000187386, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8483, train_wall=7655 | |
| epoch 004: 1910 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51839, ups=3, wpb=15168.724, bsz=555.136, num_updates=28489, lr=0.000187353, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8486, train_wall=7658 | |
| epoch 004: 1920 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51835, ups=3, wpb=15167.231, bsz=555.082, num_updates=28499, lr=0.00018732, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8489, train_wall=7660 | |
| epoch 004: 1930 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51832, ups=3, wpb=15167.556, bsz=555.476, num_updates=28509, lr=0.000187288, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8491, train_wall=7663 | |
| epoch 004: 1940 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51827, ups=3, wpb=15167.143, bsz=555.495, num_updates=28519, lr=0.000187255, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8494, train_wall=7666 | |
| epoch 004: 1950 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51831, ups=3, wpb=15168.060, bsz=555.243, num_updates=28529, lr=0.000187222, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8497, train_wall=7669 | |
| epoch 004: 1960 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51830, ups=3, wpb=15168.511, bsz=555.215, num_updates=28539, lr=0.000187189, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8500, train_wall=7671 | |
| epoch 004: 1970 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51829, ups=3, wpb=15169.597, bsz=555.941, num_updates=28549, lr=0.000187156, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8503, train_wall=7674 | |
| epoch 004: 1980 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51829, ups=3, wpb=15170.236, bsz=555.836, num_updates=28559, lr=0.000187124, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=8.000, wall=8506, train_wall=7677 | |
| epoch 004: 1990 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51825, ups=3, wpb=15169.692, bsz=556.211, num_updates=28569, lr=0.000187091, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8509, train_wall=7680 | |
| epoch 004: 2000 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51824, ups=3, wpb=15169.898, bsz=556.206, num_updates=28579, lr=0.000187058, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8512, train_wall=7682 | |
| epoch 004: 2010 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51821, ups=3, wpb=15169.277, bsz=556.129, num_updates=28589, lr=0.000187025, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8515, train_wall=7685 | |
| epoch 004: 2020 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51814, ups=3, wpb=15168.029, bsz=555.954, num_updates=28599, lr=0.000186993, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8518, train_wall=7688 | |
| epoch 004: 2030 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51813, ups=3, wpb=15167.020, bsz=555.844, num_updates=28609, lr=0.00018696, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8521, train_wall=7691 | |
| epoch 004: 2040 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51814, ups=3, wpb=15166.472, bsz=555.528, num_updates=28619, lr=0.000186927, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8524, train_wall=7693 | |
| epoch 004: 2050 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51816, ups=3, wpb=15168.933, bsz=555.737, num_updates=28629, lr=0.000186895, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8527, train_wall=7696 | |
| epoch 004: 2060 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51807, ups=3, wpb=15169.495, bsz=555.897, num_updates=28639, lr=0.000186862, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8530, train_wall=7699 | |
| epoch 004: 2070 / 8862 loss=4.171, nll_loss=2.525, ppl=5.76, wps=51806, ups=3, wpb=15169.617, bsz=555.507, num_updates=28649, lr=0.000186829, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8533, train_wall=7702 | |
| epoch 004: 2080 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51800, ups=3, wpb=15167.431, bsz=555.556, num_updates=28659, lr=0.000186797, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8536, train_wall=7704 | |
| epoch 004: 2090 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51798, ups=3, wpb=15166.164, bsz=555.631, num_updates=28669, lr=0.000186764, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8539, train_wall=7707 | |
| epoch 004: 2100 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51800, ups=3, wpb=15166.826, bsz=556.021, num_updates=28679, lr=0.000186732, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8542, train_wall=7710 | |
| epoch 004: 2110 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51792, ups=3, wpb=15165.269, bsz=556.146, num_updates=28689, lr=0.000186699, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8545, train_wall=7712 | |
| epoch 004: 2120 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51789, ups=3, wpb=15164.728, bsz=556.341, num_updates=28699, lr=0.000186667, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8547, train_wall=7715 | |
| epoch 004: 2130 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51789, ups=3, wpb=15164.187, bsz=555.927, num_updates=28709, lr=0.000186634, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8550, train_wall=7718 | |
| epoch 004: 2140 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51793, ups=3, wpb=15165.898, bsz=555.553, num_updates=28719, lr=0.000186602, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8553, train_wall=7720 | |
| epoch 004: 2150 / 8862 loss=4.172, nll_loss=2.527, ppl=5.76, wps=51790, ups=3, wpb=15165.076, bsz=555.481, num_updates=28729, lr=0.000186569, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8556, train_wall=7723 | |
| epoch 004: 2160 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51792, ups=3, wpb=15165.535, bsz=555.732, num_updates=28739, lr=0.000186537, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8559, train_wall=7726 | |
| epoch 004: 2170 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51787, ups=3, wpb=15165.113, bsz=555.519, num_updates=28749, lr=0.000186504, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8562, train_wall=7729 | |
| epoch 004: 2180 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51790, ups=3, wpb=15165.898, bsz=555.290, num_updates=28759, lr=0.000186472, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8565, train_wall=7731 | |
| epoch 004: 2190 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51795, ups=3, wpb=15167.114, bsz=555.301, num_updates=28769, lr=0.000186439, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8568, train_wall=7734 | |
| epoch 004: 2200 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51795, ups=3, wpb=15168.730, bsz=555.642, num_updates=28779, lr=0.000186407, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8571, train_wall=7737 | |
| epoch 004: 2210 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51801, ups=3, wpb=15169.667, bsz=555.723, num_updates=28789, lr=0.000186375, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8574, train_wall=7740 | |
| epoch 004: 2220 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51795, ups=3, wpb=15168.965, bsz=555.703, num_updates=28799, lr=0.000186342, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8577, train_wall=7742 | |
| epoch 004: 2230 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51799, ups=3, wpb=15169.403, bsz=555.428, num_updates=28809, lr=0.00018631, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8580, train_wall=7745 | |
| epoch 004: 2240 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51795, ups=3, wpb=15169.270, bsz=555.438, num_updates=28819, lr=0.000186278, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8583, train_wall=7748 | |
| epoch 004: 2250 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51793, ups=3, wpb=15168.155, bsz=555.678, num_updates=28829, lr=0.000186245, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8586, train_wall=7750 | |
| epoch 004: 2260 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51794, ups=3, wpb=15168.232, bsz=555.425, num_updates=28839, lr=0.000186213, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8589, train_wall=7753 | |
| epoch 004: 2270 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51794, ups=3, wpb=15169.144, bsz=555.568, num_updates=28849, lr=0.000186181, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8592, train_wall=7756 | |
| epoch 004: 2280 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51793, ups=3, wpb=15169.967, bsz=555.788, num_updates=28859, lr=0.000186148, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8595, train_wall=7759 | |
| epoch 004: 2290 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51794, ups=3, wpb=15171.258, bsz=555.722, num_updates=28869, lr=0.000186116, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8597, train_wall=7761 | |
| epoch 004: 2300 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51789, ups=3, wpb=15170.390, bsz=555.887, num_updates=28879, lr=0.000186084, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8600, train_wall=7764 | |
| epoch 004: 2310 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51790, ups=3, wpb=15170.573, bsz=555.811, num_updates=28889, lr=0.000186052, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8603, train_wall=7767 | |
| epoch 004: 2320 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51790, ups=3, wpb=15171.476, bsz=556.291, num_updates=28899, lr=0.00018602, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8606, train_wall=7770 | |
| epoch 004: 2330 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51796, ups=3, wpb=15172.636, bsz=556.166, num_updates=28909, lr=0.000185987, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8609, train_wall=7772 | |
| epoch 004: 2340 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51795, ups=3, wpb=15173.231, bsz=556.456, num_updates=28919, lr=0.000185955, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8612, train_wall=7775 | |
| epoch 004: 2350 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51793, ups=3, wpb=15172.966, bsz=556.539, num_updates=28929, lr=0.000185923, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8615, train_wall=7778 | |
| epoch 004: 2360 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51796, ups=3, wpb=15173.799, bsz=556.459, num_updates=28939, lr=0.000185891, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8618, train_wall=7780 | |
| epoch 004: 2370 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51792, ups=3, wpb=15173.518, bsz=557.230, num_updates=28949, lr=0.000185859, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8621, train_wall=7783 | |
| epoch 004: 2380 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51788, ups=3, wpb=15173.961, bsz=557.349, num_updates=28959, lr=0.000185827, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8624, train_wall=7786 | |
| epoch 004: 2390 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51791, ups=3, wpb=15174.210, bsz=557.092, num_updates=28969, lr=0.000185795, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8627, train_wall=7789 | |
| epoch 004: 2400 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51790, ups=3, wpb=15173.734, bsz=556.995, num_updates=28979, lr=0.000185763, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8630, train_wall=7791 | |
| epoch 004: 2410 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51789, ups=3, wpb=15174.616, bsz=557.803, num_updates=28989, lr=0.000185731, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8633, train_wall=7794 | |
| epoch 004: 2420 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51795, ups=3, wpb=15175.539, bsz=557.413, num_updates=28999, lr=0.000185699, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8636, train_wall=7797 | |
| epoch 004: 2430 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51788, ups=3, wpb=15175.789, bsz=558.506, num_updates=29009, lr=0.000185667, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8639, train_wall=7800 | |
| epoch 004: 2440 / 8862 loss=4.169, nll_loss=2.523, ppl=5.75, wps=51785, ups=3, wpb=15175.525, bsz=559.056, num_updates=29019, lr=0.000185635, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8642, train_wall=7802 | |
| epoch 004: 2450 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51787, ups=3, wpb=15175.800, bsz=559.063, num_updates=29029, lr=0.000185603, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8645, train_wall=7805 | |
| epoch 004: 2460 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51783, ups=3, wpb=15175.863, bsz=559.272, num_updates=29039, lr=0.000185571, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8648, train_wall=7808 | |
| epoch 004: 2470 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51786, ups=3, wpb=15176.172, bsz=559.184, num_updates=29049, lr=0.000185539, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8651, train_wall=7810 | |
| epoch 004: 2480 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51781, ups=3, wpb=15175.429, bsz=558.949, num_updates=29059, lr=0.000185507, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8654, train_wall=7813 | |
| epoch 004: 2490 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51779, ups=3, wpb=15175.562, bsz=558.931, num_updates=29069, lr=0.000185475, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8656, train_wall=7816 | |
| epoch 004: 2500 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51778, ups=3, wpb=15174.408, bsz=558.724, num_updates=29079, lr=0.000185443, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8659, train_wall=7819 | |
| epoch 004: 2510 / 8862 loss=4.170, nll_loss=2.524, ppl=5.75, wps=51774, ups=3, wpb=15173.558, bsz=558.726, num_updates=29089, lr=0.000185411, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8662, train_wall=7821 | |
| epoch 004: 2520 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51772, ups=3, wpb=15173.678, bsz=558.670, num_updates=29099, lr=0.000185379, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8665, train_wall=7824 | |
| epoch 004: 2530 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51774, ups=3, wpb=15173.834, bsz=558.521, num_updates=29109, lr=0.000185347, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8668, train_wall=7827 | |
| epoch 004: 2540 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51774, ups=3, wpb=15174.877, bsz=558.470, num_updates=29119, lr=0.000185316, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8671, train_wall=7830 | |
| epoch 004: 2550 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51777, ups=3, wpb=15176.236, bsz=558.191, num_updates=29129, lr=0.000185284, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8674, train_wall=7832 | |
| epoch 004: 2560 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51780, ups=3, wpb=15176.886, bsz=557.938, num_updates=29139, lr=0.000185252, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8677, train_wall=7835 | |
| epoch 004: 2570 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51781, ups=3, wpb=15176.674, bsz=557.669, num_updates=29149, lr=0.00018522, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8680, train_wall=7838 | |
| epoch 004: 2580 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51780, ups=3, wpb=15176.904, bsz=557.638, num_updates=29159, lr=0.000185188, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8683, train_wall=7840 | |
| epoch 004: 2590 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51776, ups=3, wpb=15175.305, bsz=557.567, num_updates=29169, lr=0.000185157, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8686, train_wall=7843 | |
| epoch 004: 2600 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51769, ups=3, wpb=15173.148, bsz=557.696, num_updates=29179, lr=0.000185125, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8689, train_wall=7846 | |
| epoch 004: 2610 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51765, ups=3, wpb=15172.362, bsz=557.187, num_updates=29189, lr=0.000185093, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8692, train_wall=7849 | |
| epoch 004: 2620 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51766, ups=3, wpb=15172.518, bsz=557.436, num_updates=29199, lr=0.000185061, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8695, train_wall=7851 | |
| epoch 004: 2630 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51762, ups=3, wpb=15171.720, bsz=557.315, num_updates=29209, lr=0.00018503, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8698, train_wall=7854 | |
| epoch 004: 2640 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51761, ups=3, wpb=15171.473, bsz=557.274, num_updates=29219, lr=0.000184998, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8700, train_wall=7857 | |
| epoch 004: 2650 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51755, ups=3, wpb=15169.859, bsz=557.260, num_updates=29229, lr=0.000184966, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8703, train_wall=7859 | |
| epoch 004: 2660 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51756, ups=3, wpb=15170.156, bsz=557.108, num_updates=29239, lr=0.000184935, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8706, train_wall=7862 | |
| epoch 004: 2670 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51758, ups=3, wpb=15170.321, bsz=556.882, num_updates=29249, lr=0.000184903, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8709, train_wall=7865 | |
| epoch 004: 2680 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51761, ups=3, wpb=15171.627, bsz=556.721, num_updates=29259, lr=0.000184872, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8712, train_wall=7868 | |
| epoch 004: 2690 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51764, ups=3, wpb=15172.278, bsz=556.739, num_updates=29269, lr=0.00018484, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8715, train_wall=7870 | |
| epoch 004: 2700 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51764, ups=3, wpb=15172.685, bsz=556.757, num_updates=29279, lr=0.000184808, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8718, train_wall=7873 | |
| epoch 004: 2710 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51762, ups=3, wpb=15171.598, bsz=556.633, num_updates=29289, lr=0.000184777, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8721, train_wall=7876 | |
| epoch 004: 2720 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51753, ups=3, wpb=15170.379, bsz=556.772, num_updates=29299, lr=0.000184745, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8724, train_wall=7878 | |
| epoch 004: 2730 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51754, ups=3, wpb=15170.776, bsz=556.716, num_updates=29309, lr=0.000184714, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8727, train_wall=7881 | |
| epoch 004: 2740 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51756, ups=3, wpb=15171.371, bsz=556.807, num_updates=29319, lr=0.000184682, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8730, train_wall=7884 | |
| epoch 004: 2750 / 8862 loss=4.171, nll_loss=2.526, ppl=5.76, wps=51758, ups=3, wpb=15171.871, bsz=556.720, num_updates=29329, lr=0.000184651, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8733, train_wall=7887 | |
| epoch 004: 2760 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51756, ups=3, wpb=15171.541, bsz=556.853, num_updates=29339, lr=0.000184619, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8736, train_wall=7889 | |
| epoch 004: 2770 / 8862 loss=4.170, nll_loss=2.526, ppl=5.76, wps=51753, ups=3, wpb=15170.848, bsz=556.824, num_updates=29349, lr=0.000184588, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8739, train_wall=7892 | |
| epoch 004: 2780 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51759, ups=3, wpb=15172.251, bsz=556.772, num_updates=29359, lr=0.000184557, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8742, train_wall=7895 | |
| epoch 004: 2790 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51753, ups=3, wpb=15170.711, bsz=556.870, num_updates=29369, lr=0.000184525, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8745, train_wall=7897 | |
| epoch 004: 2800 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51753, ups=3, wpb=15171.623, bsz=557.258, num_updates=29379, lr=0.000184494, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8748, train_wall=7900 | |
| epoch 004: 2810 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51750, ups=3, wpb=15171.064, bsz=557.228, num_updates=29389, lr=0.000184462, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8750, train_wall=7903 | |
| epoch 004: 2820 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51746, ups=3, wpb=15170.357, bsz=557.221, num_updates=29399, lr=0.000184431, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8753, train_wall=7906 | |
| epoch 004: 2830 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51750, ups=3, wpb=15171.040, bsz=557.064, num_updates=29409, lr=0.0001844, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8756, train_wall=7908 | |
| epoch 004: 2840 / 8862 loss=4.170, nll_loss=2.525, ppl=5.76, wps=51748, ups=3, wpb=15171.195, bsz=557.167, num_updates=29419, lr=0.000184368, gnorm=0.491, clip=0.000, oom=0.000, loss_scale=16.000, wall=8759, train_wall=7911 | |
| epoch 004: 2850 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51751, ups=3, wpb=15172.199, bsz=557.287, num_updates=29429, lr=0.000184337, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8762, train_wall=7914 | |
| epoch 004: 2860 / 8862 loss=4.169, nll_loss=2.525, ppl=5.75, wps=51746, ups=3, wpb=15173.029, bsz=557.805, num_updates=29439, lr=0.000184306, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8765, train_wall=7917 | |
| epoch 004: 2870 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51747, ups=3, wpb=15173.773, bsz=557.860, num_updates=29449, lr=0.000184274, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8768, train_wall=7919 | |
| epoch 004: 2880 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51745, ups=3, wpb=15173.189, bsz=557.876, num_updates=29459, lr=0.000184243, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8771, train_wall=7922 | |
| epoch 004: 2890 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51749, ups=3, wpb=15174.632, bsz=558.019, num_updates=29469, lr=0.000184212, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8774, train_wall=7925 | |
| epoch 004: 2900 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51749, ups=3, wpb=15174.181, bsz=557.775, num_updates=29479, lr=0.00018418, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8777, train_wall=7928 | |
| epoch 004: 2910 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51744, ups=3, wpb=15172.407, bsz=557.961, num_updates=29489, lr=0.000184149, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8780, train_wall=7930 | |
| epoch 004: 2920 / 8862 loss=4.169, nll_loss=2.525, ppl=5.75, wps=51740, ups=3, wpb=15171.473, bsz=557.913, num_updates=29499, lr=0.000184118, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8783, train_wall=7933 | |
| epoch 004: 2930 / 8862 loss=4.170, nll_loss=2.525, ppl=5.75, wps=51740, ups=3, wpb=15171.339, bsz=557.655, num_updates=29509, lr=0.000184087, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8786, train_wall=7936 | |
| epoch 004: 2940 / 8862 loss=4.169, nll_loss=2.525, ppl=5.75, wps=51738, ups=3, wpb=15170.705, bsz=557.750, num_updates=29519, lr=0.000184056, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8789, train_wall=7938 | |
| epoch 004: 2950 / 8862 loss=4.169, nll_loss=2.525, ppl=5.75, wps=51736, ups=3, wpb=15171.024, bsz=557.831, num_updates=29529, lr=0.000184024, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8792, train_wall=7941 | |
| epoch 004: 2960 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51737, ups=3, wpb=15171.779, bsz=558.355, num_updates=29539, lr=0.000183993, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8795, train_wall=7944 | |
| epoch 004: 2970 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51740, ups=3, wpb=15172.925, bsz=558.481, num_updates=29549, lr=0.000183962, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8798, train_wall=7947 | |
| epoch 004: 2980 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51736, ups=3, wpb=15173.382, bsz=558.484, num_updates=29559, lr=0.000183931, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8801, train_wall=7949 | |
| epoch 004: 2990 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51735, ups=3, wpb=15172.927, bsz=558.312, num_updates=29569, lr=0.0001839, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8804, train_wall=7952 | |
| epoch 004: 3000 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51738, ups=3, wpb=15173.674, bsz=558.315, num_updates=29579, lr=0.000183869, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8807, train_wall=7955 | |
| epoch 004: 3010 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51735, ups=3, wpb=15172.614, bsz=557.999, num_updates=29589, lr=0.000183838, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8809, train_wall=7958 | |
| epoch 004: 3020 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51735, ups=3, wpb=15173.211, bsz=558.083, num_updates=29599, lr=0.000183807, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8812, train_wall=7960 | |
| epoch 004: 3030 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51732, ups=3, wpb=15173.044, bsz=558.348, num_updates=29609, lr=0.000183776, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8815, train_wall=7963 | |
| epoch 004: 3040 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51734, ups=3, wpb=15172.899, bsz=558.001, num_updates=29619, lr=0.000183745, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8818, train_wall=7966 | |
| epoch 004: 3050 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51730, ups=3, wpb=15172.118, bsz=558.039, num_updates=29629, lr=0.000183714, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8821, train_wall=7968 | |
| epoch 004: 3060 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51732, ups=3, wpb=15172.751, bsz=558.037, num_updates=29639, lr=0.000183683, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8824, train_wall=7971 | |
| epoch 004: 3070 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51728, ups=3, wpb=15171.389, bsz=557.741, num_updates=29649, lr=0.000183652, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8827, train_wall=7974 | |
| epoch 004: 3080 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51731, ups=3, wpb=15171.929, bsz=557.702, num_updates=29659, lr=0.000183621, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8830, train_wall=7977 | |
| epoch 004: 3090 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51727, ups=3, wpb=15171.836, bsz=557.904, num_updates=29669, lr=0.00018359, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8833, train_wall=7979 | |
| epoch 004: 3100 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51727, ups=3, wpb=15171.997, bsz=557.787, num_updates=29679, lr=0.000183559, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8836, train_wall=7982 | |
| epoch 004: 3110 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51730, ups=3, wpb=15172.782, bsz=557.678, num_updates=29689, lr=0.000183528, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8839, train_wall=7985 | |
| epoch 004: 3120 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51733, ups=3, wpb=15174.101, bsz=558.001, num_updates=29699, lr=0.000183497, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8842, train_wall=7988 | |
| epoch 004: 3130 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51731, ups=3, wpb=15174.080, bsz=558.224, num_updates=29709, lr=0.000183466, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8845, train_wall=7990 | |
| epoch 004: 3140 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51729, ups=3, wpb=15172.885, bsz=558.220, num_updates=29719, lr=0.000183435, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8848, train_wall=7993 | |
| epoch 004: 3150 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51728, ups=3, wpb=15173.883, bsz=557.966, num_updates=29729, lr=0.000183404, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8851, train_wall=7996 | |
| epoch 004: 3160 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51730, ups=3, wpb=15173.996, bsz=557.983, num_updates=29739, lr=0.000183374, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8854, train_wall=7998 | |
| epoch 004: 3170 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51728, ups=3, wpb=15173.848, bsz=557.823, num_updates=29749, lr=0.000183343, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8857, train_wall=8001 | |
| epoch 004: 3180 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51729, ups=3, wpb=15173.310, bsz=557.505, num_updates=29759, lr=0.000183312, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8859, train_wall=8004 | |
| epoch 004: 3190 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51727, ups=3, wpb=15173.558, bsz=557.834, num_updates=29769, lr=0.000183281, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8862, train_wall=8007 | |
| epoch 004: 3200 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51729, ups=3, wpb=15173.858, bsz=557.636, num_updates=29779, lr=0.00018325, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8865, train_wall=8009 | |
| epoch 004: 3210 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15172.695, bsz=557.636, num_updates=29789, lr=0.00018322, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8868, train_wall=8012 | |
| epoch 004: 3220 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15172.553, bsz=557.645, num_updates=29799, lr=0.000183189, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8871, train_wall=8015 | |
| epoch 004: 3230 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15172.491, bsz=557.630, num_updates=29809, lr=0.000183158, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8874, train_wall=8017 | |
| epoch 004: 3240 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15172.197, bsz=557.754, num_updates=29819, lr=0.000183127, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8877, train_wall=8020 | |
| epoch 004: 3250 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15172.809, bsz=557.702, num_updates=29829, lr=0.000183097, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8880, train_wall=8023 | |
| epoch 004: 3260 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51723, ups=3, wpb=15173.007, bsz=557.647, num_updates=29839, lr=0.000183066, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8883, train_wall=8026 | |
| epoch 004: 3270 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51722, ups=3, wpb=15172.651, bsz=557.865, num_updates=29849, lr=0.000183035, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8886, train_wall=8028 | |
| epoch 004: 3280 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51722, ups=3, wpb=15173.162, bsz=557.728, num_updates=29859, lr=0.000183005, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8889, train_wall=8031 | |
| epoch 004: 3290 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51724, ups=3, wpb=15173.785, bsz=557.613, num_updates=29869, lr=0.000182974, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8892, train_wall=8034 | |
| epoch 004: 3300 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51718, ups=3, wpb=15173.877, bsz=557.964, num_updates=29879, lr=0.000182943, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8895, train_wall=8037 | |
| epoch 004: 3310 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51718, ups=3, wpb=15173.693, bsz=557.799, num_updates=29889, lr=0.000182913, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8898, train_wall=8039 | |
| epoch 004: 3320 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51718, ups=3, wpb=15173.714, bsz=557.654, num_updates=29899, lr=0.000182882, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8901, train_wall=8042 | |
| epoch 004: 3330 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51717, ups=3, wpb=15173.983, bsz=557.915, num_updates=29909, lr=0.000182852, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8904, train_wall=8045 | |
| epoch 004: 3340 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51715, ups=3, wpb=15173.856, bsz=558.218, num_updates=29919, lr=0.000182821, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=16.000, wall=8907, train_wall=8048 | |
| epoch 004: 3350 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51718, ups=3, wpb=15174.549, bsz=558.140, num_updates=29929, lr=0.000182791, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8910, train_wall=8050 | |
| epoch 004: 3360 / 8862 loss=4.169, nll_loss=2.524, ppl=5.75, wps=51715, ups=3, wpb=15174.906, bsz=558.358, num_updates=29939, lr=0.00018276, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8913, train_wall=8053 | |
| epoch 004: 3370 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51718, ups=3, wpb=15175.317, bsz=558.355, num_updates=29949, lr=0.00018273, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8916, train_wall=8056 | |
| epoch 004: 3380 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51717, ups=3, wpb=15175.282, bsz=558.254, num_updates=29959, lr=0.000182699, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8919, train_wall=8058 | |
| epoch 004: 3390 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51714, ups=3, wpb=15175.331, bsz=558.363, num_updates=29969, lr=0.000182669, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8921, train_wall=8061 | |
| epoch 004: 3400 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51713, ups=3, wpb=15174.741, bsz=558.353, num_updates=29979, lr=0.000182638, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8924, train_wall=8064 | |
| epoch 004: 3410 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51713, ups=3, wpb=15174.597, bsz=558.497, num_updates=29989, lr=0.000182608, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8927, train_wall=8067 | |
| epoch 004: 3420 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51716, ups=3, wpb=15175.980, bsz=558.574, num_updates=29999, lr=0.000182577, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8930, train_wall=8069 | |
| epoch 004: 3430 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51715, ups=3, wpb=15175.534, bsz=558.433, num_updates=30009, lr=0.000182547, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8933, train_wall=8072 | |
| epoch 004: 3440 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51716, ups=3, wpb=15176.018, bsz=558.326, num_updates=30019, lr=0.000182516, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8936, train_wall=8075 | |
| epoch 004: 3450 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51713, ups=3, wpb=15175.487, bsz=558.261, num_updates=30029, lr=0.000182486, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8939, train_wall=8078 | |
| epoch 004: 3460 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51715, ups=3, wpb=15176.077, bsz=558.539, num_updates=30039, lr=0.000182456, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8942, train_wall=8080 | |
| epoch 004: 3470 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51714, ups=3, wpb=15175.988, bsz=558.481, num_updates=30049, lr=0.000182425, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=16.000, wall=8945, train_wall=8083 | |
| WARNING: overflow detected, setting loss scale to: 8.0 | |
| epoch 004: 3480 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51698, ups=3, wpb=15175.668, bsz=558.761, num_updates=30058, lr=0.000182398, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8948, train_wall=8086 | |
| epoch 004: 3490 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51698, ups=3, wpb=15176.089, bsz=558.622, num_updates=30068, lr=0.000182368, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8951, train_wall=8088 | |
| epoch 004: 3500 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51701, ups=3, wpb=15176.874, bsz=558.304, num_updates=30078, lr=0.000182337, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8954, train_wall=8091 | |
| epoch 004: 3510 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51698, ups=3, wpb=15175.856, bsz=558.373, num_updates=30088, lr=0.000182307, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8957, train_wall=8094 | |
| epoch 004: 3520 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51694, ups=3, wpb=15174.478, bsz=558.423, num_updates=30098, lr=0.000182277, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8960, train_wall=8097 | |
| epoch 004: 3530 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51690, ups=3, wpb=15174.782, bsz=558.380, num_updates=30108, lr=0.000182246, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8963, train_wall=8099 | |
| epoch 004: 3540 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51690, ups=3, wpb=15174.886, bsz=558.244, num_updates=30118, lr=0.000182216, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8966, train_wall=8102 | |
| epoch 004: 3550 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51690, ups=3, wpb=15174.314, bsz=558.001, num_updates=30128, lr=0.000182186, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8969, train_wall=8105 | |
| epoch 004: 3560 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51691, ups=3, wpb=15174.762, bsz=558.344, num_updates=30138, lr=0.000182156, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8971, train_wall=8108 | |
| epoch 004: 3570 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51692, ups=3, wpb=15174.270, bsz=558.086, num_updates=30148, lr=0.000182125, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8974, train_wall=8110 | |
| epoch 004: 3580 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51694, ups=3, wpb=15174.549, bsz=558.004, num_updates=30158, lr=0.000182095, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8977, train_wall=8113 | |
| epoch 004: 3590 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51693, ups=3, wpb=15173.761, bsz=558.066, num_updates=30168, lr=0.000182065, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8980, train_wall=8116 | |
| epoch 004: 3600 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51697, ups=3, wpb=15174.383, bsz=558.098, num_updates=30178, lr=0.000182035, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=8983, train_wall=8118 | |
| epoch 004: 3610 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51699, ups=3, wpb=15174.631, bsz=558.123, num_updates=30188, lr=0.000182005, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8986, train_wall=8121 | |
| epoch 004: 3620 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51702, ups=3, wpb=15174.628, bsz=558.022, num_updates=30198, lr=0.000181975, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8989, train_wall=8124 | |
| epoch 004: 3630 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51704, ups=3, wpb=15175.130, bsz=558.083, num_updates=30208, lr=0.000181945, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8992, train_wall=8126 | |
| epoch 004: 3640 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51706, ups=3, wpb=15175.322, bsz=557.958, num_updates=30218, lr=0.000181914, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8995, train_wall=8129 | |
| epoch 004: 3650 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51710, ups=3, wpb=15175.858, bsz=558.038, num_updates=30228, lr=0.000181884, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=8998, train_wall=8132 | |
| epoch 004: 3660 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51707, ups=3, wpb=15174.928, bsz=558.081, num_updates=30238, lr=0.000181854, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9001, train_wall=8134 | |
| epoch 004: 3670 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51707, ups=3, wpb=15174.329, bsz=558.060, num_updates=30248, lr=0.000181824, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9003, train_wall=8137 | |
| epoch 004: 3680 / 8862 loss=4.167, nll_loss=2.521, ppl=5.74, wps=51706, ups=3, wpb=15173.710, bsz=558.207, num_updates=30258, lr=0.000181794, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9006, train_wall=8140 | |
| epoch 004: 3690 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51710, ups=3, wpb=15173.960, bsz=557.969, num_updates=30268, lr=0.000181764, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9009, train_wall=8142 | |
| epoch 004: 3700 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51713, ups=3, wpb=15174.105, bsz=558.004, num_updates=30278, lr=0.000181734, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9012, train_wall=8145 | |
| epoch 004: 3710 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51715, ups=3, wpb=15174.045, bsz=557.926, num_updates=30288, lr=0.000181704, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9015, train_wall=8148 | |
| epoch 004: 3720 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51718, ups=3, wpb=15174.780, bsz=557.867, num_updates=30298, lr=0.000181674, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9018, train_wall=8150 | |
| epoch 004: 3730 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51718, ups=3, wpb=15174.097, bsz=557.890, num_updates=30308, lr=0.000181644, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9021, train_wall=8153 | |
| epoch 004: 3740 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51718, ups=3, wpb=15173.243, bsz=557.899, num_updates=30318, lr=0.000181614, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9024, train_wall=8156 | |
| epoch 004: 3750 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51719, ups=3, wpb=15173.030, bsz=557.722, num_updates=30328, lr=0.000181584, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9027, train_wall=8158 | |
| epoch 004: 3760 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51721, ups=3, wpb=15173.199, bsz=557.677, num_updates=30338, lr=0.000181554, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9029, train_wall=8161 | |
| epoch 004: 3770 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51723, ups=3, wpb=15173.331, bsz=557.668, num_updates=30348, lr=0.000181524, gnorm=0.490, clip=0.000, oom=0.000, loss_scale=8.000, wall=9032, train_wall=8164 | |
| epoch 004: 3780 / 8862 loss=4.168, nll_loss=2.523, ppl=5.75, wps=51727, ups=3, wpb=15173.527, bsz=557.553, num_updates=30358, lr=0.000181494, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9035, train_wall=8166 | |
| epoch 004: 3790 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51728, ups=3, wpb=15173.122, bsz=557.674, num_updates=30368, lr=0.000181465, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9038, train_wall=8169 | |
| epoch 004: 3800 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51731, ups=3, wpb=15173.926, bsz=557.928, num_updates=30378, lr=0.000181435, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9041, train_wall=8172 | |
| epoch 004: 3810 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51731, ups=3, wpb=15173.505, bsz=557.980, num_updates=30388, lr=0.000181405, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9044, train_wall=8175 | |
| epoch 004: 3820 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51732, ups=3, wpb=15173.081, bsz=558.034, num_updates=30398, lr=0.000181375, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9047, train_wall=8177 | |
| epoch 004: 3830 / 8862 loss=4.167, nll_loss=2.522, ppl=5.75, wps=51734, ups=3, wpb=15173.173, bsz=558.066, num_updates=30408, lr=0.000181345, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9050, train_wall=8180 | |
| epoch 004: 3840 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51737, ups=3, wpb=15173.882, bsz=558.148, num_updates=30418, lr=0.000181315, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9053, train_wall=8183 | |
| epoch 004: 3850 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51737, ups=3, wpb=15173.536, bsz=558.207, num_updates=30428, lr=0.000181286, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9056, train_wall=8185 | |
| epoch 004: 3860 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51739, ups=3, wpb=15173.716, bsz=558.238, num_updates=30438, lr=0.000181256, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9058, train_wall=8188 | |
| epoch 004: 3870 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51741, ups=3, wpb=15173.633, bsz=558.098, num_updates=30448, lr=0.000181226, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9061, train_wall=8191 | |
| epoch 004: 3880 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51745, ups=3, wpb=15173.988, bsz=557.882, num_updates=30458, lr=0.000181196, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9064, train_wall=8193 | |
| epoch 004: 3890 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51745, ups=3, wpb=15173.353, bsz=557.746, num_updates=30468, lr=0.000181167, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9067, train_wall=8196 | |
| epoch 004: 3900 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51749, ups=3, wpb=15174.009, bsz=557.696, num_updates=30478, lr=0.000181137, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9070, train_wall=8199 | |
| epoch 004: 3910 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51751, ups=3, wpb=15174.321, bsz=557.876, num_updates=30488, lr=0.000181107, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9073, train_wall=8201 | |
| epoch 004: 3920 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51752, ups=3, wpb=15174.381, bsz=557.973, num_updates=30498, lr=0.000181077, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9076, train_wall=8204 | |
| epoch 004: 3930 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51754, ups=3, wpb=15174.293, bsz=557.885, num_updates=30508, lr=0.000181048, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9079, train_wall=8207 | |
| epoch 004: 3940 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51755, ups=3, wpb=15173.774, bsz=557.734, num_updates=30518, lr=0.000181018, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9082, train_wall=8209 | |
| epoch 004: 3950 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51758, ups=3, wpb=15174.039, bsz=557.819, num_updates=30528, lr=0.000180988, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9084, train_wall=8212 | |
| epoch 004: 3960 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51762, ups=3, wpb=15174.854, bsz=557.667, num_updates=30538, lr=0.000180959, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9087, train_wall=8215 | |
| epoch 004: 3970 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51762, ups=3, wpb=15174.511, bsz=557.767, num_updates=30548, lr=0.000180929, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9090, train_wall=8217 | |
| epoch 004: 3980 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51764, ups=3, wpb=15174.590, bsz=557.960, num_updates=30558, lr=0.0001809, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9093, train_wall=8220 | |
| epoch 004: 3990 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51767, ups=3, wpb=15174.928, bsz=558.089, num_updates=30568, lr=0.00018087, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9096, train_wall=8223 | |
| epoch 004: 4000 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51767, ups=3, wpb=15174.781, bsz=558.112, num_updates=30578, lr=0.00018084, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9099, train_wall=8225 | |
| epoch 004: 4010 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51771, ups=3, wpb=15175.585, bsz=558.091, num_updates=30588, lr=0.000180811, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9102, train_wall=8228 | |
| epoch 004: 4020 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51773, ups=3, wpb=15175.241, bsz=558.000, num_updates=30598, lr=0.000180781, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9105, train_wall=8231 | |
| epoch 004: 4030 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51776, ups=3, wpb=15175.720, bsz=557.797, num_updates=30608, lr=0.000180752, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9108, train_wall=8233 | |
| epoch 004: 4040 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51778, ups=3, wpb=15175.721, bsz=557.596, num_updates=30618, lr=0.000180722, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9111, train_wall=8236 | |
| epoch 004: 4050 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51779, ups=3, wpb=15175.118, bsz=557.602, num_updates=30628, lr=0.000180693, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9113, train_wall=8239 | |
| epoch 004: 4060 / 8862 loss=4.167, nll_loss=2.522, ppl=5.74, wps=51780, ups=3, wpb=15175.348, bsz=557.730, num_updates=30638, lr=0.000180663, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9116, train_wall=8241 | |
| epoch 004: 4070 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51782, ups=3, wpb=15175.531, bsz=557.885, num_updates=30648, lr=0.000180634, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9119, train_wall=8244 | |
| epoch 004: 4080 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51786, ups=3, wpb=15176.294, bsz=558.131, num_updates=30658, lr=0.000180604, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9122, train_wall=8247 | |
| epoch 004: 4090 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51790, ups=3, wpb=15177.115, bsz=558.140, num_updates=30668, lr=0.000180575, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9125, train_wall=8249 | |
| epoch 004: 4100 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51790, ups=3, wpb=15176.350, bsz=558.273, num_updates=30678, lr=0.000180545, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9128, train_wall=8252 | |
| epoch 004: 4110 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51789, ups=3, wpb=15176.363, bsz=558.665, num_updates=30688, lr=0.000180516, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9131, train_wall=8255 | |
| epoch 004: 4120 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51789, ups=3, wpb=15175.542, bsz=558.635, num_updates=30698, lr=0.000180487, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9134, train_wall=8258 | |
| epoch 004: 4130 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51792, ups=3, wpb=15176.014, bsz=558.487, num_updates=30708, lr=0.000180457, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9137, train_wall=8260 | |
| epoch 004: 4140 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51794, ups=3, wpb=15176.018, bsz=558.439, num_updates=30718, lr=0.000180428, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9139, train_wall=8263 | |
| epoch 004: 4150 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51795, ups=3, wpb=15175.544, bsz=558.371, num_updates=30728, lr=0.000180398, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9142, train_wall=8266 | |
| epoch 004: 4160 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51793, ups=3, wpb=15174.852, bsz=558.502, num_updates=30738, lr=0.000180369, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9145, train_wall=8268 | |
| epoch 004: 4170 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51795, ups=3, wpb=15175.231, bsz=558.435, num_updates=30748, lr=0.00018034, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9148, train_wall=8271 | |
| epoch 004: 4180 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51800, ups=3, wpb=15176.134, bsz=558.467, num_updates=30758, lr=0.00018031, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9151, train_wall=8274 | |
| epoch 004: 4190 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51801, ups=3, wpb=15175.940, bsz=558.268, num_updates=30768, lr=0.000180281, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9154, train_wall=8276 | |
| epoch 004: 4200 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51804, ups=3, wpb=15176.113, bsz=558.215, num_updates=30778, lr=0.000180252, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9157, train_wall=8279 | |
| epoch 004: 4210 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51804, ups=3, wpb=15175.913, bsz=558.284, num_updates=30788, lr=0.000180223, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9160, train_wall=8282 | |
| epoch 004: 4220 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51805, ups=3, wpb=15175.678, bsz=558.150, num_updates=30798, lr=0.000180193, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9163, train_wall=8284 | |
| epoch 004: 4230 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51808, ups=3, wpb=15176.320, bsz=558.249, num_updates=30808, lr=0.000180164, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9166, train_wall=8287 | |
| epoch 004: 4240 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51809, ups=3, wpb=15176.620, bsz=558.545, num_updates=30818, lr=0.000180135, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9168, train_wall=8290 | |
| epoch 004: 4250 / 8862 loss=4.166, nll_loss=2.520, ppl=5.74, wps=51808, ups=3, wpb=15175.986, bsz=558.464, num_updates=30828, lr=0.000180106, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9171, train_wall=8292 | |
| epoch 004: 4260 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51809, ups=3, wpb=15175.723, bsz=558.563, num_updates=30838, lr=0.000180076, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9174, train_wall=8295 | |
| epoch 004: 4270 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51809, ups=3, wpb=15175.270, bsz=558.591, num_updates=30848, lr=0.000180047, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9177, train_wall=8298 | |
| epoch 004: 4280 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51810, ups=3, wpb=15174.878, bsz=558.372, num_updates=30858, lr=0.000180018, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9180, train_wall=8300 | |
| epoch 004: 4290 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51813, ups=3, wpb=15175.525, bsz=558.426, num_updates=30868, lr=0.000179989, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9183, train_wall=8303 | |
| epoch 004: 4300 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51815, ups=3, wpb=15175.544, bsz=558.266, num_updates=30878, lr=0.00017996, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9186, train_wall=8306 | |
| epoch 004: 4310 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51817, ups=3, wpb=15175.478, bsz=558.120, num_updates=30888, lr=0.000179931, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9189, train_wall=8308 | |
| epoch 004: 4320 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51818, ups=3, wpb=15175.186, bsz=558.183, num_updates=30898, lr=0.000179902, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9192, train_wall=8311 | |
| epoch 004: 4330 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51819, ups=3, wpb=15175.268, bsz=558.175, num_updates=30908, lr=0.000179872, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9194, train_wall=8314 | |
| epoch 004: 4340 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51821, ups=3, wpb=15175.524, bsz=558.291, num_updates=30918, lr=0.000179843, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9197, train_wall=8316 | |
| epoch 004: 4350 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51824, ups=3, wpb=15175.716, bsz=558.177, num_updates=30928, lr=0.000179814, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9200, train_wall=8319 | |
| epoch 004: 4360 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51825, ups=3, wpb=15175.591, bsz=558.237, num_updates=30938, lr=0.000179785, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9203, train_wall=8322 | |
| epoch 004: 4370 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51826, ups=3, wpb=15175.594, bsz=558.146, num_updates=30948, lr=0.000179756, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9206, train_wall=8324 | |
| epoch 004: 4380 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51832, ups=3, wpb=15176.480, bsz=558.016, num_updates=30958, lr=0.000179727, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9209, train_wall=8327 | |
| epoch 004: 4390 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51835, ups=3, wpb=15177.104, bsz=558.052, num_updates=30968, lr=0.000179698, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9212, train_wall=8330 | |
| epoch 004: 4400 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51837, ups=3, wpb=15177.291, bsz=558.007, num_updates=30978, lr=0.000179669, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9215, train_wall=8332 | |
| epoch 004: 4410 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51838, ups=3, wpb=15177.002, bsz=557.925, num_updates=30988, lr=0.00017964, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9218, train_wall=8335 | |
| epoch 004: 4420 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51839, ups=3, wpb=15176.717, bsz=557.803, num_updates=30998, lr=0.000179611, gnorm=0.489, clip=0.000, oom=0.000, loss_scale=8.000, wall=9220, train_wall=8338 | |
| epoch 004: 4430 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51842, ups=3, wpb=15176.779, bsz=557.797, num_updates=31008, lr=0.000179582, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9223, train_wall=8340 | |
| epoch 004: 4440 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51844, ups=3, wpb=15176.966, bsz=557.868, num_updates=31018, lr=0.000179553, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9226, train_wall=8343 | |
| epoch 004: 4450 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51847, ups=3, wpb=15177.787, bsz=557.875, num_updates=31028, lr=0.000179524, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9229, train_wall=8346 | |
| epoch 004: 4460 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51849, ups=3, wpb=15178.055, bsz=557.943, num_updates=31038, lr=0.000179495, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9232, train_wall=8348 | |
| epoch 004: 4470 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51849, ups=3, wpb=15177.873, bsz=558.167, num_updates=31048, lr=0.000179466, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9235, train_wall=8351 | |
| epoch 004: 4480 / 8862 loss=4.166, nll_loss=2.521, ppl=5.74, wps=51852, ups=3, wpb=15178.589, bsz=558.191, num_updates=31058, lr=0.000179438, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9238, train_wall=8354 | |
| epoch 004: 4490 / 8862 loss=4.165, nll_loss=2.521, ppl=5.74, wps=51853, ups=3, wpb=15178.365, bsz=558.190, num_updates=31068, lr=0.000179409, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9241, train_wall=8356 | |
| epoch 004: 4500 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51854, ups=3, wpb=15178.509, bsz=558.318, num_updates=31078, lr=0.00017938, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9244, train_wall=8359 | |
| epoch 004: 4510 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51856, ups=3, wpb=15178.371, bsz=558.380, num_updates=31088, lr=0.000179351, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9247, train_wall=8362 | |
| epoch 004: 4520 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51859, ups=3, wpb=15179.181, bsz=558.634, num_updates=31098, lr=0.000179322, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=8.000, wall=9249, train_wall=8364 | |
| WARNING: overflow detected, setting loss scale to: 4.0 | |
| epoch 004: 4530 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51849, ups=3, wpb=15178.673, bsz=558.742, num_updates=31107, lr=0.000179296, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9252, train_wall=8367 | |
| epoch 004: 4540 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51851, ups=3, wpb=15179.314, bsz=558.847, num_updates=31117, lr=0.000179267, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9255, train_wall=8370 | |
| epoch 004: 4550 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51853, ups=3, wpb=15179.194, bsz=558.707, num_updates=31127, lr=0.000179239, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9258, train_wall=8373 | |
| epoch 004: 4560 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51852, ups=3, wpb=15178.851, bsz=558.805, num_updates=31137, lr=0.00017921, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9261, train_wall=8375 | |
| epoch 004: 4570 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51853, ups=3, wpb=15178.634, bsz=558.647, num_updates=31147, lr=0.000179181, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9264, train_wall=8378 | |
| epoch 004: 4580 / 8862 loss=4.165, nll_loss=2.519, ppl=5.73, wps=51854, ups=3, wpb=15178.484, bsz=558.595, num_updates=31157, lr=0.000179152, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9267, train_wall=8381 | |
| epoch 004: 4590 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51856, ups=3, wpb=15178.672, bsz=558.448, num_updates=31167, lr=0.000179123, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9270, train_wall=8383 | |
| epoch 004: 4600 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51859, ups=3, wpb=15179.018, bsz=558.381, num_updates=31177, lr=0.000179095, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9273, train_wall=8386 | |
| epoch 004: 4610 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51858, ups=3, wpb=15178.343, bsz=558.528, num_updates=31187, lr=0.000179066, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9275, train_wall=8389 | |
| epoch 004: 4620 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51859, ups=3, wpb=15178.569, bsz=558.434, num_updates=31197, lr=0.000179037, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9278, train_wall=8391 | |
| epoch 004: 4630 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51859, ups=3, wpb=15178.100, bsz=558.306, num_updates=31207, lr=0.000179009, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9281, train_wall=8394 | |
| epoch 004: 4640 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51861, ups=3, wpb=15178.505, bsz=558.346, num_updates=31217, lr=0.00017898, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9284, train_wall=8397 | |
| epoch 004: 4650 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51861, ups=3, wpb=15178.106, bsz=558.310, num_updates=31227, lr=0.000178951, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9287, train_wall=8399 | |
| epoch 004: 4660 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51861, ups=3, wpb=15177.164, bsz=558.324, num_updates=31237, lr=0.000178923, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9290, train_wall=8402 | |
| epoch 004: 4670 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51863, ups=3, wpb=15177.440, bsz=558.269, num_updates=31247, lr=0.000178894, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9293, train_wall=8405 | |
| epoch 004: 4680 / 8862 loss=4.165, nll_loss=2.520, ppl=5.74, wps=51862, ups=3, wpb=15176.997, bsz=558.217, num_updates=31257, lr=0.000178865, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9296, train_wall=8407 | |
| epoch 004: 4690 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51862, ups=3, wpb=15176.927, bsz=558.461, num_updates=31267, lr=0.000178837, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9299, train_wall=8410 | |
| epoch 004: 4700 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51862, ups=3, wpb=15176.594, bsz=558.497, num_updates=31277, lr=0.000178808, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9301, train_wall=8413 | |
| epoch 004: 4710 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51865, ups=3, wpb=15177.250, bsz=558.395, num_updates=31287, lr=0.00017878, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9304, train_wall=8415 | |
| epoch 004: 4720 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51867, ups=3, wpb=15177.599, bsz=558.376, num_updates=31297, lr=0.000178751, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9307, train_wall=8418 | |
| epoch 004: 4730 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51867, ups=3, wpb=15177.512, bsz=558.515, num_updates=31307, lr=0.000178723, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9310, train_wall=8421 | |
| epoch 004: 4740 / 8862 loss=4.165, nll_loss=2.520, ppl=5.73, wps=51866, ups=3, wpb=15176.945, bsz=558.307, num_updates=31317, lr=0.000178694, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9313, train_wall=8423 | |
| epoch 004: 4750 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51866, ups=3, wpb=15176.679, bsz=558.376, num_updates=31327, lr=0.000178665, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9316, train_wall=8426 | |
| epoch 004: 4760 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51869, ups=3, wpb=15177.013, bsz=558.411, num_updates=31337, lr=0.000178637, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9319, train_wall=8429 | |
| epoch 004: 4770 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51872, ups=3, wpb=15177.838, bsz=558.425, num_updates=31347, lr=0.000178608, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9322, train_wall=8431 | |
| epoch 004: 4780 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51870, ups=3, wpb=15176.911, bsz=558.482, num_updates=31357, lr=0.00017858, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9325, train_wall=8434 | |
| epoch 004: 4790 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51870, ups=3, wpb=15176.638, bsz=558.657, num_updates=31367, lr=0.000178552, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9328, train_wall=8437 | |
| epoch 004: 4800 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51872, ups=3, wpb=15177.112, bsz=558.756, num_updates=31377, lr=0.000178523, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9331, train_wall=8440 | |
| epoch 004: 4810 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51874, ups=3, wpb=15177.412, bsz=558.839, num_updates=31387, lr=0.000178495, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9333, train_wall=8442 | |
| epoch 004: 4820 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51877, ups=3, wpb=15177.976, bsz=558.762, num_updates=31397, lr=0.000178466, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9336, train_wall=8445 | |
| epoch 004: 4830 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51877, ups=3, wpb=15177.455, bsz=558.660, num_updates=31407, lr=0.000178438, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9339, train_wall=8448 | |
| epoch 004: 4840 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51879, ups=3, wpb=15177.593, bsz=558.621, num_updates=31417, lr=0.000178409, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9342, train_wall=8450 | |
| epoch 004: 4850 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51881, ups=3, wpb=15177.704, bsz=558.555, num_updates=31427, lr=0.000178381, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9345, train_wall=8453 | |
| epoch 004: 4860 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51881, ups=3, wpb=15177.320, bsz=558.413, num_updates=31437, lr=0.000178353, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9348, train_wall=8456 | |
| epoch 004: 4870 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51878, ups=3, wpb=15177.026, bsz=558.797, num_updates=31447, lr=0.000178324, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9351, train_wall=8458 | |
| epoch 004: 4880 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51881, ups=3, wpb=15177.411, bsz=558.819, num_updates=31457, lr=0.000178296, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9354, train_wall=8461 | |
| epoch 004: 4890 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51881, ups=3, wpb=15177.187, bsz=558.851, num_updates=31467, lr=0.000178268, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9357, train_wall=8464 | |
| epoch 004: 4900 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51883, ups=3, wpb=15177.483, bsz=558.927, num_updates=31477, lr=0.000178239, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9360, train_wall=8466 | |
| epoch 004: 4910 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51884, ups=3, wpb=15177.209, bsz=558.880, num_updates=31487, lr=0.000178211, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9362, train_wall=8469 | |
| epoch 004: 4920 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51885, ups=3, wpb=15177.284, bsz=558.751, num_updates=31497, lr=0.000178183, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9365, train_wall=8472 | |
| epoch 004: 4930 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51886, ups=3, wpb=15177.264, bsz=558.916, num_updates=31507, lr=0.000178154, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9368, train_wall=8474 | |
| epoch 004: 4940 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51885, ups=3, wpb=15176.725, bsz=558.884, num_updates=31517, lr=0.000178126, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9371, train_wall=8477 | |
| epoch 004: 4950 / 8862 loss=4.164, nll_loss=2.518, ppl=5.73, wps=51887, ups=3, wpb=15176.801, bsz=558.747, num_updates=31527, lr=0.000178098, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9374, train_wall=8480 | |
| epoch 004: 4960 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51890, ups=3, wpb=15177.530, bsz=558.663, num_updates=31537, lr=0.00017807, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9377, train_wall=8482 | |
| epoch 004: 4970 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51890, ups=3, wpb=15177.538, bsz=558.757, num_updates=31547, lr=0.000178041, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9380, train_wall=8485 | |
| epoch 004: 4980 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51891, ups=3, wpb=15177.534, bsz=558.753, num_updates=31557, lr=0.000178013, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9383, train_wall=8488 | |
| epoch 004: 4990 / 8862 loss=4.164, nll_loss=2.519, ppl=5.73, wps=51892, ups=3, wpb=15177.424, bsz=558.825, num_updates=31567, lr=0.000177985, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9386, train_wall=8490 | |
| epoch 004: 5000 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51893, ups=3, wpb=15177.552, bsz=558.910, num_updates=31577, lr=0.000177957, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9389, train_wall=8493 | |
| epoch 004: 5010 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51895, ups=3, wpb=15177.584, bsz=558.751, num_updates=31587, lr=0.000177929, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9391, train_wall=8496 | |
| epoch 004: 5020 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51897, ups=3, wpb=15178.150, bsz=558.789, num_updates=31597, lr=0.0001779, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9394, train_wall=8498 | |
| epoch 004: 5030 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51900, ups=3, wpb=15178.505, bsz=558.788, num_updates=31607, lr=0.000177872, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9397, train_wall=8501 | |
| epoch 004: 5040 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51901, ups=3, wpb=15178.634, bsz=558.790, num_updates=31617, lr=0.000177844, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9400, train_wall=8504 | |
| epoch 004: 5050 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51902, ups=3, wpb=15178.430, bsz=558.564, num_updates=31627, lr=0.000177816, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9403, train_wall=8506 | |
| epoch 004: 5060 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51900, ups=3, wpb=15177.457, bsz=558.689, num_updates=31637, lr=0.000177788, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9406, train_wall=8509 | |
| epoch 004: 5070 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51903, ups=3, wpb=15177.682, bsz=558.558, num_updates=31647, lr=0.00017776, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9409, train_wall=8512 | |
| epoch 004: 5080 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51905, ups=3, wpb=15177.788, bsz=558.368, num_updates=31657, lr=0.000177732, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9412, train_wall=8514 | |
| epoch 004: 5090 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51908, ups=3, wpb=15178.483, bsz=558.496, num_updates=31667, lr=0.000177704, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9415, train_wall=8517 | |
| epoch 004: 5100 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51908, ups=3, wpb=15178.871, bsz=558.820, num_updates=31677, lr=0.000177676, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9417, train_wall=8520 | |
| epoch 004: 5110 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51907, ups=3, wpb=15178.193, bsz=559.062, num_updates=31687, lr=0.000177648, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9420, train_wall=8523 | |
| epoch 004: 5120 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51908, ups=3, wpb=15178.080, bsz=558.915, num_updates=31697, lr=0.00017762, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9423, train_wall=8525 | |
| epoch 004: 5130 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51910, ups=3, wpb=15178.124, bsz=558.807, num_updates=31707, lr=0.000177592, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9426, train_wall=8528 | |
| epoch 004: 5140 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51910, ups=3, wpb=15178.069, bsz=558.828, num_updates=31717, lr=0.000177564, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9429, train_wall=8531 | |
| epoch 004: 5150 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51910, ups=3, wpb=15177.800, bsz=558.746, num_updates=31727, lr=0.000177536, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9432, train_wall=8533 | |
| epoch 004: 5160 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51909, ups=3, wpb=15177.074, bsz=558.625, num_updates=31737, lr=0.000177508, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9435, train_wall=8536 | |
| epoch 004: 5170 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51909, ups=3, wpb=15177.279, bsz=558.779, num_updates=31747, lr=0.00017748, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9438, train_wall=8539 | |
| epoch 004: 5180 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51910, ups=3, wpb=15177.269, bsz=558.733, num_updates=31757, lr=0.000177452, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9441, train_wall=8541 | |
| epoch 004: 5190 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51912, ups=3, wpb=15177.553, bsz=558.609, num_updates=31767, lr=0.000177424, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9444, train_wall=8544 | |
| epoch 004: 5200 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51914, ups=3, wpb=15177.486, bsz=558.486, num_updates=31777, lr=0.000177396, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9446, train_wall=8547 | |
| epoch 004: 5210 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51915, ups=3, wpb=15177.805, bsz=558.383, num_updates=31787, lr=0.000177368, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9449, train_wall=8549 | |
| epoch 004: 5220 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51917, ups=3, wpb=15178.130, bsz=558.394, num_updates=31797, lr=0.00017734, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9452, train_wall=8552 | |
| epoch 004: 5230 / 8862 loss=4.163, nll_loss=2.517, ppl=5.73, wps=51919, ups=3, wpb=15178.104, bsz=558.303, num_updates=31807, lr=0.000177312, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9455, train_wall=8555 | |
| epoch 004: 5240 / 8862 loss=4.162, nll_loss=2.517, ppl=5.73, wps=51918, ups=3, wpb=15177.702, bsz=558.270, num_updates=31817, lr=0.000177284, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9458, train_wall=8557 | |
| epoch 004: 5250 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51916, ups=3, wpb=15176.455, bsz=558.272, num_updates=31827, lr=0.000177256, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9461, train_wall=8560 | |
| epoch 004: 5260 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51917, ups=3, wpb=15176.353, bsz=558.235, num_updates=31837, lr=0.000177229, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9464, train_wall=8563 | |
| epoch 004: 5270 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51919, ups=3, wpb=15176.699, bsz=558.271, num_updates=31847, lr=0.000177201, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9467, train_wall=8565 | |
| epoch 004: 5280 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51919, ups=3, wpb=15176.429, bsz=558.259, num_updates=31857, lr=0.000177173, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9470, train_wall=8568 | |
| epoch 004: 5290 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51922, ups=3, wpb=15176.944, bsz=558.189, num_updates=31867, lr=0.000177145, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9472, train_wall=8571 | |
| epoch 004: 5300 / 8862 loss=4.163, nll_loss=2.518, ppl=5.73, wps=51923, ups=3, wpb=15176.658, bsz=558.211, num_updates=31877, lr=0.000177117, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9475, train_wall=8573 | |
| epoch 004: 5310 / 8862 loss=4.162, nll_loss=2.517, ppl=5.73, wps=51924, ups=3, wpb=15176.809, bsz=558.062, num_updates=31887, lr=0.00017709, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9478, train_wall=8576 | |
| epoch 004: 5320 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51925, ups=3, wpb=15177.118, bsz=558.324, num_updates=31897, lr=0.000177062, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9481, train_wall=8579 | |
| epoch 004: 5330 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51926, ups=3, wpb=15177.082, bsz=558.421, num_updates=31907, lr=0.000177034, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9484, train_wall=8581 | |
| epoch 004: 5340 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51927, ups=3, wpb=15177.289, bsz=558.424, num_updates=31917, lr=0.000177006, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9487, train_wall=8584 | |
| epoch 004: 5350 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51926, ups=3, wpb=15176.947, bsz=558.457, num_updates=31927, lr=0.000176979, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9490, train_wall=8587 | |
| epoch 004: 5360 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51928, ups=3, wpb=15177.025, bsz=558.568, num_updates=31937, lr=0.000176951, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9493, train_wall=8589 | |
| epoch 004: 5370 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51929, ups=3, wpb=15177.196, bsz=558.602, num_updates=31947, lr=0.000176923, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9496, train_wall=8592 | |
| epoch 004: 5380 / 8862 loss=4.162, nll_loss=2.516, ppl=5.72, wps=51930, ups=3, wpb=15177.273, bsz=558.721, num_updates=31957, lr=0.000176896, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9498, train_wall=8595 | |
| epoch 004: 5390 / 8862 loss=4.162, nll_loss=2.516, ppl=5.72, wps=51932, ups=3, wpb=15177.363, bsz=558.639, num_updates=31967, lr=0.000176868, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9501, train_wall=8597 | |
| epoch 004: 5400 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51929, ups=3, wpb=15176.706, bsz=558.908, num_updates=31977, lr=0.00017684, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9504, train_wall=8600 | |
| epoch 004: 5410 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51928, ups=3, wpb=15176.329, bsz=558.839, num_updates=31987, lr=0.000176813, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9507, train_wall=8603 | |
| epoch 004: 5420 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51929, ups=3, wpb=15176.185, bsz=558.834, num_updates=31997, lr=0.000176785, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9510, train_wall=8606 | |
| epoch 004: 5430 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51930, ups=3, wpb=15176.210, bsz=558.890, num_updates=32007, lr=0.000176757, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9513, train_wall=8608 | |
| epoch 004: 5440 / 8862 loss=4.162, nll_loss=2.516, ppl=5.72, wps=51933, ups=3, wpb=15176.726, bsz=558.987, num_updates=32017, lr=0.00017673, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9516, train_wall=8611 | |
| epoch 004: 5450 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51935, ups=3, wpb=15176.970, bsz=559.053, num_updates=32027, lr=0.000176702, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9519, train_wall=8614 | |
| epoch 004: 5460 / 8862 loss=4.162, nll_loss=2.517, ppl=5.72, wps=51937, ups=3, wpb=15176.983, bsz=558.916, num_updates=32037, lr=0.000176675, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9522, train_wall=8616 | |
| epoch 004: 5470 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51937, ups=3, wpb=15177.043, bsz=559.200, num_updates=32047, lr=0.000176647, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9525, train_wall=8619 | |
| epoch 004: 5480 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51938, ups=3, wpb=15177.065, bsz=559.055, num_updates=32057, lr=0.000176619, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9527, train_wall=8622 | |
| epoch 004: 5490 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51939, ups=3, wpb=15177.266, bsz=559.360, num_updates=32067, lr=0.000176592, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9530, train_wall=8624 | |
| epoch 004: 5500 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51940, ups=3, wpb=15177.150, bsz=559.222, num_updates=32077, lr=0.000176564, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9533, train_wall=8627 | |
| epoch 004: 5510 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51940, ups=3, wpb=15176.877, bsz=559.178, num_updates=32087, lr=0.000176537, gnorm=0.488, clip=0.000, oom=0.000, loss_scale=4.000, wall=9536, train_wall=8630 | |
| epoch 004: 5520 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51939, ups=3, wpb=15176.195, bsz=559.033, num_updates=32097, lr=0.000176509, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9539, train_wall=8632 | |
| epoch 004: 5530 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51941, ups=3, wpb=15176.703, bsz=559.112, num_updates=32107, lr=0.000176482, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9542, train_wall=8635 | |
| epoch 004: 5540 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51941, ups=3, wpb=15176.295, bsz=559.265, num_updates=32117, lr=0.000176454, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9545, train_wall=8638 | |
| epoch 004: 5550 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51943, ups=3, wpb=15176.674, bsz=559.223, num_updates=32127, lr=0.000176427, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9548, train_wall=8640 | |
| epoch 004: 5560 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51946, ups=3, wpb=15177.298, bsz=559.337, num_updates=32137, lr=0.000176399, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9551, train_wall=8643 | |
| epoch 004: 5570 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51947, ups=3, wpb=15177.156, bsz=559.282, num_updates=32147, lr=0.000176372, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9553, train_wall=8646 | |
| epoch 004: 5580 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51948, ups=3, wpb=15177.377, bsz=559.240, num_updates=32157, lr=0.000176345, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9556, train_wall=8648 | |
| epoch 004: 5590 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51949, ups=3, wpb=15177.248, bsz=559.217, num_updates=32167, lr=0.000176317, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9559, train_wall=8651 | |
| epoch 004: 5600 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51948, ups=3, wpb=15176.771, bsz=559.107, num_updates=32177, lr=0.00017629, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9562, train_wall=8654 | |
| epoch 004: 5610 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51950, ups=3, wpb=15176.828, bsz=559.110, num_updates=32187, lr=0.000176262, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9565, train_wall=8656 | |
| epoch 004: 5620 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51947, ups=3, wpb=15176.150, bsz=559.052, num_updates=32197, lr=0.000176235, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9568, train_wall=8659 | |
| epoch 004: 5630 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51944, ups=3, wpb=15176.220, bsz=559.123, num_updates=32207, lr=0.000176208, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9571, train_wall=8662 | |
| epoch 004: 5640 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51947, ups=3, wpb=15176.916, bsz=559.119, num_updates=32217, lr=0.00017618, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9574, train_wall=8665 | |
| epoch 004: 5650 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51945, ups=3, wpb=15176.981, bsz=559.122, num_updates=32227, lr=0.000176153, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9577, train_wall=8667 | |
| epoch 004: 5660 / 8862 loss=4.161, nll_loss=2.516, ppl=5.72, wps=51943, ups=3, wpb=15176.684, bsz=559.196, num_updates=32237, lr=0.000176126, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9580, train_wall=8670 | |
| epoch 004: 5670 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51943, ups=3, wpb=15176.769, bsz=559.213, num_updates=32247, lr=0.000176098, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9583, train_wall=8673 | |
| epoch 004: 5680 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51943, ups=3, wpb=15177.035, bsz=559.244, num_updates=32257, lr=0.000176071, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9586, train_wall=8675 | |
| epoch 004: 5690 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51944, ups=3, wpb=15177.314, bsz=559.215, num_updates=32267, lr=0.000176044, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9589, train_wall=8678 | |
| epoch 004: 5700 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51942, ups=3, wpb=15177.372, bsz=559.500, num_updates=32277, lr=0.000176017, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9592, train_wall=8681 | |
| epoch 004: 5710 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51943, ups=3, wpb=15177.535, bsz=559.386, num_updates=32287, lr=0.000175989, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9595, train_wall=8684 | |
| epoch 004: 5720 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51940, ups=3, wpb=15176.805, bsz=559.353, num_updates=32297, lr=0.000175962, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9598, train_wall=8686 | |
| epoch 004: 5730 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51938, ups=3, wpb=15176.288, bsz=559.222, num_updates=32307, lr=0.000175935, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9600, train_wall=8689 | |
| epoch 004: 5740 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51937, ups=3, wpb=15176.230, bsz=559.140, num_updates=32317, lr=0.000175908, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9603, train_wall=8692 | |
| epoch 004: 5750 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51936, ups=3, wpb=15176.170, bsz=559.174, num_updates=32327, lr=0.00017588, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9606, train_wall=8695 | |
| epoch 004: 5760 / 8862 loss=4.161, nll_loss=2.515, ppl=5.72, wps=51938, ups=3, wpb=15176.624, bsz=559.341, num_updates=32337, lr=0.000175853, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9609, train_wall=8697 | |
| epoch 004: 5770 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51936, ups=3, wpb=15176.627, bsz=559.477, num_updates=32347, lr=0.000175826, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9612, train_wall=8700 | |
| epoch 004: 5780 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51935, ups=3, wpb=15176.542, bsz=559.601, num_updates=32357, lr=0.000175799, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9615, train_wall=8703 | |
| epoch 004: 5790 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51935, ups=3, wpb=15176.527, bsz=559.763, num_updates=32367, lr=0.000175772, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9618, train_wall=8705 | |
| epoch 004: 5800 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51934, ups=3, wpb=15176.329, bsz=559.754, num_updates=32377, lr=0.000175744, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9621, train_wall=8708 | |
| epoch 004: 5810 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51934, ups=3, wpb=15176.636, bsz=559.602, num_updates=32387, lr=0.000175717, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9624, train_wall=8711 | |
| epoch 004: 5820 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51933, ups=3, wpb=15176.189, bsz=559.513, num_updates=32397, lr=0.00017569, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9627, train_wall=8714 | |
| epoch 004: 5830 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51932, ups=3, wpb=15176.141, bsz=559.611, num_updates=32407, lr=0.000175663, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9630, train_wall=8716 | |
| epoch 004: 5840 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51932, ups=3, wpb=15176.507, bsz=559.545, num_updates=32417, lr=0.000175636, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9633, train_wall=8719 | |
| epoch 004: 5850 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51931, ups=3, wpb=15176.541, bsz=559.479, num_updates=32427, lr=0.000175609, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9636, train_wall=8722 | |
| epoch 004: 5860 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51932, ups=3, wpb=15176.719, bsz=559.460, num_updates=32437, lr=0.000175582, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9639, train_wall=8724 | |
| epoch 004: 5870 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51932, ups=3, wpb=15176.747, bsz=559.441, num_updates=32447, lr=0.000175555, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9642, train_wall=8727 | |
| epoch 004: 5880 / 8862 loss=4.160, nll_loss=2.515, ppl=5.72, wps=51932, ups=3, wpb=15176.836, bsz=559.340, num_updates=32457, lr=0.000175528, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9645, train_wall=8730 | |
| epoch 004: 5890 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51931, ups=3, wpb=15176.673, bsz=559.399, num_updates=32467, lr=0.000175501, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9647, train_wall=8733 | |
| epoch 004: 5900 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51930, ups=3, wpb=15176.633, bsz=559.425, num_updates=32477, lr=0.000175474, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9650, train_wall=8735 | |
| epoch 004: 5910 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51931, ups=3, wpb=15177.101, bsz=559.454, num_updates=32487, lr=0.000175447, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9653, train_wall=8738 | |
| epoch 004: 5920 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51931, ups=3, wpb=15176.940, bsz=559.443, num_updates=32497, lr=0.00017542, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9656, train_wall=8741 | |
| epoch 004: 5930 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51931, ups=3, wpb=15176.879, bsz=559.318, num_updates=32507, lr=0.000175393, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9659, train_wall=8743 | |
| epoch 004: 5940 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51931, ups=3, wpb=15176.619, bsz=559.310, num_updates=32517, lr=0.000175366, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9662, train_wall=8746 | |
| epoch 004: 5950 / 8862 loss=4.160, nll_loss=2.515, ppl=5.71, wps=51929, ups=3, wpb=15176.366, bsz=559.415, num_updates=32527, lr=0.000175339, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9665, train_wall=8749 | |
| epoch 004: 5960 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51929, ups=3, wpb=15176.240, bsz=559.298, num_updates=32537, lr=0.000175312, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9668, train_wall=8752 | |
| epoch 004: 5970 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51928, ups=3, wpb=15175.927, bsz=559.338, num_updates=32547, lr=0.000175285, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9671, train_wall=8754 | |
| epoch 004: 5980 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51927, ups=3, wpb=15175.607, bsz=559.257, num_updates=32557, lr=0.000175258, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9674, train_wall=8757 | |
| epoch 004: 5990 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51928, ups=3, wpb=15175.820, bsz=559.102, num_updates=32567, lr=0.000175231, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9677, train_wall=8760 | |
| epoch 004: 6000 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51926, ups=3, wpb=15175.376, bsz=559.073, num_updates=32577, lr=0.000175204, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9680, train_wall=8762 | |
| epoch 004: 6010 / 8862 loss=4.160, nll_loss=2.514, ppl=5.71, wps=51924, ups=3, wpb=15174.605, bsz=558.972, num_updates=32587, lr=0.000175177, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9683, train_wall=8765 | |
| epoch 004: 6020 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51926, ups=3, wpb=15175.433, bsz=558.984, num_updates=32597, lr=0.00017515, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9685, train_wall=8768 | |
| epoch 004: 6030 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51923, ups=3, wpb=15175.638, bsz=559.314, num_updates=32607, lr=0.000175124, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9689, train_wall=8771 | |
| epoch 004: 6040 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51923, ups=3, wpb=15175.876, bsz=559.418, num_updates=32617, lr=0.000175097, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9691, train_wall=8773 | |
| epoch 004: 6050 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51925, ups=3, wpb=15176.433, bsz=559.398, num_updates=32627, lr=0.00017507, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9694, train_wall=8776 | |
| epoch 004: 6060 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51924, ups=3, wpb=15176.125, bsz=559.304, num_updates=32637, lr=0.000175043, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9697, train_wall=8779 | |
| epoch 004: 6070 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51923, ups=3, wpb=15176.212, bsz=559.263, num_updates=32647, lr=0.000175016, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9700, train_wall=8781 | |
| epoch 004: 6080 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51922, ups=3, wpb=15176.040, bsz=559.222, num_updates=32657, lr=0.000174989, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9703, train_wall=8784 | |
| epoch 004: 6090 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51922, ups=3, wpb=15176.164, bsz=559.167, num_updates=32667, lr=0.000174963, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9706, train_wall=8787 | |
| epoch 004: 6100 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51920, ups=3, wpb=15175.723, bsz=559.188, num_updates=32677, lr=0.000174936, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9709, train_wall=8790 | |
| epoch 004: 6110 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51919, ups=3, wpb=15175.590, bsz=559.240, num_updates=32687, lr=0.000174909, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9712, train_wall=8792 | |
| epoch 004: 6120 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51919, ups=3, wpb=15175.998, bsz=559.271, num_updates=32697, lr=0.000174882, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9715, train_wall=8795 | |
| epoch 004: 6130 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51920, ups=3, wpb=15176.279, bsz=559.192, num_updates=32707, lr=0.000174856, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9718, train_wall=8798 | |
| epoch 004: 6140 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51920, ups=3, wpb=15176.420, bsz=559.210, num_updates=32717, lr=0.000174829, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9721, train_wall=8801 | |
| epoch 004: 6150 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51919, ups=3, wpb=15176.038, bsz=559.317, num_updates=32727, lr=0.000174802, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9724, train_wall=8803 | |
| epoch 004: 6160 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51917, ups=3, wpb=15175.696, bsz=559.357, num_updates=32737, lr=0.000174776, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9727, train_wall=8806 | |
| epoch 004: 6170 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51918, ups=3, wpb=15175.935, bsz=559.215, num_updates=32747, lr=0.000174749, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9730, train_wall=8809 | |
| epoch 004: 6180 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51918, ups=3, wpb=15176.224, bsz=559.249, num_updates=32757, lr=0.000174722, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9733, train_wall=8811 | |
| epoch 004: 6190 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51919, ups=3, wpb=15176.308, bsz=559.269, num_updates=32767, lr=0.000174695, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9736, train_wall=8814 | |
| epoch 004: 6200 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51917, ups=3, wpb=15176.290, bsz=559.188, num_updates=32777, lr=0.000174669, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9738, train_wall=8817 | |
| epoch 004: 6210 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51916, ups=3, wpb=15176.278, bsz=559.170, num_updates=32787, lr=0.000174642, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9741, train_wall=8820 | |
| epoch 004: 6220 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51914, ups=3, wpb=15176.143, bsz=559.557, num_updates=32797, lr=0.000174616, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9744, train_wall=8822 | |
| epoch 004: 6230 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51914, ups=3, wpb=15176.546, bsz=559.611, num_updates=32807, lr=0.000174589, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9747, train_wall=8825 | |
| epoch 004: 6240 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51915, ups=3, wpb=15176.467, bsz=559.697, num_updates=32817, lr=0.000174562, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9750, train_wall=8828 | |
| epoch 004: 6250 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51912, ups=3, wpb=15175.616, bsz=559.767, num_updates=32827, lr=0.000174536, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9753, train_wall=8830 | |
| epoch 004: 6260 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51913, ups=3, wpb=15175.592, bsz=559.578, num_updates=32837, lr=0.000174509, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9756, train_wall=8833 | |
| epoch 004: 6270 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51913, ups=3, wpb=15175.185, bsz=559.470, num_updates=32847, lr=0.000174483, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9759, train_wall=8836 | |
| epoch 004: 6280 / 8862 loss=4.159, nll_loss=2.514, ppl=5.71, wps=51914, ups=3, wpb=15175.398, bsz=559.426, num_updates=32857, lr=0.000174456, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9762, train_wall=8838 | |
| epoch 004: 6290 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51915, ups=3, wpb=15175.695, bsz=559.549, num_updates=32867, lr=0.00017443, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9765, train_wall=8841 | |
| epoch 004: 6300 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51916, ups=3, wpb=15175.537, bsz=559.470, num_updates=32877, lr=0.000174403, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9768, train_wall=8844 | |
| epoch 004: 6310 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51916, ups=3, wpb=15175.486, bsz=559.415, num_updates=32887, lr=0.000174376, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9771, train_wall=8847 | |
| epoch 004: 6320 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51916, ups=3, wpb=15175.077, bsz=559.435, num_updates=32897, lr=0.00017435, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9773, train_wall=8849 | |
| epoch 004: 6330 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51919, ups=3, wpb=15175.486, bsz=559.394, num_updates=32907, lr=0.000174323, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9776, train_wall=8852 | |
| epoch 004: 6340 / 8862 loss=4.159, nll_loss=2.513, ppl=5.71, wps=51918, ups=3, wpb=15175.038, bsz=559.460, num_updates=32917, lr=0.000174297, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9779, train_wall=8855 | |
| epoch 004: 6350 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51920, ups=3, wpb=15175.450, bsz=559.413, num_updates=32927, lr=0.000174271, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9782, train_wall=8857 | |
| epoch 004: 6360 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51920, ups=3, wpb=15175.332, bsz=559.508, num_updates=32937, lr=0.000174244, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9785, train_wall=8860 | |
| epoch 004: 6370 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51920, ups=3, wpb=15175.018, bsz=559.413, num_updates=32947, lr=0.000174218, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9788, train_wall=8863 | |
| epoch 004: 6380 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51921, ups=3, wpb=15174.887, bsz=559.355, num_updates=32957, lr=0.000174191, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9791, train_wall=8865 | |
| epoch 004: 6390 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51924, ups=3, wpb=15175.615, bsz=559.449, num_updates=32967, lr=0.000174165, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9794, train_wall=8868 | |
| epoch 004: 6400 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51927, ups=3, wpb=15176.324, bsz=559.431, num_updates=32977, lr=0.000174138, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9797, train_wall=8871 | |
| epoch 004: 6410 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51928, ups=3, wpb=15176.521, bsz=559.564, num_updates=32987, lr=0.000174112, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9800, train_wall=8873 | |
| epoch 004: 6420 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51929, ups=3, wpb=15176.440, bsz=559.657, num_updates=32997, lr=0.000174086, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9802, train_wall=8876 | |
| epoch 004: 6430 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51931, ups=3, wpb=15176.946, bsz=559.635, num_updates=33007, lr=0.000174059, gnorm=0.487, clip=0.000, oom=0.000, loss_scale=4.000, wall=9805, train_wall=8879 | |
| epoch 004: 6440 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51932, ups=3, wpb=15176.842, bsz=559.610, num_updates=33017, lr=0.000174033, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9808, train_wall=8881 | |
| epoch 004: 6450 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51932, ups=3, wpb=15176.692, bsz=559.631, num_updates=33027, lr=0.000174006, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9811, train_wall=8884 | |
| epoch 004: 6460 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51933, ups=3, wpb=15176.932, bsz=559.693, num_updates=33037, lr=0.00017398, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9814, train_wall=8887 | |
| epoch 004: 6470 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51933, ups=3, wpb=15176.585, bsz=559.678, num_updates=33047, lr=0.000173954, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9817, train_wall=8889 | |
| epoch 004: 6480 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51935, ups=3, wpb=15176.699, bsz=559.545, num_updates=33057, lr=0.000173928, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9820, train_wall=8892 | |
| epoch 004: 6490 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51935, ups=3, wpb=15176.638, bsz=559.583, num_updates=33067, lr=0.000173901, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9823, train_wall=8895 | |
| epoch 004: 6500 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51934, ups=3, wpb=15176.176, bsz=559.625, num_updates=33077, lr=0.000173875, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9826, train_wall=8897 | |
| epoch 004: 6510 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51934, ups=3, wpb=15175.702, bsz=559.547, num_updates=33087, lr=0.000173849, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9828, train_wall=8900 | |
| epoch 004: 6520 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51934, ups=3, wpb=15175.470, bsz=559.635, num_updates=33097, lr=0.000173822, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9831, train_wall=8903 | |
| epoch 004: 6530 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51933, ups=3, wpb=15174.965, bsz=559.543, num_updates=33107, lr=0.000173796, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9834, train_wall=8905 | |
| epoch 004: 6540 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51934, ups=3, wpb=15174.878, bsz=559.492, num_updates=33117, lr=0.00017377, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9837, train_wall=8908 | |
| epoch 004: 6550 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51935, ups=3, wpb=15174.995, bsz=559.476, num_updates=33127, lr=0.000173744, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9840, train_wall=8911 | |
| epoch 004: 6560 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51937, ups=3, wpb=15175.180, bsz=559.385, num_updates=33137, lr=0.000173717, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9843, train_wall=8913 | |
| epoch 004: 6570 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51938, ups=3, wpb=15175.355, bsz=559.280, num_updates=33147, lr=0.000173691, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9846, train_wall=8916 | |
| epoch 004: 6580 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51938, ups=3, wpb=15175.324, bsz=559.448, num_updates=33157, lr=0.000173665, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9849, train_wall=8919 | |
| epoch 004: 6590 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51939, ups=3, wpb=15175.291, bsz=559.368, num_updates=33167, lr=0.000173639, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9852, train_wall=8921 | |
| epoch 004: 6600 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51940, ups=3, wpb=15175.300, bsz=559.276, num_updates=33177, lr=0.000173613, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9854, train_wall=8924 | |
| epoch 004: 6610 / 8862 loss=4.158, nll_loss=2.513, ppl=5.71, wps=51940, ups=3, wpb=15174.780, bsz=559.261, num_updates=33187, lr=0.000173587, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9857, train_wall=8927 | |
| epoch 004: 6620 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51941, ups=3, wpb=15175.257, bsz=559.462, num_updates=33197, lr=0.00017356, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9860, train_wall=8929 | |
| epoch 004: 6630 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51941, ups=3, wpb=15174.926, bsz=559.490, num_updates=33207, lr=0.000173534, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9863, train_wall=8932 | |
| epoch 004: 6640 / 8862 loss=4.158, nll_loss=2.512, ppl=5.71, wps=51943, ups=3, wpb=15175.179, bsz=559.441, num_updates=33217, lr=0.000173508, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9866, train_wall=8935 | |
| epoch 004: 6650 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51944, ups=3, wpb=15175.178, bsz=559.368, num_updates=33227, lr=0.000173482, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9869, train_wall=8937 | |
| epoch 004: 6660 / 8862 loss=4.158, nll_loss=2.512, ppl=5.70, wps=51943, ups=3, wpb=15174.828, bsz=559.377, num_updates=33237, lr=0.000173456, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9872, train_wall=8940 | |
| epoch 004: 6670 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51943, ups=3, wpb=15174.400, bsz=559.288, num_updates=33247, lr=0.00017343, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9875, train_wall=8943 | |
| epoch 004: 6680 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51944, ups=3, wpb=15174.524, bsz=559.345, num_updates=33257, lr=0.000173404, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9878, train_wall=8945 | |
| epoch 004: 6690 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51945, ups=3, wpb=15174.754, bsz=559.389, num_updates=33267, lr=0.000173378, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9880, train_wall=8948 | |
| epoch 004: 6700 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51948, ups=3, wpb=15175.371, bsz=559.335, num_updates=33277, lr=0.000173352, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9883, train_wall=8951 | |
| epoch 004: 6710 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51949, ups=3, wpb=15175.564, bsz=559.297, num_updates=33287, lr=0.000173326, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9886, train_wall=8954 | |
| epoch 004: 6720 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51950, ups=3, wpb=15175.437, bsz=559.152, num_updates=33297, lr=0.0001733, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9889, train_wall=8956 | |
| epoch 004: 6730 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51951, ups=3, wpb=15175.731, bsz=559.251, num_updates=33307, lr=0.000173274, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9892, train_wall=8959 | |
| epoch 004: 6740 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51952, ups=3, wpb=15175.874, bsz=559.204, num_updates=33317, lr=0.000173248, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9895, train_wall=8962 | |
| epoch 004: 6750 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51954, ups=3, wpb=15176.101, bsz=559.189, num_updates=33327, lr=0.000173222, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9898, train_wall=8964 | |
| epoch 004: 6760 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51955, ups=3, wpb=15176.149, bsz=559.245, num_updates=33337, lr=0.000173196, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9901, train_wall=8967 | |
| epoch 004: 6770 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51956, ups=3, wpb=15176.124, bsz=559.233, num_updates=33347, lr=0.00017317, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9904, train_wall=8970 | |
| epoch 004: 6780 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51957, ups=3, wpb=15176.272, bsz=559.190, num_updates=33357, lr=0.000173144, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9907, train_wall=8972 | |
| epoch 004: 6790 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51958, ups=3, wpb=15176.468, bsz=559.114, num_updates=33367, lr=0.000173118, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9909, train_wall=8975 | |
| epoch 004: 6800 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51959, ups=3, wpb=15176.522, bsz=559.184, num_updates=33377, lr=0.000173092, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9912, train_wall=8978 | |
| epoch 004: 6810 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51960, ups=3, wpb=15176.372, bsz=559.074, num_updates=33387, lr=0.000173066, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9915, train_wall=8980 | |
| epoch 004: 6820 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51960, ups=3, wpb=15176.268, bsz=559.080, num_updates=33397, lr=0.00017304, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9918, train_wall=8983 | |
| epoch 004: 6830 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51960, ups=3, wpb=15176.099, bsz=559.150, num_updates=33407, lr=0.000173014, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9921, train_wall=8986 | |
| epoch 004: 6840 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51961, ups=3, wpb=15176.207, bsz=559.053, num_updates=33417, lr=0.000172988, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9924, train_wall=8988 | |
| epoch 004: 6850 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51963, ups=3, wpb=15176.394, bsz=558.936, num_updates=33427, lr=0.000172962, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9927, train_wall=8991 | |
| epoch 004: 6860 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51964, ups=3, wpb=15176.526, bsz=558.830, num_updates=33437, lr=0.000172936, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9930, train_wall=8994 | |
| epoch 004: 6870 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51966, ups=3, wpb=15176.468, bsz=558.713, num_updates=33447, lr=0.000172911, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9932, train_wall=8996 | |
| epoch 004: 6880 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51966, ups=3, wpb=15176.738, bsz=558.858, num_updates=33457, lr=0.000172885, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9935, train_wall=8999 | |
| epoch 004: 6890 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51967, ups=3, wpb=15176.887, bsz=558.867, num_updates=33467, lr=0.000172859, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9938, train_wall=9002 | |
| epoch 004: 6900 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51968, ups=3, wpb=15176.775, bsz=558.890, num_updates=33477, lr=0.000172833, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9941, train_wall=9004 | |
| epoch 004: 6910 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51967, ups=3, wpb=15176.221, bsz=558.823, num_updates=33487, lr=0.000172807, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9944, train_wall=9007 | |
| epoch 004: 6920 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51967, ups=3, wpb=15176.070, bsz=558.800, num_updates=33497, lr=0.000172781, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9947, train_wall=9010 | |
| epoch 004: 6930 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51969, ups=3, wpb=15176.551, bsz=558.925, num_updates=33507, lr=0.000172756, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9950, train_wall=9012 | |
| epoch 004: 6940 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51969, ups=3, wpb=15176.176, bsz=558.793, num_updates=33517, lr=0.00017273, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9953, train_wall=9015 | |
| epoch 004: 6950 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51967, ups=3, wpb=15175.779, bsz=558.976, num_updates=33527, lr=0.000172704, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9956, train_wall=9018 | |
| epoch 004: 6960 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51968, ups=3, wpb=15175.656, bsz=558.872, num_updates=33537, lr=0.000172678, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9959, train_wall=9020 | |
| epoch 004: 6970 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51969, ups=3, wpb=15175.659, bsz=558.839, num_updates=33547, lr=0.000172653, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9961, train_wall=9023 | |
| epoch 004: 6980 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51969, ups=3, wpb=15175.770, bsz=558.908, num_updates=33557, lr=0.000172627, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9964, train_wall=9026 | |
| epoch 004: 6990 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51970, ups=3, wpb=15175.974, bsz=559.006, num_updates=33567, lr=0.000172601, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9967, train_wall=9028 | |
| epoch 004: 7000 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51970, ups=3, wpb=15175.709, bsz=558.964, num_updates=33577, lr=0.000172575, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9970, train_wall=9031 | |
| epoch 004: 7010 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51970, ups=3, wpb=15175.603, bsz=559.009, num_updates=33587, lr=0.00017255, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9973, train_wall=9034 | |
| epoch 004: 7020 / 8862 loss=4.157, nll_loss=2.512, ppl=5.70, wps=51972, ups=3, wpb=15175.689, bsz=558.860, num_updates=33597, lr=0.000172524, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9976, train_wall=9036 | |
| epoch 004: 7030 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51972, ups=3, wpb=15175.587, bsz=559.090, num_updates=33607, lr=0.000172498, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9979, train_wall=9039 | |
| epoch 004: 7040 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51973, ups=3, wpb=15175.589, bsz=558.976, num_updates=33617, lr=0.000172473, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9982, train_wall=9042 | |
| epoch 004: 7050 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51975, ups=3, wpb=15176.202, bsz=558.934, num_updates=33627, lr=0.000172447, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9985, train_wall=9044 | |
| epoch 004: 7060 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51976, ups=3, wpb=15176.094, bsz=558.908, num_updates=33637, lr=0.000172421, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9988, train_wall=9047 | |
| epoch 004: 7070 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51976, ups=3, wpb=15176.094, bsz=558.811, num_updates=33647, lr=0.000172396, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9990, train_wall=9050 | |
| epoch 004: 7080 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51974, ups=3, wpb=15176.073, bsz=558.813, num_updates=33657, lr=0.00017237, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9993, train_wall=9053 | |
| epoch 004: 7090 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51973, ups=3, wpb=15176.229, bsz=558.767, num_updates=33667, lr=0.000172345, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9996, train_wall=9055 | |
| epoch 004: 7100 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51970, ups=3, wpb=15175.780, bsz=558.576, num_updates=33677, lr=0.000172319, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=9999, train_wall=9058 | |
| epoch 004: 7110 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51971, ups=3, wpb=15176.370, bsz=558.564, num_updates=33687, lr=0.000172293, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10002, train_wall=9061 | |
| epoch 004: 7120 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51970, ups=3, wpb=15176.552, bsz=558.529, num_updates=33697, lr=0.000172268, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10005, train_wall=9064 | |
| epoch 004: 7130 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51967, ups=3, wpb=15176.068, bsz=558.548, num_updates=33707, lr=0.000172242, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10008, train_wall=9066 | |
| epoch 004: 7140 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51966, ups=3, wpb=15176.387, bsz=558.587, num_updates=33717, lr=0.000172217, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10011, train_wall=9069 | |
| epoch 004: 7150 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51964, ups=3, wpb=15176.163, bsz=558.584, num_updates=33727, lr=0.000172191, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10014, train_wall=9072 | |
| epoch 004: 7160 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51962, ups=3, wpb=15176.114, bsz=558.526, num_updates=33737, lr=0.000172166, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10017, train_wall=9075 | |
| epoch 004: 7170 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51960, ups=3, wpb=15176.102, bsz=558.576, num_updates=33747, lr=0.00017214, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10020, train_wall=9077 | |
| epoch 004: 7180 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51958, ups=3, wpb=15176.079, bsz=558.629, num_updates=33757, lr=0.000172115, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10023, train_wall=9080 | |
| epoch 004: 7190 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51957, ups=3, wpb=15176.090, bsz=558.615, num_updates=33767, lr=0.000172089, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10026, train_wall=9083 | |
| epoch 004: 7200 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51955, ups=3, wpb=15175.944, bsz=558.590, num_updates=33777, lr=0.000172064, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10029, train_wall=9086 | |
| epoch 004: 7210 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51953, ups=3, wpb=15175.792, bsz=558.583, num_updates=33787, lr=0.000172038, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10032, train_wall=9089 | |
| epoch 004: 7220 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51952, ups=3, wpb=15175.827, bsz=558.520, num_updates=33797, lr=0.000172013, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10035, train_wall=9091 | |
| epoch 004: 7230 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51949, ups=3, wpb=15175.575, bsz=558.568, num_updates=33807, lr=0.000171987, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10038, train_wall=9094 | |
| epoch 004: 7240 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51949, ups=3, wpb=15175.829, bsz=558.559, num_updates=33817, lr=0.000171962, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10041, train_wall=9097 | |
| epoch 004: 7250 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51946, ups=3, wpb=15175.642, bsz=558.574, num_updates=33827, lr=0.000171937, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10044, train_wall=9100 | |
| epoch 004: 7260 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51946, ups=3, wpb=15175.898, bsz=558.544, num_updates=33837, lr=0.000171911, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10047, train_wall=9102 | |
| epoch 004: 7270 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51945, ups=3, wpb=15176.183, bsz=558.552, num_updates=33847, lr=0.000171886, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10050, train_wall=9105 | |
| epoch 004: 7280 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51943, ups=3, wpb=15175.958, bsz=558.489, num_updates=33857, lr=0.00017186, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10053, train_wall=9108 | |
| epoch 004: 7290 / 8862 loss=4.157, nll_loss=2.511, ppl=5.70, wps=51939, ups=3, wpb=15175.364, bsz=558.409, num_updates=33867, lr=0.000171835, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10056, train_wall=9111 | |
| epoch 004: 7300 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51939, ups=3, wpb=15175.596, bsz=558.294, num_updates=33877, lr=0.00017181, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10059, train_wall=9113 | |
| epoch 004: 7310 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51935, ups=3, wpb=15175.141, bsz=558.340, num_updates=33887, lr=0.000171784, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10062, train_wall=9116 | |
| epoch 004: 7320 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51934, ups=3, wpb=15175.391, bsz=558.502, num_updates=33897, lr=0.000171759, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10065, train_wall=9119 | |
| epoch 004: 7330 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51935, ups=3, wpb=15175.908, bsz=558.398, num_updates=33907, lr=0.000171734, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10068, train_wall=9122 | |
| epoch 004: 7340 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51934, ups=3, wpb=15176.254, bsz=558.377, num_updates=33917, lr=0.000171708, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10071, train_wall=9125 | |
| epoch 004: 7350 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51934, ups=3, wpb=15176.583, bsz=558.303, num_updates=33927, lr=0.000171683, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10074, train_wall=9127 | |
| epoch 004: 7360 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51933, ups=3, wpb=15176.851, bsz=558.317, num_updates=33937, lr=0.000171658, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10077, train_wall=9130 | |
| epoch 004: 7370 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51931, ups=3, wpb=15176.711, bsz=558.400, num_updates=33947, lr=0.000171632, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10080, train_wall=9133 | |
| epoch 004: 7380 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51930, ups=3, wpb=15176.797, bsz=558.338, num_updates=33957, lr=0.000171607, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10083, train_wall=9136 | |
| epoch 004: 7390 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51928, ups=3, wpb=15176.737, bsz=558.422, num_updates=33967, lr=0.000171582, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10086, train_wall=9138 | |
| epoch 004: 7400 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51927, ups=3, wpb=15176.778, bsz=558.385, num_updates=33977, lr=0.000171557, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10089, train_wall=9141 | |
| epoch 004: 7410 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51926, ups=3, wpb=15176.857, bsz=558.267, num_updates=33987, lr=0.000171531, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10092, train_wall=9144 | |
| epoch 004: 7420 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51925, ups=3, wpb=15176.858, bsz=558.206, num_updates=33997, lr=0.000171506, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10095, train_wall=9147 | |
| epoch 004: 7430 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51922, ups=3, wpb=15176.727, bsz=558.197, num_updates=34007, lr=0.000171481, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10098, train_wall=9149 | |
| epoch 004: 7440 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51922, ups=3, wpb=15176.857, bsz=558.166, num_updates=34017, lr=0.000171456, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10101, train_wall=9152 | |
| epoch 004: 7450 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51921, ups=3, wpb=15177.017, bsz=558.111, num_updates=34027, lr=0.000171431, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10104, train_wall=9155 | |
| epoch 004: 7460 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51920, ups=3, wpb=15177.118, bsz=558.027, num_updates=34037, lr=0.000171405, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10107, train_wall=9158 | |
| epoch 004: 7470 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51918, ups=3, wpb=15176.654, bsz=558.029, num_updates=34047, lr=0.00017138, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10110, train_wall=9160 | |
| epoch 004: 7480 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51917, ups=3, wpb=15175.988, bsz=557.940, num_updates=34057, lr=0.000171355, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10113, train_wall=9163 | |
| epoch 004: 7490 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51917, ups=3, wpb=15175.913, bsz=558.011, num_updates=34067, lr=0.00017133, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10116, train_wall=9166 | |
| epoch 004: 7500 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51918, ups=3, wpb=15175.781, bsz=557.944, num_updates=34077, lr=0.000171305, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10118, train_wall=9168 | |
| epoch 004: 7510 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51919, ups=3, wpb=15175.877, bsz=557.986, num_updates=34087, lr=0.00017128, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10121, train_wall=9171 | |
| epoch 004: 7520 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51920, ups=3, wpb=15175.739, bsz=558.064, num_updates=34097, lr=0.000171254, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10124, train_wall=9174 | |
| epoch 004: 7530 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51920, ups=3, wpb=15175.640, bsz=558.019, num_updates=34107, lr=0.000171229, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10127, train_wall=9176 | |
| epoch 004: 7540 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51922, ups=3, wpb=15175.834, bsz=558.036, num_updates=34117, lr=0.000171204, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10130, train_wall=9179 | |
| epoch 004: 7550 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51922, ups=3, wpb=15175.630, bsz=557.979, num_updates=34127, lr=0.000171179, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10133, train_wall=9182 | |
| epoch 004: 7560 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51923, ups=3, wpb=15175.728, bsz=558.105, num_updates=34137, lr=0.000171154, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10136, train_wall=9184 | |
| epoch 004: 7570 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51924, ups=3, wpb=15176.036, bsz=558.080, num_updates=34147, lr=0.000171129, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10139, train_wall=9187 | |
| epoch 004: 7580 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51926, ups=3, wpb=15176.354, bsz=558.025, num_updates=34157, lr=0.000171104, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10142, train_wall=9190 | |
| epoch 004: 7590 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51928, ups=3, wpb=15176.446, bsz=557.926, num_updates=34167, lr=0.000171079, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10144, train_wall=9192 | |
| epoch 004: 7600 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51930, ups=3, wpb=15176.854, bsz=557.949, num_updates=34177, lr=0.000171054, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10147, train_wall=9195 | |
| epoch 004: 7610 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51931, ups=3, wpb=15176.975, bsz=557.923, num_updates=34187, lr=0.000171029, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10150, train_wall=9198 | |
| epoch 004: 7620 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51931, ups=3, wpb=15176.891, bsz=557.943, num_updates=34197, lr=0.000171004, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10153, train_wall=9200 | |
| epoch 004: 7630 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51931, ups=3, wpb=15176.720, bsz=558.080, num_updates=34207, lr=0.000170979, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10156, train_wall=9203 | |
| epoch 004: 7640 / 8862 loss=4.156, nll_loss=2.510, ppl=5.70, wps=51933, ups=3, wpb=15176.913, bsz=558.095, num_updates=34217, lr=0.000170954, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10159, train_wall=9206 | |
| epoch 004: 7650 / 8862 loss=4.156, nll_loss=2.510, ppl=5.70, wps=51932, ups=3, wpb=15176.498, bsz=558.078, num_updates=34227, lr=0.000170929, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10162, train_wall=9208 | |
| epoch 004: 7660 / 8862 loss=4.156, nll_loss=2.511, ppl=5.70, wps=51933, ups=3, wpb=15176.611, bsz=558.187, num_updates=34237, lr=0.000170904, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10165, train_wall=9211 | |
| epoch 004: 7670 / 8862 loss=4.156, nll_loss=2.510, ppl=5.70, wps=51934, ups=3, wpb=15176.598, bsz=558.152, num_updates=34247, lr=0.000170879, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10168, train_wall=9214 | |
| epoch 004: 7680 / 8862 loss=4.156, nll_loss=2.510, ppl=5.70, wps=51935, ups=3, wpb=15176.763, bsz=558.136, num_updates=34257, lr=0.000170854, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10170, train_wall=9216 | |
| epoch 004: 7690 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51935, ups=3, wpb=15176.623, bsz=558.262, num_updates=34267, lr=0.000170829, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10173, train_wall=9219 | |
| epoch 004: 7700 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51935, ups=3, wpb=15176.592, bsz=558.312, num_updates=34277, lr=0.000170804, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10176, train_wall=9222 | |
| epoch 004: 7710 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51937, ups=3, wpb=15177.005, bsz=558.365, num_updates=34287, lr=0.000170779, gnorm=0.486, clip=0.000, oom=0.000, loss_scale=4.000, wall=10179, train_wall=9225 | |
| epoch 004: 7720 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51937, ups=3, wpb=15176.683, bsz=558.338, num_updates=34297, lr=0.000170754, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10182, train_wall=9227 | |
| epoch 004: 7730 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51939, ups=3, wpb=15177.074, bsz=558.370, num_updates=34307, lr=0.00017073, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10185, train_wall=9230 | |
| epoch 004: 7740 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51939, ups=3, wpb=15176.919, bsz=558.348, num_updates=34317, lr=0.000170705, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10188, train_wall=9233 | |
| epoch 004: 7750 / 8862 loss=4.155, nll_loss=2.510, ppl=5.70, wps=51940, ups=3, wpb=15177.156, bsz=558.315, num_updates=34327, lr=0.00017068, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10191, train_wall=9235 | |
| epoch 004: 7760 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51942, ups=3, wpb=15177.326, bsz=558.238, num_updates=34337, lr=0.000170655, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10194, train_wall=9238 | |
| epoch 004: 7770 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51944, ups=3, wpb=15177.616, bsz=558.221, num_updates=34347, lr=0.00017063, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10196, train_wall=9241 | |
| epoch 004: 7780 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51946, ups=3, wpb=15178.139, bsz=558.157, num_updates=34357, lr=0.000170605, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10199, train_wall=9243 | |
| epoch 004: 7790 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51948, ups=3, wpb=15178.425, bsz=558.080, num_updates=34367, lr=0.00017058, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10202, train_wall=9246 | |
| epoch 004: 7800 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51948, ups=3, wpb=15178.299, bsz=558.109, num_updates=34377, lr=0.000170556, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10205, train_wall=9249 | |
| epoch 004: 7810 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51948, ups=3, wpb=15178.076, bsz=558.032, num_updates=34387, lr=0.000170531, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10208, train_wall=9251 | |
| epoch 004: 7820 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51949, ups=3, wpb=15178.008, bsz=557.972, num_updates=34397, lr=0.000170506, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10211, train_wall=9254 | |
| epoch 004: 7830 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51948, ups=3, wpb=15177.570, bsz=557.943, num_updates=34407, lr=0.000170481, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10214, train_wall=9257 | |
| epoch 004: 7840 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51950, ups=3, wpb=15177.973, bsz=557.858, num_updates=34417, lr=0.000170456, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10217, train_wall=9259 | |
| epoch 004: 7850 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51952, ups=3, wpb=15178.226, bsz=557.971, num_updates=34427, lr=0.000170432, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10220, train_wall=9262 | |
| epoch 004: 7860 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51954, ups=3, wpb=15178.609, bsz=557.947, num_updates=34437, lr=0.000170407, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10222, train_wall=9265 | |
| epoch 004: 7870 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51953, ups=3, wpb=15178.331, bsz=557.993, num_updates=34447, lr=0.000170382, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10225, train_wall=9267 | |
| epoch 004: 7880 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51951, ups=3, wpb=15178.159, bsz=558.284, num_updates=34457, lr=0.000170358, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10228, train_wall=9270 | |
| epoch 004: 7890 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51952, ups=3, wpb=15178.343, bsz=558.263, num_updates=34467, lr=0.000170333, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10231, train_wall=9273 | |
| epoch 004: 7900 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51953, ups=3, wpb=15178.226, bsz=558.188, num_updates=34477, lr=0.000170308, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10234, train_wall=9275 | |
| epoch 004: 7910 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.368, bsz=558.163, num_updates=34487, lr=0.000170283, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10237, train_wall=9278 | |
| epoch 004: 7920 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51955, ups=3, wpb=15178.132, bsz=558.057, num_updates=34497, lr=0.000170259, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10240, train_wall=9281 | |
| epoch 004: 7930 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51955, ups=3, wpb=15177.889, bsz=558.047, num_updates=34507, lr=0.000170234, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10243, train_wall=9283 | |
| epoch 004: 7940 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51956, ups=3, wpb=15177.858, bsz=557.974, num_updates=34517, lr=0.000170209, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10246, train_wall=9286 | |
| epoch 004: 7950 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15178.201, bsz=557.955, num_updates=34527, lr=0.000170185, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10249, train_wall=9289 | |
| epoch 004: 7960 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51959, ups=3, wpb=15178.516, bsz=557.917, num_updates=34537, lr=0.00017016, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10251, train_wall=9291 | |
| epoch 004: 7970 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51959, ups=3, wpb=15178.376, bsz=557.809, num_updates=34547, lr=0.000170135, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10254, train_wall=9294 | |
| epoch 004: 7980 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51960, ups=3, wpb=15178.545, bsz=557.824, num_updates=34557, lr=0.000170111, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10257, train_wall=9297 | |
| epoch 004: 7990 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51960, ups=3, wpb=15178.374, bsz=557.716, num_updates=34567, lr=0.000170086, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10260, train_wall=9299 | |
| epoch 004: 8000 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51959, ups=3, wpb=15178.092, bsz=557.592, num_updates=34577, lr=0.000170062, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10263, train_wall=9302 | |
| epoch 004: 8010 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51960, ups=3, wpb=15178.142, bsz=557.537, num_updates=34587, lr=0.000170037, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10266, train_wall=9305 | |
| epoch 004: 8020 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51961, ups=3, wpb=15178.153, bsz=557.500, num_updates=34597, lr=0.000170012, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10269, train_wall=9307 | |
| epoch 004: 8030 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.523, bsz=557.496, num_updates=34607, lr=0.000169988, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10272, train_wall=9310 | |
| epoch 004: 8040 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51962, ups=3, wpb=15178.372, bsz=557.613, num_updates=34617, lr=0.000169963, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10275, train_wall=9313 | |
| epoch 004: 8050 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.346, bsz=557.537, num_updates=34627, lr=0.000169939, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10278, train_wall=9316 | |
| epoch 004: 8060 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51964, ups=3, wpb=15178.619, bsz=557.521, num_updates=34637, lr=0.000169914, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10280, train_wall=9318 | |
| epoch 004: 8070 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51963, ups=3, wpb=15177.987, bsz=557.425, num_updates=34647, lr=0.00016989, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10283, train_wall=9321 | |
| epoch 004: 8080 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.118, bsz=557.443, num_updates=34657, lr=0.000169865, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10286, train_wall=9324 | |
| epoch 004: 8090 / 8862 loss=4.155, nll_loss=2.510, ppl=5.69, wps=51962, ups=3, wpb=15177.853, bsz=557.458, num_updates=34667, lr=0.000169841, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10289, train_wall=9326 | |
| epoch 004: 8100 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51964, ups=3, wpb=15178.298, bsz=557.436, num_updates=34677, lr=0.000169816, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10292, train_wall=9329 | |
| epoch 004: 8110 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.106, bsz=557.378, num_updates=34687, lr=0.000169792, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10295, train_wall=9332 | |
| epoch 004: 8120 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51964, ups=3, wpb=15178.202, bsz=557.409, num_updates=34697, lr=0.000169767, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10298, train_wall=9334 | |
| epoch 004: 8130 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.052, bsz=557.380, num_updates=34707, lr=0.000169743, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10301, train_wall=9337 | |
| epoch 004: 8140 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51964, ups=3, wpb=15178.238, bsz=557.288, num_updates=34717, lr=0.000169718, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10304, train_wall=9340 | |
| epoch 004: 8150 / 8862 loss=4.155, nll_loss=2.509, ppl=5.69, wps=51964, ups=3, wpb=15178.452, bsz=557.185, num_updates=34727, lr=0.000169694, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10307, train_wall=9342 | |
| epoch 004: 8160 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.356, bsz=557.259, num_updates=34737, lr=0.00016967, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10310, train_wall=9345 | |
| epoch 004: 8170 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51963, ups=3, wpb=15178.211, bsz=557.133, num_updates=34747, lr=0.000169645, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10313, train_wall=9348 | |
| epoch 004: 8180 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51962, ups=3, wpb=15178.106, bsz=557.171, num_updates=34757, lr=0.000169621, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10315, train_wall=9351 | |
| epoch 004: 8190 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51961, ups=3, wpb=15177.909, bsz=557.195, num_updates=34767, lr=0.000169596, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10318, train_wall=9353 | |
| epoch 004: 8200 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51961, ups=3, wpb=15178.002, bsz=557.214, num_updates=34777, lr=0.000169572, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10321, train_wall=9356 | |
| epoch 004: 8210 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51961, ups=3, wpb=15177.866, bsz=557.100, num_updates=34787, lr=0.000169548, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10324, train_wall=9359 | |
| epoch 004: 8220 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51962, ups=3, wpb=15178.025, bsz=557.120, num_updates=34797, lr=0.000169523, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10327, train_wall=9361 | |
| epoch 004: 8230 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51962, ups=3, wpb=15178.358, bsz=557.129, num_updates=34807, lr=0.000169499, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10330, train_wall=9364 | |
| epoch 004: 8240 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51960, ups=3, wpb=15178.150, bsz=557.220, num_updates=34817, lr=0.000169474, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10333, train_wall=9367 | |
| epoch 004: 8250 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51959, ups=3, wpb=15177.962, bsz=557.161, num_updates=34827, lr=0.00016945, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10336, train_wall=9370 | |
| epoch 004: 8260 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15177.877, bsz=557.181, num_updates=34837, lr=0.000169426, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10339, train_wall=9372 | |
| epoch 004: 8270 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15178.065, bsz=557.182, num_updates=34847, lr=0.000169402, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10342, train_wall=9375 | |
| epoch 004: 8280 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15178.018, bsz=557.233, num_updates=34857, lr=0.000169377, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10345, train_wall=9378 | |
| epoch 004: 8290 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15178.318, bsz=557.444, num_updates=34867, lr=0.000169353, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10348, train_wall=9381 | |
| epoch 004: 8300 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51958, ups=3, wpb=15178.692, bsz=557.497, num_updates=34877, lr=0.000169329, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10351, train_wall=9383 | |
| epoch 004: 8310 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51956, ups=3, wpb=15178.206, bsz=557.541, num_updates=34887, lr=0.000169304, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10354, train_wall=9386 | |
| epoch 004: 8320 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51957, ups=3, wpb=15178.251, bsz=557.502, num_updates=34897, lr=0.00016928, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10357, train_wall=9389 | |
| epoch 004: 8330 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51957, ups=3, wpb=15178.249, bsz=557.558, num_updates=34907, lr=0.000169256, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10360, train_wall=9391 | |
| epoch 004: 8340 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51955, ups=3, wpb=15178.201, bsz=557.531, num_updates=34917, lr=0.000169232, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10363, train_wall=9394 | |
| epoch 004: 8350 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51956, ups=3, wpb=15178.539, bsz=557.549, num_updates=34927, lr=0.000169207, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10366, train_wall=9397 | |
| epoch 004: 8360 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51955, ups=3, wpb=15178.577, bsz=557.475, num_updates=34937, lr=0.000169183, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10368, train_wall=9400 | |
| epoch 004: 8370 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.509, bsz=557.526, num_updates=34947, lr=0.000169159, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10371, train_wall=9402 | |
| epoch 004: 8380 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51955, ups=3, wpb=15179.011, bsz=557.726, num_updates=34957, lr=0.000169135, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10374, train_wall=9405 | |
| epoch 004: 8390 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.878, bsz=557.634, num_updates=34967, lr=0.000169111, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10377, train_wall=9408 | |
| epoch 004: 8400 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.729, bsz=557.544, num_updates=34977, lr=0.000169086, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10380, train_wall=9411 | |
| epoch 004: 8410 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.908, bsz=557.514, num_updates=34987, lr=0.000169062, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10383, train_wall=9413 | |
| epoch 004: 8420 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51954, ups=3, wpb=15178.762, bsz=557.477, num_updates=34997, lr=0.000169038, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10386, train_wall=9416 | |
| epoch 004: 8430 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51953, ups=3, wpb=15178.867, bsz=557.505, num_updates=35007, lr=0.000169014, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10389, train_wall=9419 | |
| epoch 004: 8440 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51953, ups=3, wpb=15178.727, bsz=557.457, num_updates=35017, lr=0.00016899, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10392, train_wall=9421 | |
| epoch 004: 8450 / 8862 loss=4.154, nll_loss=2.509, ppl=5.69, wps=51953, ups=3, wpb=15179.032, bsz=557.454, num_updates=35027, lr=0.000168966, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10395, train_wall=9424 | |
| epoch 004: 8460 / 8862 loss=4.154, nll_loss=2.508, ppl=5.69, wps=51951, ups=3, wpb=15178.784, bsz=557.616, num_updates=35037, lr=0.000168942, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10398, train_wall=9427 | |
| epoch 004: 8470 / 8862 loss=4.154, nll_loss=2.508, ppl=5.69, wps=51952, ups=3, wpb=15179.051, bsz=557.734, num_updates=35047, lr=0.000168917, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10401, train_wall=9430 | |
| epoch 004: 8480 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51950, ups=3, wpb=15179.035, bsz=557.817, num_updates=35057, lr=0.000168893, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10404, train_wall=9432 | |
| epoch 004: 8490 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51950, ups=3, wpb=15179.116, bsz=557.865, num_updates=35067, lr=0.000168869, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10407, train_wall=9435 | |
| epoch 004: 8500 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51950, ups=3, wpb=15179.149, bsz=557.731, num_updates=35077, lr=0.000168845, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10410, train_wall=9438 | |
| epoch 004: 8510 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51949, ups=3, wpb=15178.969, bsz=557.709, num_updates=35087, lr=0.000168821, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10413, train_wall=9441 | |
| epoch 004: 8520 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51948, ups=3, wpb=15178.741, bsz=557.808, num_updates=35097, lr=0.000168797, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10416, train_wall=9443 | |
| epoch 004: 8530 / 8862 loss=4.154, nll_loss=2.508, ppl=5.69, wps=51947, ups=3, wpb=15178.703, bsz=557.855, num_updates=35107, lr=0.000168773, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10419, train_wall=9446 | |
| epoch 004: 8540 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51947, ups=3, wpb=15178.891, bsz=557.763, num_updates=35117, lr=0.000168749, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10421, train_wall=9449 | |
| epoch 004: 8550 / 8862 loss=4.154, nll_loss=2.508, ppl=5.69, wps=51946, ups=3, wpb=15178.752, bsz=557.797, num_updates=35127, lr=0.000168725, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10424, train_wall=9451 | |
| epoch 004: 8560 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51945, ups=3, wpb=15178.999, bsz=558.097, num_updates=35137, lr=0.000168701, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10427, train_wall=9454 | |
| epoch 004: 8570 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51945, ups=3, wpb=15179.041, bsz=558.105, num_updates=35147, lr=0.000168677, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10430, train_wall=9457 | |
| epoch 004: 8580 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51944, ups=3, wpb=15178.708, bsz=558.085, num_updates=35157, lr=0.000168653, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10433, train_wall=9460 | |
| epoch 004: 8590 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51942, ups=3, wpb=15178.449, bsz=558.137, num_updates=35167, lr=0.000168629, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10436, train_wall=9462 | |
| epoch 004: 8600 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51942, ups=3, wpb=15178.387, bsz=558.092, num_updates=35177, lr=0.000168605, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10439, train_wall=9465 | |
| epoch 004: 8610 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51943, ups=3, wpb=15178.778, bsz=558.208, num_updates=35187, lr=0.000168581, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=4.000, wall=10442, train_wall=9468 | |
| epoch 004: 8620 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51941, ups=3, wpb=15178.671, bsz=558.266, num_updates=35197, lr=0.000168557, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10445, train_wall=9471 | |
| epoch 004: 8630 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51941, ups=3, wpb=15178.497, bsz=558.180, num_updates=35207, lr=0.000168533, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10448, train_wall=9473 | |
| epoch 004: 8640 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51941, ups=3, wpb=15178.389, bsz=558.228, num_updates=35217, lr=0.000168509, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10451, train_wall=9476 | |
| epoch 004: 8650 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51941, ups=3, wpb=15178.421, bsz=558.282, num_updates=35227, lr=0.000168485, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10454, train_wall=9479 | |
| epoch 004: 8660 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51939, ups=3, wpb=15178.015, bsz=558.257, num_updates=35237, lr=0.000168461, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10457, train_wall=9481 | |
| epoch 004: 8670 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51941, ups=3, wpb=15178.314, bsz=558.216, num_updates=35247, lr=0.000168438, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10460, train_wall=9484 | |
| epoch 004: 8680 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51939, ups=3, wpb=15178.493, bsz=558.451, num_updates=35257, lr=0.000168414, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10463, train_wall=9487 | |
| epoch 004: 8690 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51940, ups=3, wpb=15178.657, bsz=558.392, num_updates=35267, lr=0.00016839, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10466, train_wall=9490 | |
| epoch 004: 8700 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51939, ups=3, wpb=15178.438, bsz=558.438, num_updates=35277, lr=0.000168366, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10469, train_wall=9492 | |
| epoch 004: 8710 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51938, ups=3, wpb=15178.162, bsz=558.451, num_updates=35287, lr=0.000168342, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10472, train_wall=9495 | |
| epoch 004: 8720 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51939, ups=3, wpb=15178.517, bsz=558.429, num_updates=35297, lr=0.000168318, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10474, train_wall=9498 | |
| epoch 004: 8730 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51938, ups=3, wpb=15178.502, bsz=558.363, num_updates=35307, lr=0.000168294, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10477, train_wall=9500 | |
| epoch 004: 8740 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51939, ups=3, wpb=15178.841, bsz=558.303, num_updates=35317, lr=0.000168271, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10480, train_wall=9503 | |
| epoch 004: 8750 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51938, ups=3, wpb=15178.683, bsz=558.230, num_updates=35327, lr=0.000168247, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10483, train_wall=9506 | |
| epoch 004: 8760 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51937, ups=3, wpb=15178.373, bsz=558.192, num_updates=35337, lr=0.000168223, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10486, train_wall=9509 | |
| epoch 004: 8770 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51936, ups=3, wpb=15178.154, bsz=558.147, num_updates=35347, lr=0.000168199, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10489, train_wall=9511 | |
| epoch 004: 8780 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51935, ups=3, wpb=15177.758, bsz=558.027, num_updates=35357, lr=0.000168175, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10492, train_wall=9514 | |
| epoch 004: 8790 / 8862 loss=4.153, nll_loss=2.508, ppl=5.69, wps=51934, ups=3, wpb=15177.296, bsz=557.960, num_updates=35367, lr=0.000168152, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10495, train_wall=9517 | |
| epoch 004: 8800 / 8862 loss=4.153, nll_loss=2.507, ppl=5.69, wps=51935, ups=3, wpb=15177.562, bsz=558.034, num_updates=35377, lr=0.000168128, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10498, train_wall=9519 | |
| epoch 004: 8810 / 8862 loss=4.153, nll_loss=2.507, ppl=5.68, wps=51935, ups=3, wpb=15177.791, bsz=558.039, num_updates=35387, lr=0.000168104, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10501, train_wall=9522 | |
| epoch 004: 8820 / 8862 loss=4.153, nll_loss=2.507, ppl=5.68, wps=51934, ups=3, wpb=15177.634, bsz=557.990, num_updates=35397, lr=0.00016808, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10504, train_wall=9525 | |
| epoch 004: 8830 / 8862 loss=4.153, nll_loss=2.507, ppl=5.68, wps=51935, ups=3, wpb=15177.795, bsz=557.962, num_updates=35407, lr=0.000168057, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10507, train_wall=9528 | |
| epoch 004: 8840 / 8862 loss=4.152, nll_loss=2.507, ppl=5.68, wps=51935, ups=3, wpb=15177.922, bsz=558.000, num_updates=35417, lr=0.000168033, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10510, train_wall=9530 | |
| epoch 004: 8850 / 8862 loss=4.152, nll_loss=2.507, ppl=5.68, wps=51934, ups=3, wpb=15177.990, bsz=558.094, num_updates=35427, lr=0.000168009, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10513, train_wall=9533 | |
| epoch 004: 8860 / 8862 loss=4.152, nll_loss=2.507, ppl=5.68, wps=51934, ups=3, wpb=15178.140, bsz=558.050, num_updates=35437, lr=0.000167985, gnorm=0.485, clip=0.000, oom=0.000, loss_scale=8.000, wall=10516, train_wall=9536 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 004 | loss 4.152 | nll_loss 2.507 | ppl 5.68 | wps 51928 | ups 3 | wpb 15177.765 | bsz 558.031 | num_updates 35438 | lr 0.000167983 | gnorm 0.485 | clip 0.000 | oom 0.000 | loss_scale 8.000 | wall 10516 | train_wall 9536 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| WARNING: 2459 samples have invalid sizes and will be skipped, max_positions=(64, 64), first few sample ids=[34935, 29199, 25522, 50610, 31640, 50522, 29514, 23772, 21318, 30173] | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 004 | valid on 'valid' subset | loss 4.075 | nll_loss 2.316 | ppl 4.98 | num_updates 35438 | best_loss 4.07532 | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
/opt/anaconda3/lib/python3.7/multiprocessing/semaphore_tracker.py:144: UserWarning: semaphore_tracker: There appear to be 1 leaked semaphores to clean up at shutdown | |
len(cache)) | |
| epoch 005: 10 / 8862 loss=4.112, nll_loss=2.463, ppl=5.51, wps=51541, ups=0, wpb=15234.909, bsz=506.182, num_updates=35449, lr=0.000167957, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10565, train_wall=9541 | |
| epoch 005: 20 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=51180, ups=0, wpb=15145.143, bsz=572.952, num_updates=35459, lr=0.000167933, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=10568, train_wall=9544 | |
| epoch 005: 30 / 8862 loss=4.063, nll_loss=2.408, ppl=5.31, wps=51541, ups=1, wpb=15185.290, bsz=570.839, num_updates=35469, lr=0.00016791, gnorm=0.470, clip=0.000, oom=0.000, loss_scale=8.000, wall=10571, train_wall=9546 | |
| epoch 005: 40 / 8862 loss=4.053, nll_loss=2.395, ppl=5.26, wps=51563, ups=1, wpb=15122.829, bsz=573.854, num_updates=35479, lr=0.000167886, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10573, train_wall=9549 | |
| epoch 005: 50 / 8862 loss=4.060, nll_loss=2.404, ppl=5.29, wps=51784, ups=1, wpb=15140.157, bsz=559.686, num_updates=35489, lr=0.000167862, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10576, train_wall=9552 | |
| epoch 005: 60 / 8862 loss=4.050, nll_loss=2.392, ppl=5.25, wps=51682, ups=1, wpb=15124.115, bsz=566.820, num_updates=35499, lr=0.000167839, gnorm=0.470, clip=0.000, oom=0.000, loss_scale=8.000, wall=10579, train_wall=9554 | |
| epoch 005: 70 / 8862 loss=4.057, nll_loss=2.401, ppl=5.28, wps=51698, ups=1, wpb=15102.845, bsz=567.662, num_updates=35509, lr=0.000167815, gnorm=0.469, clip=0.000, oom=0.000, loss_scale=8.000, wall=10582, train_wall=9557 | |
| epoch 005: 80 / 8862 loss=4.056, nll_loss=2.399, ppl=5.28, wps=51731, ups=1, wpb=15096.864, bsz=570.173, num_updates=35519, lr=0.000167791, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10585, train_wall=9560 | |
| epoch 005: 90 / 8862 loss=4.049, nll_loss=2.391, ppl=5.25, wps=51824, ups=1, wpb=15127.692, bsz=575.385, num_updates=35529, lr=0.000167768, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=10588, train_wall=9562 | |
| epoch 005: 100 / 8862 loss=4.039, nll_loss=2.380, ppl=5.20, wps=51922, ups=1, wpb=15143.307, bsz=579.960, num_updates=35539, lr=0.000167744, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10591, train_wall=9565 | |
| epoch 005: 110 / 8862 loss=4.038, nll_loss=2.379, ppl=5.20, wps=51944, ups=1, wpb=15149.640, bsz=578.595, num_updates=35549, lr=0.000167721, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10594, train_wall=9568 | |
| epoch 005: 120 / 8862 loss=4.041, nll_loss=2.383, ppl=5.22, wps=51905, ups=2, wpb=15135.628, bsz=583.603, num_updates=35559, lr=0.000167697, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10597, train_wall=9570 | |
| epoch 005: 130 / 8862 loss=4.046, nll_loss=2.388, ppl=5.24, wps=51928, ups=2, wpb=15131.588, bsz=580.885, num_updates=35569, lr=0.000167673, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=10600, train_wall=9573 | |
| epoch 005: 140 / 8862 loss=4.038, nll_loss=2.379, ppl=5.20, wps=52008, ups=2, wpb=15151.532, bsz=578.837, num_updates=35579, lr=0.00016765, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10602, train_wall=9576 | |
| epoch 005: 150 / 8862 loss=4.041, nll_loss=2.382, ppl=5.21, wps=51997, ups=2, wpb=15138.020, bsz=577.219, num_updates=35589, lr=0.000167626, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10605, train_wall=9579 | |
| epoch 005: 160 / 8862 loss=4.040, nll_loss=2.382, ppl=5.21, wps=51964, ups=2, wpb=15126.615, bsz=579.876, num_updates=35599, lr=0.000167603, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10608, train_wall=9581 | |
| epoch 005: 170 / 8862 loss=4.044, nll_loss=2.386, ppl=5.23, wps=51968, ups=2, wpb=15129.105, bsz=576.094, num_updates=35609, lr=0.000167579, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10611, train_wall=9584 | |
| epoch 005: 180 / 8862 loss=4.039, nll_loss=2.381, ppl=5.21, wps=51952, ups=2, wpb=15126.260, bsz=582.099, num_updates=35619, lr=0.000167556, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10614, train_wall=9587 | |
| epoch 005: 190 / 8862 loss=4.037, nll_loss=2.378, ppl=5.20, wps=51990, ups=2, wpb=15138.152, bsz=586.262, num_updates=35629, lr=0.000167532, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10617, train_wall=9589 | |
| epoch 005: 200 / 8862 loss=4.038, nll_loss=2.379, ppl=5.20, wps=52026, ups=2, wpb=15149.348, bsz=583.522, num_updates=35639, lr=0.000167509, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10620, train_wall=9592 | |
| epoch 005: 210 / 8862 loss=4.036, nll_loss=2.377, ppl=5.19, wps=52106, ups=2, wpb=15169.493, bsz=582.749, num_updates=35649, lr=0.000167485, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10623, train_wall=9595 | |
| epoch 005: 220 / 8862 loss=4.040, nll_loss=2.381, ppl=5.21, wps=52103, ups=2, wpb=15167.190, bsz=584.724, num_updates=35659, lr=0.000167462, gnorm=0.471, clip=0.000, oom=0.000, loss_scale=8.000, wall=10626, train_wall=9597 | |
| epoch 005: 230 / 8862 loss=4.042, nll_loss=2.384, ppl=5.22, wps=52114, ups=2, wpb=15170.294, bsz=582.130, num_updates=35669, lr=0.000167438, gnorm=0.471, clip=0.000, oom=0.000, loss_scale=8.000, wall=10629, train_wall=9600 | |
| epoch 005: 240 / 8862 loss=4.045, nll_loss=2.387, ppl=5.23, wps=52157, ups=2, wpb=15178.004, bsz=577.593, num_updates=35679, lr=0.000167415, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10632, train_wall=9603 | |
| epoch 005: 250 / 8862 loss=4.052, nll_loss=2.395, ppl=5.26, wps=52103, ups=2, wpb=15157.251, bsz=575.713, num_updates=35689, lr=0.000167391, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10634, train_wall=9605 | |
| epoch 005: 260 / 8862 loss=4.054, nll_loss=2.397, ppl=5.27, wps=52096, ups=2, wpb=15151.682, bsz=573.333, num_updates=35699, lr=0.000167368, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10637, train_wall=9608 | |
| epoch 005: 270 / 8862 loss=4.053, nll_loss=2.396, ppl=5.26, wps=52142, ups=2, wpb=15166.399, bsz=575.646, num_updates=35709, lr=0.000167344, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10640, train_wall=9611 | |
| epoch 005: 280 / 8862 loss=4.052, nll_loss=2.395, ppl=5.26, wps=52162, ups=2, wpb=15166.676, bsz=575.260, num_updates=35719, lr=0.000167321, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10643, train_wall=9613 | |
| epoch 005: 290 / 8862 loss=4.055, nll_loss=2.398, ppl=5.27, wps=52147, ups=2, wpb=15167.773, bsz=577.924, num_updates=35729, lr=0.000167298, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10646, train_wall=9616 | |
| epoch 005: 300 / 8862 loss=4.056, nll_loss=2.400, ppl=5.28, wps=52146, ups=2, wpb=15168.488, bsz=578.870, num_updates=35739, lr=0.000167274, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10649, train_wall=9619 | |
| epoch 005: 310 / 8862 loss=4.054, nll_loss=2.397, ppl=5.27, wps=52163, ups=2, wpb=15172.871, bsz=578.367, num_updates=35749, lr=0.000167251, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10652, train_wall=9621 | |
| epoch 005: 320 / 8862 loss=4.053, nll_loss=2.396, ppl=5.26, wps=52176, ups=2, wpb=15177.844, bsz=576.723, num_updates=35759, lr=0.000167227, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10655, train_wall=9624 | |
| epoch 005: 330 / 8862 loss=4.055, nll_loss=2.398, ppl=5.27, wps=52188, ups=2, wpb=15179.411, bsz=575.807, num_updates=35769, lr=0.000167204, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10658, train_wall=9627 | |
| epoch 005: 340 / 8862 loss=4.050, nll_loss=2.392, ppl=5.25, wps=52215, ups=2, wpb=15187.499, bsz=578.440, num_updates=35779, lr=0.000167181, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10661, train_wall=9630 | |
| epoch 005: 350 / 8862 loss=4.050, nll_loss=2.393, ppl=5.25, wps=52226, ups=2, wpb=15190.228, bsz=576.684, num_updates=35789, lr=0.000167157, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10664, train_wall=9632 | |
| epoch 005: 360 / 8862 loss=4.050, nll_loss=2.392, ppl=5.25, wps=52228, ups=2, wpb=15188.488, bsz=577.152, num_updates=35799, lr=0.000167134, gnorm=0.471, clip=0.000, oom=0.000, loss_scale=8.000, wall=10666, train_wall=9635 | |
| epoch 005: 370 / 8862 loss=4.050, nll_loss=2.393, ppl=5.25, wps=52257, ups=2, wpb=15194.340, bsz=574.146, num_updates=35809, lr=0.000167111, gnorm=0.471, clip=0.000, oom=0.000, loss_scale=8.000, wall=10669, train_wall=9638 | |
| epoch 005: 380 / 8862 loss=4.053, nll_loss=2.395, ppl=5.26, wps=52245, ups=2, wpb=15184.890, bsz=572.808, num_updates=35819, lr=0.000167087, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10672, train_wall=9640 | |
| epoch 005: 390 / 8862 loss=4.055, nll_loss=2.398, ppl=5.27, wps=52218, ups=2, wpb=15175.529, bsz=572.706, num_updates=35829, lr=0.000167064, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10675, train_wall=9643 | |
| epoch 005: 400 / 8862 loss=4.054, nll_loss=2.397, ppl=5.27, wps=52226, ups=2, wpb=15177.743, bsz=573.047, num_updates=35839, lr=0.000167041, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10678, train_wall=9646 | |
| epoch 005: 410 / 8862 loss=4.056, nll_loss=2.399, ppl=5.27, wps=52220, ups=2, wpb=15172.518, bsz=570.978, num_updates=35849, lr=0.000167017, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10681, train_wall=9648 | |
| epoch 005: 420 / 8862 loss=4.056, nll_loss=2.399, ppl=5.28, wps=52231, ups=3, wpb=15174.425, bsz=569.558, num_updates=35859, lr=0.000166994, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10684, train_wall=9651 | |
| epoch 005: 430 / 8862 loss=4.054, nll_loss=2.397, ppl=5.27, wps=52240, ups=3, wpb=15179.000, bsz=568.501, num_updates=35869, lr=0.000166971, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10687, train_wall=9654 | |
| epoch 005: 440 / 8862 loss=4.055, nll_loss=2.398, ppl=5.27, wps=52235, ups=3, wpb=15178.787, bsz=568.834, num_updates=35879, lr=0.000166947, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10690, train_wall=9656 | |
| epoch 005: 450 / 8862 loss=4.057, nll_loss=2.401, ppl=5.28, wps=52240, ups=3, wpb=15177.508, bsz=567.344, num_updates=35889, lr=0.000166924, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10692, train_wall=9659 | |
| epoch 005: 460 / 8862 loss=4.058, nll_loss=2.401, ppl=5.28, wps=52253, ups=3, wpb=15184.176, bsz=566.941, num_updates=35899, lr=0.000166901, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10695, train_wall=9662 | |
| epoch 005: 470 / 8862 loss=4.061, nll_loss=2.405, ppl=5.30, wps=52271, ups=3, wpb=15186.907, bsz=565.265, num_updates=35909, lr=0.000166878, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10698, train_wall=9664 | |
| epoch 005: 480 / 8862 loss=4.062, nll_loss=2.406, ppl=5.30, wps=52282, ups=3, wpb=15189.056, bsz=565.738, num_updates=35919, lr=0.000166854, gnorm=0.472, clip=0.000, oom=0.000, loss_scale=8.000, wall=10701, train_wall=9667 | |
| epoch 005: 490 / 8862 loss=4.063, nll_loss=2.408, ppl=5.31, wps=52266, ups=3, wpb=15183.898, bsz=564.627, num_updates=35929, lr=0.000166831, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10704, train_wall=9670 | |
| epoch 005: 500 / 8862 loss=4.064, nll_loss=2.409, ppl=5.31, wps=52254, ups=3, wpb=15180.707, bsz=564.024, num_updates=35939, lr=0.000166808, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10707, train_wall=9672 | |
| epoch 005: 510 / 8862 loss=4.067, nll_loss=2.412, ppl=5.32, wps=52226, ups=3, wpb=15169.703, bsz=563.100, num_updates=35949, lr=0.000166785, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10710, train_wall=9675 | |
| epoch 005: 520 / 8862 loss=4.067, nll_loss=2.412, ppl=5.32, wps=52246, ups=3, wpb=15174.583, bsz=562.702, num_updates=35959, lr=0.000166762, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10713, train_wall=9678 | |
| epoch 005: 530 / 8862 loss=4.070, nll_loss=2.415, ppl=5.33, wps=52232, ups=3, wpb=15170.113, bsz=562.275, num_updates=35969, lr=0.000166738, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10716, train_wall=9680 | |
| epoch 005: 540 / 8862 loss=4.070, nll_loss=2.416, ppl=5.34, wps=52239, ups=3, wpb=15171.723, bsz=561.878, num_updates=35979, lr=0.000166715, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10719, train_wall=9683 | |
| epoch 005: 550 / 8862 loss=4.069, nll_loss=2.414, ppl=5.33, wps=52225, ups=3, wpb=15177.624, bsz=565.633, num_updates=35989, lr=0.000166692, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10722, train_wall=9686 | |
| epoch 005: 560 / 8862 loss=4.068, nll_loss=2.413, ppl=5.32, wps=52199, ups=3, wpb=15177.971, bsz=567.330, num_updates=35999, lr=0.000166669, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10725, train_wall=9689 | |
| epoch 005: 570 / 8862 loss=4.066, nll_loss=2.411, ppl=5.32, wps=52214, ups=3, wpb=15183.923, bsz=567.426, num_updates=36009, lr=0.000166646, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10727, train_wall=9691 | |
| epoch 005: 580 / 8862 loss=4.067, nll_loss=2.411, ppl=5.32, wps=52183, ups=3, wpb=15181.019, bsz=567.711, num_updates=36019, lr=0.000166623, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10730, train_wall=9694 | |
| epoch 005: 590 / 8862 loss=4.068, nll_loss=2.413, ppl=5.33, wps=52175, ups=3, wpb=15177.739, bsz=566.863, num_updates=36029, lr=0.0001666, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10733, train_wall=9697 | |
| epoch 005: 600 / 8862 loss=4.070, nll_loss=2.415, ppl=5.33, wps=52143, ups=3, wpb=15172.336, bsz=566.043, num_updates=36039, lr=0.000166576, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10736, train_wall=9700 | |
| epoch 005: 610 / 8862 loss=4.072, nll_loss=2.417, ppl=5.34, wps=52141, ups=3, wpb=15171.205, bsz=565.015, num_updates=36049, lr=0.000166553, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10739, train_wall=9702 | |
| epoch 005: 620 / 8862 loss=4.072, nll_loss=2.418, ppl=5.34, wps=52129, ups=3, wpb=15172.699, bsz=563.684, num_updates=36059, lr=0.00016653, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10742, train_wall=9705 | |
| epoch 005: 630 / 8862 loss=4.072, nll_loss=2.417, ppl=5.34, wps=52123, ups=3, wpb=15173.537, bsz=563.208, num_updates=36069, lr=0.000166507, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10745, train_wall=9708 | |
| epoch 005: 640 / 8862 loss=4.071, nll_loss=2.416, ppl=5.34, wps=52140, ups=3, wpb=15178.356, bsz=562.396, num_updates=36079, lr=0.000166484, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10748, train_wall=9710 | |
| epoch 005: 650 / 8862 loss=4.073, nll_loss=2.418, ppl=5.35, wps=52140, ups=3, wpb=15177.897, bsz=560.627, num_updates=36089, lr=0.000166461, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10751, train_wall=9713 | |
| epoch 005: 660 / 8862 loss=4.073, nll_loss=2.418, ppl=5.35, wps=52144, ups=3, wpb=15178.596, bsz=559.770, num_updates=36099, lr=0.000166438, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10754, train_wall=9716 | |
| epoch 005: 670 / 8862 loss=4.072, nll_loss=2.418, ppl=5.34, wps=52146, ups=3, wpb=15177.526, bsz=560.083, num_updates=36109, lr=0.000166415, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10757, train_wall=9718 | |
| epoch 005: 680 / 8862 loss=4.075, nll_loss=2.421, ppl=5.35, wps=52133, ups=3, wpb=15174.476, bsz=560.599, num_updates=36119, lr=0.000166392, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10760, train_wall=9721 | |
| epoch 005: 690 / 8862 loss=4.077, nll_loss=2.422, ppl=5.36, wps=52131, ups=3, wpb=15172.990, bsz=559.861, num_updates=36129, lr=0.000166369, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10763, train_wall=9724 | |
| epoch 005: 700 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52127, ups=3, wpb=15171.252, bsz=559.989, num_updates=36139, lr=0.000166346, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10765, train_wall=9726 | |
| epoch 005: 710 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52116, ups=3, wpb=15167.550, bsz=559.269, num_updates=36149, lr=0.000166323, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10768, train_wall=9729 | |
| epoch 005: 720 / 8862 loss=4.075, nll_loss=2.421, ppl=5.35, wps=52126, ups=3, wpb=15171.420, bsz=559.789, num_updates=36159, lr=0.0001663, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10771, train_wall=9732 | |
| epoch 005: 730 / 8862 loss=4.076, nll_loss=2.421, ppl=5.36, wps=52126, ups=3, wpb=15170.008, bsz=559.015, num_updates=36169, lr=0.000166277, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10774, train_wall=9735 | |
| epoch 005: 740 / 8862 loss=4.075, nll_loss=2.420, ppl=5.35, wps=52125, ups=3, wpb=15171.024, bsz=560.345, num_updates=36179, lr=0.000166254, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10777, train_wall=9737 | |
| epoch 005: 750 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52136, ups=3, wpb=15173.393, bsz=559.947, num_updates=36189, lr=0.000166231, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10780, train_wall=9740 | |
| epoch 005: 760 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52139, ups=3, wpb=15174.556, bsz=559.432, num_updates=36199, lr=0.000166208, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10783, train_wall=9743 | |
| epoch 005: 770 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52133, ups=3, wpb=15173.175, bsz=559.720, num_updates=36209, lr=0.000166185, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10786, train_wall=9745 | |
| epoch 005: 780 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52134, ups=3, wpb=15172.709, bsz=559.805, num_updates=36219, lr=0.000166162, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10789, train_wall=9748 | |
| epoch 005: 790 / 8862 loss=4.078, nll_loss=2.424, ppl=5.36, wps=52142, ups=3, wpb=15174.229, bsz=560.536, num_updates=36229, lr=0.000166139, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10792, train_wall=9751 | |
| epoch 005: 800 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52142, ups=3, wpb=15173.554, bsz=560.250, num_updates=36239, lr=0.000166116, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10795, train_wall=9753 | |
| epoch 005: 810 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52163, ups=3, wpb=15179.424, bsz=560.306, num_updates=36249, lr=0.000166093, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10797, train_wall=9756 | |
| epoch 005: 820 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52163, ups=3, wpb=15180.552, bsz=560.994, num_updates=36259, lr=0.00016607, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10800, train_wall=9759 | |
| epoch 005: 830 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52165, ups=3, wpb=15180.704, bsz=562.002, num_updates=36269, lr=0.000166047, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10803, train_wall=9761 | |
| epoch 005: 840 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52171, ups=3, wpb=15181.564, bsz=562.026, num_updates=36279, lr=0.000166025, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10806, train_wall=9764 | |
| epoch 005: 850 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52182, ups=3, wpb=15185.603, bsz=561.730, num_updates=36289, lr=0.000166002, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10809, train_wall=9767 | |
| epoch 005: 860 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52183, ups=3, wpb=15184.796, bsz=561.134, num_updates=36299, lr=0.000165979, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10812, train_wall=9769 | |
| epoch 005: 870 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52193, ups=3, wpb=15188.194, bsz=561.800, num_updates=36309, lr=0.000165956, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10815, train_wall=9772 | |
| epoch 005: 880 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52198, ups=3, wpb=15187.826, bsz=561.943, num_updates=36319, lr=0.000165933, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10818, train_wall=9775 | |
| epoch 005: 890 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52198, ups=3, wpb=15188.464, bsz=561.526, num_updates=36329, lr=0.00016591, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10821, train_wall=9778 | |
| epoch 005: 900 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52189, ups=3, wpb=15185.129, bsz=562.362, num_updates=36339, lr=0.000165887, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10824, train_wall=9780 | |
| epoch 005: 910 / 8862 loss=4.077, nll_loss=2.424, ppl=5.36, wps=52182, ups=3, wpb=15183.341, bsz=562.353, num_updates=36349, lr=0.000165865, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10826, train_wall=9783 | |
| epoch 005: 920 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52180, ups=3, wpb=15184.374, bsz=564.117, num_updates=36359, lr=0.000165842, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10829, train_wall=9786 | |
| epoch 005: 930 / 8862 loss=4.075, nll_loss=2.421, ppl=5.35, wps=52182, ups=3, wpb=15184.679, bsz=564.614, num_updates=36369, lr=0.000165819, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10832, train_wall=9788 | |
| epoch 005: 940 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52175, ups=3, wpb=15181.535, bsz=565.101, num_updates=36379, lr=0.000165796, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10835, train_wall=9791 | |
| epoch 005: 950 / 8862 loss=4.075, nll_loss=2.421, ppl=5.36, wps=52179, ups=3, wpb=15182.688, bsz=564.921, num_updates=36389, lr=0.000165773, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10838, train_wall=9794 | |
| epoch 005: 960 / 8862 loss=4.076, nll_loss=2.421, ppl=5.36, wps=52179, ups=3, wpb=15181.644, bsz=564.429, num_updates=36399, lr=0.000165751, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10841, train_wall=9796 | |
| epoch 005: 970 / 8862 loss=4.075, nll_loss=2.421, ppl=5.36, wps=52184, ups=3, wpb=15182.772, bsz=564.325, num_updates=36409, lr=0.000165728, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10844, train_wall=9799 | |
| epoch 005: 980 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52176, ups=3, wpb=15183.609, bsz=565.146, num_updates=36419, lr=0.000165705, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10847, train_wall=9802 | |
| epoch 005: 990 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52182, ups=3, wpb=15184.514, bsz=565.086, num_updates=36429, lr=0.000165682, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10850, train_wall=9804 | |
| epoch 005: 1000 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52185, ups=3, wpb=15184.977, bsz=565.195, num_updates=36439, lr=0.00016566, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10853, train_wall=9807 | |
| epoch 005: 1010 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52188, ups=3, wpb=15184.439, bsz=564.368, num_updates=36449, lr=0.000165637, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10856, train_wall=9810 | |
| epoch 005: 1020 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52182, ups=3, wpb=15183.424, bsz=563.855, num_updates=36459, lr=0.000165614, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10858, train_wall=9813 | |
| epoch 005: 1030 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52173, ups=3, wpb=15180.817, bsz=564.314, num_updates=36469, lr=0.000165592, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10861, train_wall=9815 | |
| epoch 005: 1040 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52173, ups=3, wpb=15179.320, bsz=563.819, num_updates=36479, lr=0.000165569, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10864, train_wall=9818 | |
| epoch 005: 1050 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52171, ups=3, wpb=15178.998, bsz=564.712, num_updates=36489, lr=0.000165546, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10867, train_wall=9821 | |
| epoch 005: 1060 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52168, ups=3, wpb=15178.828, bsz=564.464, num_updates=36499, lr=0.000165523, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10870, train_wall=9823 | |
| epoch 005: 1070 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52171, ups=3, wpb=15179.309, bsz=564.258, num_updates=36509, lr=0.000165501, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10873, train_wall=9826 | |
| epoch 005: 1080 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52168, ups=3, wpb=15177.858, bsz=564.751, num_updates=36519, lr=0.000165478, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10876, train_wall=9829 | |
| epoch 005: 1090 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52159, ups=3, wpb=15175.187, bsz=564.488, num_updates=36529, lr=0.000165455, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10879, train_wall=9831 | |
| epoch 005: 1100 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52150, ups=3, wpb=15173.882, bsz=564.578, num_updates=36539, lr=0.000165433, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10882, train_wall=9834 | |
| epoch 005: 1110 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52143, ups=3, wpb=15173.034, bsz=565.465, num_updates=36549, lr=0.00016541, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10885, train_wall=9837 | |
| epoch 005: 1120 / 8862 loss=4.075, nll_loss=2.421, ppl=5.35, wps=52142, ups=3, wpb=15171.983, bsz=564.724, num_updates=36559, lr=0.000165388, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10888, train_wall=9839 | |
| epoch 005: 1130 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52144, ups=3, wpb=15173.723, bsz=565.857, num_updates=36569, lr=0.000165365, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10891, train_wall=9842 | |
| epoch 005: 1140 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52151, ups=3, wpb=15175.543, bsz=566.079, num_updates=36579, lr=0.000165342, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10893, train_wall=9845 | |
| epoch 005: 1150 / 8862 loss=4.073, nll_loss=2.418, ppl=5.34, wps=52154, ups=3, wpb=15176.234, bsz=566.436, num_updates=36589, lr=0.00016532, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10896, train_wall=9848 | |
| epoch 005: 1160 / 8862 loss=4.074, nll_loss=2.419, ppl=5.35, wps=52153, ups=3, wpb=15175.291, bsz=566.167, num_updates=36599, lr=0.000165297, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10899, train_wall=9850 | |
| epoch 005: 1170 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52153, ups=3, wpb=15174.397, bsz=566.067, num_updates=36609, lr=0.000165275, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10902, train_wall=9853 | |
| epoch 005: 1180 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52161, ups=3, wpb=15176.777, bsz=566.171, num_updates=36619, lr=0.000165252, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10905, train_wall=9856 | |
| epoch 005: 1190 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52171, ups=3, wpb=15178.510, bsz=565.837, num_updates=36629, lr=0.000165229, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10908, train_wall=9858 | |
| epoch 005: 1200 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52180, ups=3, wpb=15181.067, bsz=565.502, num_updates=36639, lr=0.000165207, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10911, train_wall=9861 | |
| epoch 005: 1210 / 8862 loss=4.073, nll_loss=2.418, ppl=5.34, wps=52184, ups=3, wpb=15182.654, bsz=565.912, num_updates=36649, lr=0.000165184, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10914, train_wall=9864 | |
| epoch 005: 1220 / 8862 loss=4.073, nll_loss=2.418, ppl=5.35, wps=52189, ups=3, wpb=15183.085, bsz=565.183, num_updates=36659, lr=0.000165162, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10917, train_wall=9866 | |
| epoch 005: 1230 / 8862 loss=4.073, nll_loss=2.418, ppl=5.34, wps=52191, ups=3, wpb=15184.702, bsz=565.355, num_updates=36669, lr=0.000165139, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10920, train_wall=9869 | |
| epoch 005: 1240 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52198, ups=3, wpb=15185.251, bsz=564.448, num_updates=36679, lr=0.000165117, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10922, train_wall=9872 | |
| epoch 005: 1250 / 8862 loss=4.073, nll_loss=2.419, ppl=5.35, wps=52199, ups=3, wpb=15184.845, bsz=564.195, num_updates=36689, lr=0.000165094, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10925, train_wall=9874 | |
| epoch 005: 1260 / 8862 loss=4.073, nll_loss=2.418, ppl=5.35, wps=52203, ups=3, wpb=15186.380, bsz=564.339, num_updates=36699, lr=0.000165072, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10928, train_wall=9877 | |
| epoch 005: 1270 / 8862 loss=4.073, nll_loss=2.418, ppl=5.35, wps=52208, ups=3, wpb=15187.332, bsz=564.123, num_updates=36709, lr=0.000165049, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10931, train_wall=9880 | |
| epoch 005: 1280 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52207, ups=3, wpb=15185.440, bsz=563.460, num_updates=36719, lr=0.000165027, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10934, train_wall=9882 | |
| epoch 005: 1290 / 8862 loss=4.074, nll_loss=2.420, ppl=5.35, wps=52220, ups=3, wpb=15189.214, bsz=563.030, num_updates=36729, lr=0.000165004, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10937, train_wall=9885 | |
| epoch 005: 1300 / 8862 loss=4.075, nll_loss=2.421, ppl=5.36, wps=52226, ups=3, wpb=15190.171, bsz=562.140, num_updates=36739, lr=0.000164982, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10940, train_wall=9888 | |
| epoch 005: 1310 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52233, ups=3, wpb=15191.037, bsz=561.281, num_updates=36749, lr=0.000164959, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10943, train_wall=9890 | |
| epoch 005: 1320 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52231, ups=3, wpb=15191.456, bsz=561.556, num_updates=36759, lr=0.000164937, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10946, train_wall=9893 | |
| epoch 005: 1330 / 8862 loss=4.076, nll_loss=2.423, ppl=5.36, wps=52230, ups=3, wpb=15190.134, bsz=561.184, num_updates=36769, lr=0.000164915, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10949, train_wall=9896 | |
| epoch 005: 1340 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52234, ups=3, wpb=15191.053, bsz=561.187, num_updates=36779, lr=0.000164892, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10951, train_wall=9898 | |
| epoch 005: 1350 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52235, ups=3, wpb=15190.611, bsz=561.060, num_updates=36789, lr=0.00016487, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10954, train_wall=9901 | |
| epoch 005: 1360 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52227, ups=3, wpb=15190.112, bsz=561.758, num_updates=36799, lr=0.000164847, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10957, train_wall=9904 | |
| epoch 005: 1370 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52219, ups=3, wpb=15189.626, bsz=562.527, num_updates=36809, lr=0.000164825, gnorm=0.473, clip=0.000, oom=0.000, loss_scale=8.000, wall=10960, train_wall=9907 | |
| epoch 005: 1380 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52215, ups=3, wpb=15190.771, bsz=562.178, num_updates=36819, lr=0.000164803, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10963, train_wall=9909 | |
| epoch 005: 1390 / 8862 loss=4.075, nll_loss=2.421, ppl=5.36, wps=52209, ups=3, wpb=15191.158, bsz=562.899, num_updates=36829, lr=0.00016478, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10966, train_wall=9912 | |
| epoch 005: 1400 / 8862 loss=4.075, nll_loss=2.421, ppl=5.36, wps=52202, ups=3, wpb=15190.456, bsz=562.912, num_updates=36839, lr=0.000164758, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10969, train_wall=9915 | |
| epoch 005: 1410 / 8862 loss=4.076, nll_loss=2.422, ppl=5.36, wps=52198, ups=3, wpb=15189.681, bsz=562.960, num_updates=36849, lr=0.000164735, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10972, train_wall=9917 | |
| epoch 005: 1420 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52189, ups=3, wpb=15187.268, bsz=562.877, num_updates=36859, lr=0.000164713, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10975, train_wall=9920 | |
| epoch 005: 1430 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52184, ups=3, wpb=15185.495, bsz=562.784, num_updates=36869, lr=0.000164691, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10978, train_wall=9923 | |
| epoch 005: 1440 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52164, ups=3, wpb=15185.074, bsz=563.853, num_updates=36879, lr=0.000164668, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10981, train_wall=9926 | |
| epoch 005: 1450 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52160, ups=3, wpb=15186.559, bsz=563.948, num_updates=36889, lr=0.000164646, gnorm=0.474, clip=0.000, oom=0.000, loss_scale=8.000, wall=10984, train_wall=9928 | |
| epoch 005: 1460 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52154, ups=3, wpb=15184.361, bsz=564.112, num_updates=36899, lr=0.000164624, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10987, train_wall=9931 | |
| epoch 005: 1470 / 8862 loss=4.077, nll_loss=2.424, ppl=5.37, wps=52148, ups=3, wpb=15183.150, bsz=563.796, num_updates=36909, lr=0.000164602, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10990, train_wall=9934 | |
| epoch 005: 1480 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52143, ups=3, wpb=15183.560, bsz=563.300, num_updates=36919, lr=0.000164579, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10993, train_wall=9937 | |
| epoch 005: 1490 / 8862 loss=4.077, nll_loss=2.423, ppl=5.36, wps=52141, ups=3, wpb=15184.503, bsz=563.697, num_updates=36929, lr=0.000164557, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10996, train_wall=9939 | |
| epoch 005: 1500 / 8862 loss=4.077, nll_loss=2.424, ppl=5.36, wps=52136, ups=3, wpb=15184.500, bsz=563.779, num_updates=36939, lr=0.000164535, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=10999, train_wall=9942 | |
| epoch 005: 1510 / 8862 loss=4.077, nll_loss=2.424, ppl=5.37, wps=52132, ups=3, wpb=15183.463, bsz=563.330, num_updates=36949, lr=0.000164512, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11001, train_wall=9945 | |
| epoch 005: 1520 / 8862 loss=4.077, nll_loss=2.424, ppl=5.37, wps=52127, ups=3, wpb=15183.206, bsz=563.329, num_updates=36959, lr=0.00016449, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11004, train_wall=9947 | |
| epoch 005: 1530 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52121, ups=3, wpb=15181.232, bsz=562.796, num_updates=36969, lr=0.000164468, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11007, train_wall=9950 | |
| epoch 005: 1540 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52116, ups=3, wpb=15179.251, bsz=562.456, num_updates=36979, lr=0.000164446, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11010, train_wall=9953 | |
| epoch 005: 1550 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52114, ups=3, wpb=15179.064, bsz=562.486, num_updates=36989, lr=0.000164423, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11013, train_wall=9956 | |
| epoch 005: 1560 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52116, ups=3, wpb=15181.122, bsz=562.860, num_updates=36999, lr=0.000164401, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11016, train_wall=9958 | |
| epoch 005: 1570 / 8862 loss=4.078, nll_loss=2.424, ppl=5.37, wps=52109, ups=3, wpb=15180.582, bsz=563.208, num_updates=37009, lr=0.000164379, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11019, train_wall=9961 | |
| epoch 005: 1580 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52101, ups=3, wpb=15177.760, bsz=562.854, num_updates=37019, lr=0.000164357, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11022, train_wall=9964 | |
| epoch 005: 1590 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52101, ups=3, wpb=15177.123, bsz=562.590, num_updates=37029, lr=0.000164335, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11025, train_wall=9966 | |
| epoch 005: 1600 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52105, ups=3, wpb=15178.714, bsz=562.668, num_updates=37039, lr=0.000164312, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11028, train_wall=9969 | |
| epoch 005: 1610 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52106, ups=3, wpb=15178.415, bsz=562.761, num_updates=37049, lr=0.00016429, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11031, train_wall=9972 | |
| epoch 005: 1620 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52106, ups=3, wpb=15179.638, bsz=562.695, num_updates=37059, lr=0.000164268, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11034, train_wall=9975 | |
| epoch 005: 1630 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52101, ups=3, wpb=15178.811, bsz=562.845, num_updates=37069, lr=0.000164246, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11037, train_wall=9977 | |
| epoch 005: 1640 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52101, ups=3, wpb=15179.467, bsz=562.837, num_updates=37079, lr=0.000164224, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11040, train_wall=9980 | |
| epoch 005: 1650 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52100, ups=3, wpb=15179.850, bsz=562.946, num_updates=37089, lr=0.000164202, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11042, train_wall=9983 | |
| epoch 005: 1660 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52098, ups=3, wpb=15180.618, bsz=563.126, num_updates=37099, lr=0.000164179, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11045, train_wall=9985 | |
| epoch 005: 1670 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52096, ups=3, wpb=15181.279, bsz=563.840, num_updates=37109, lr=0.000164157, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11048, train_wall=9988 | |
| epoch 005: 1680 / 8862 loss=4.078, nll_loss=2.425, ppl=5.37, wps=52092, ups=3, wpb=15180.482, bsz=563.774, num_updates=37119, lr=0.000164135, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11051, train_wall=9991 | |
| epoch 005: 1690 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52086, ups=3, wpb=15179.242, bsz=563.397, num_updates=37129, lr=0.000164113, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11054, train_wall=9994 | |
| epoch 005: 1700 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52085, ups=3, wpb=15180.465, bsz=563.128, num_updates=37139, lr=0.000164091, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11057, train_wall=9996 | |
| epoch 005: 1710 / 8862 loss=4.079, nll_loss=2.425, ppl=5.37, wps=52089, ups=3, wpb=15182.358, bsz=562.960, num_updates=37149, lr=0.000164069, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11060, train_wall=9999 | |
| epoch 005: 1720 / 8862 loss=4.080, nll_loss=2.426, ppl=5.37, wps=52092, ups=3, wpb=15183.615, bsz=562.715, num_updates=37159, lr=0.000164047, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11063, train_wall=10002 | |
| epoch 005: 1730 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52096, ups=3, wpb=15185.341, bsz=562.801, num_updates=37169, lr=0.000164025, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11066, train_wall=10004 | |
| epoch 005: 1740 / 8862 loss=4.080, nll_loss=2.426, ppl=5.37, wps=52090, ups=3, wpb=15184.883, bsz=562.463, num_updates=37179, lr=0.000164003, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11069, train_wall=10007 | |
| epoch 005: 1750 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52091, ups=3, wpb=15185.865, bsz=562.677, num_updates=37189, lr=0.000163981, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11072, train_wall=10010 | |
| epoch 005: 1760 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52081, ups=3, wpb=15183.159, bsz=563.130, num_updates=37199, lr=0.000163959, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11075, train_wall=10013 | |
| epoch 005: 1770 / 8862 loss=4.080, nll_loss=2.426, ppl=5.37, wps=52082, ups=3, wpb=15184.067, bsz=563.049, num_updates=37209, lr=0.000163937, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11078, train_wall=10015 | |
| epoch 005: 1780 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52084, ups=3, wpb=15184.369, bsz=562.462, num_updates=37219, lr=0.000163915, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11081, train_wall=10018 | |
| epoch 005: 1790 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52078, ups=3, wpb=15184.195, bsz=562.801, num_updates=37229, lr=0.000163893, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11084, train_wall=10021 | |
| epoch 005: 1800 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52072, ups=3, wpb=15183.132, bsz=562.741, num_updates=37239, lr=0.000163871, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11087, train_wall=10023 | |
| epoch 005: 1810 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52076, ups=3, wpb=15184.569, bsz=562.584, num_updates=37249, lr=0.000163849, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11089, train_wall=10026 | |
| epoch 005: 1820 / 8862 loss=4.081, nll_loss=2.427, ppl=5.38, wps=52069, ups=3, wpb=15182.312, bsz=562.689, num_updates=37259, lr=0.000163827, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11092, train_wall=10029 | |
| epoch 005: 1830 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52068, ups=3, wpb=15181.922, bsz=562.757, num_updates=37269, lr=0.000163805, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11095, train_wall=10032 | |
| epoch 005: 1840 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52065, ups=3, wpb=15182.395, bsz=563.203, num_updates=37279, lr=0.000163783, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11098, train_wall=10034 | |
| epoch 005: 1850 / 8862 loss=4.080, nll_loss=2.426, ppl=5.38, wps=52056, ups=3, wpb=15179.922, bsz=563.440, num_updates=37289, lr=0.000163761, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11101, train_wall=10037 | |
| epoch 005: 1860 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52048, ups=3, wpb=15178.947, bsz=563.697, num_updates=37299, lr=0.000163739, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11104, train_wall=10040 | |
| epoch 005: 1870 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52040, ups=3, wpb=15176.456, bsz=563.972, num_updates=37309, lr=0.000163717, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11107, train_wall=10042 | |
| epoch 005: 1880 / 8862 loss=4.080, nll_loss=2.426, ppl=5.37, wps=52034, ups=3, wpb=15175.210, bsz=563.794, num_updates=37319, lr=0.000163695, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11110, train_wall=10045 | |
| epoch 005: 1890 / 8862 loss=4.080, nll_loss=2.426, ppl=5.38, wps=52032, ups=3, wpb=15174.537, bsz=563.820, num_updates=37329, lr=0.000163673, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11113, train_wall=10048 | |
| epoch 005: 1900 / 8862 loss=4.079, nll_loss=2.426, ppl=5.37, wps=52029, ups=3, wpb=15175.361, bsz=564.065, num_updates=37339, lr=0.000163651, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11116, train_wall=10051 | |
| epoch 005: 1910 / 8862 loss=4.080, nll_loss=2.426, ppl=5.38, wps=52030, ups=3, wpb=15175.561, bsz=563.655, num_updates=37349, lr=0.000163629, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11119, train_wall=10053 | |
| epoch 005: 1920 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52025, ups=3, wpb=15174.136, bsz=563.465, num_updates=37359, lr=0.000163607, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11122, train_wall=10056 | |
| epoch 005: 1930 / 8862 loss=4.081, nll_loss=2.427, ppl=5.38, wps=52028, ups=3, wpb=15174.885, bsz=563.012, num_updates=37369, lr=0.000163585, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11125, train_wall=10059 | |
| epoch 005: 1940 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52025, ups=3, wpb=15174.745, bsz=562.642, num_updates=37379, lr=0.000163563, gnorm=0.475, clip=0.000, oom=0.000, loss_scale=8.000, wall=11128, train_wall=10061 | |
| epoch 005: 1950 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52023, ups=3, wpb=15175.555, bsz=563.309, num_updates=37389, lr=0.000163542, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11131, train_wall=10064 | |
| epoch 005: 1960 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52026, ups=3, wpb=15176.391, bsz=562.892, num_updates=37399, lr=0.00016352, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11133, train_wall=10067 | |
| epoch 005: 1970 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52030, ups=3, wpb=15178.597, bsz=562.845, num_updates=37409, lr=0.000163498, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11136, train_wall=10070 | |
| epoch 005: 1980 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=52027, ups=3, wpb=15178.094, bsz=562.601, num_updates=37419, lr=0.000163476, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11139, train_wall=10072 | |
| epoch 005: 1990 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=52031, ups=3, wpb=15179.852, bsz=562.447, num_updates=37429, lr=0.000163454, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11142, train_wall=10075 | |
| epoch 005: 2000 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52031, ups=3, wpb=15181.068, bsz=562.383, num_updates=37439, lr=0.000163432, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11145, train_wall=10078 | |
| epoch 005: 2010 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52029, ups=3, wpb=15180.935, bsz=562.466, num_updates=37449, lr=0.00016341, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11148, train_wall=10081 | |
| epoch 005: 2020 / 8862 loss=4.081, nll_loss=2.427, ppl=5.38, wps=52026, ups=3, wpb=15181.247, bsz=562.969, num_updates=37459, lr=0.000163389, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11151, train_wall=10083 | |
| epoch 005: 2030 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52024, ups=3, wpb=15181.007, bsz=562.812, num_updates=37469, lr=0.000163367, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11154, train_wall=10086 | |
| epoch 005: 2040 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52026, ups=3, wpb=15181.244, bsz=562.489, num_updates=37479, lr=0.000163345, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11157, train_wall=10089 | |
| epoch 005: 2050 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52024, ups=3, wpb=15182.988, bsz=563.323, num_updates=37489, lr=0.000163323, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11160, train_wall=10091 | |
| epoch 005: 2060 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52023, ups=3, wpb=15182.951, bsz=563.571, num_updates=37499, lr=0.000163301, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11163, train_wall=10094 | |
| epoch 005: 2070 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52022, ups=3, wpb=15183.401, bsz=563.604, num_updates=37509, lr=0.00016328, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11166, train_wall=10097 | |
| epoch 005: 2080 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52013, ups=3, wpb=15182.168, bsz=564.025, num_updates=37519, lr=0.000163258, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11169, train_wall=10100 | |
| epoch 005: 2090 / 8862 loss=4.080, nll_loss=2.427, ppl=5.38, wps=52011, ups=3, wpb=15181.587, bsz=563.837, num_updates=37529, lr=0.000163236, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11172, train_wall=10102 | |
| epoch 005: 2100 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52009, ups=3, wpb=15181.262, bsz=563.370, num_updates=37539, lr=0.000163214, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11175, train_wall=10105 | |
| epoch 005: 2110 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52008, ups=3, wpb=15182.042, bsz=562.782, num_updates=37549, lr=0.000163193, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11178, train_wall=10108 | |
| epoch 005: 2120 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52009, ups=3, wpb=15181.825, bsz=562.437, num_updates=37559, lr=0.000163171, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11181, train_wall=10110 | |
| epoch 005: 2130 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52009, ups=3, wpb=15182.912, bsz=562.110, num_updates=37569, lr=0.000163149, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11184, train_wall=10113 | |
| epoch 005: 2140 / 8862 loss=4.082, nll_loss=2.428, ppl=5.38, wps=52005, ups=3, wpb=15181.497, bsz=562.276, num_updates=37579, lr=0.000163128, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11186, train_wall=10116 | |
| epoch 005: 2150 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=52001, ups=3, wpb=15181.714, bsz=563.161, num_updates=37589, lr=0.000163106, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11189, train_wall=10119 | |
| epoch 005: 2160 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51996, ups=3, wpb=15181.069, bsz=562.913, num_updates=37599, lr=0.000163084, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11192, train_wall=10121 | |
| epoch 005: 2170 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51996, ups=3, wpb=15181.567, bsz=563.235, num_updates=37609, lr=0.000163063, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11195, train_wall=10124 | |
| epoch 005: 2180 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51999, ups=3, wpb=15182.205, bsz=563.074, num_updates=37619, lr=0.000163041, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11198, train_wall=10127 | |
| epoch 005: 2190 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51989, ups=3, wpb=15179.414, bsz=562.614, num_updates=37629, lr=0.000163019, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11201, train_wall=10130 | |
| epoch 005: 2200 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51987, ups=3, wpb=15179.521, bsz=562.483, num_updates=37639, lr=0.000162998, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11204, train_wall=10132 | |
| epoch 005: 2210 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51986, ups=3, wpb=15179.291, bsz=562.652, num_updates=37649, lr=0.000162976, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11207, train_wall=10135 | |
| epoch 005: 2220 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51987, ups=3, wpb=15179.184, bsz=562.363, num_updates=37659, lr=0.000162954, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11210, train_wall=10138 | |
| epoch 005: 2230 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51983, ups=3, wpb=15179.318, bsz=562.732, num_updates=37669, lr=0.000162933, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11213, train_wall=10140 | |
| epoch 005: 2240 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51981, ups=3, wpb=15178.264, bsz=562.402, num_updates=37679, lr=0.000162911, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11216, train_wall=10143 | |
| epoch 005: 2250 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51974, ups=3, wpb=15178.005, bsz=562.637, num_updates=37689, lr=0.000162889, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11219, train_wall=10146 | |
| epoch 005: 2260 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51975, ups=3, wpb=15177.840, bsz=562.523, num_updates=37699, lr=0.000162868, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11222, train_wall=10149 | |
| epoch 005: 2270 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51971, ups=3, wpb=15176.985, bsz=562.346, num_updates=37709, lr=0.000162846, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11225, train_wall=10151 | |
| epoch 005: 2280 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51968, ups=3, wpb=15176.787, bsz=562.241, num_updates=37719, lr=0.000162825, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11228, train_wall=10154 | |
| epoch 005: 2290 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51967, ups=3, wpb=15176.870, bsz=562.057, num_updates=37729, lr=0.000162803, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11230, train_wall=10157 | |
| epoch 005: 2300 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51961, ups=3, wpb=15175.963, bsz=561.818, num_updates=37739, lr=0.000162781, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11233, train_wall=10159 | |
| epoch 005: 2310 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51963, ups=3, wpb=15176.275, bsz=561.724, num_updates=37749, lr=0.00016276, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11236, train_wall=10162 | |
| epoch 005: 2320 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51960, ups=3, wpb=15176.119, bsz=561.582, num_updates=37759, lr=0.000162738, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11239, train_wall=10165 | |
| epoch 005: 2330 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51960, ups=3, wpb=15176.979, bsz=561.884, num_updates=37769, lr=0.000162717, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11242, train_wall=10168 | |
| epoch 005: 2340 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51957, ups=3, wpb=15176.712, bsz=562.091, num_updates=37779, lr=0.000162695, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11245, train_wall=10170 | |
| epoch 005: 2350 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51948, ups=3, wpb=15175.364, bsz=562.328, num_updates=37789, lr=0.000162674, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11248, train_wall=10173 | |
| epoch 005: 2360 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51949, ups=3, wpb=15175.700, bsz=562.202, num_updates=37799, lr=0.000162652, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11251, train_wall=10176 | |
| epoch 005: 2370 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51945, ups=3, wpb=15175.181, bsz=562.446, num_updates=37809, lr=0.000162631, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11254, train_wall=10179 | |
| epoch 005: 2380 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51946, ups=3, wpb=15175.270, bsz=562.083, num_updates=37819, lr=0.000162609, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11257, train_wall=10181 | |
| epoch 005: 2390 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51946, ups=3, wpb=15176.205, bsz=562.095, num_updates=37829, lr=0.000162588, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11260, train_wall=10184 | |
| epoch 005: 2400 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51942, ups=3, wpb=15175.344, bsz=562.346, num_updates=37839, lr=0.000162566, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11263, train_wall=10187 | |
| epoch 005: 2410 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51943, ups=3, wpb=15175.708, bsz=562.160, num_updates=37849, lr=0.000162545, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11266, train_wall=10189 | |
| epoch 005: 2420 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51944, ups=3, wpb=15175.969, bsz=562.310, num_updates=37859, lr=0.000162523, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11269, train_wall=10192 | |
| epoch 005: 2430 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51945, ups=3, wpb=15176.215, bsz=562.313, num_updates=37869, lr=0.000162502, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11272, train_wall=10195 | |
| epoch 005: 2440 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51941, ups=3, wpb=15175.821, bsz=561.888, num_updates=37879, lr=0.00016248, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11275, train_wall=10198 | |
| epoch 005: 2450 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51939, ups=3, wpb=15175.873, bsz=562.259, num_updates=37889, lr=0.000162459, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11278, train_wall=10200 | |
| epoch 005: 2460 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51934, ups=3, wpb=15173.677, bsz=562.015, num_updates=37899, lr=0.000162437, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11280, train_wall=10203 | |
| epoch 005: 2470 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51936, ups=3, wpb=15173.924, bsz=561.735, num_updates=37909, lr=0.000162416, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11283, train_wall=10206 | |
| epoch 005: 2480 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51932, ups=3, wpb=15174.906, bsz=561.644, num_updates=37919, lr=0.000162395, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11286, train_wall=10208 | |
| epoch 005: 2490 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51928, ups=3, wpb=15173.282, bsz=561.712, num_updates=37929, lr=0.000162373, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11289, train_wall=10211 | |
| epoch 005: 2500 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51929, ups=3, wpb=15173.793, bsz=561.481, num_updates=37939, lr=0.000162352, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11292, train_wall=10214 | |
| epoch 005: 2510 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51933, ups=3, wpb=15174.892, bsz=561.491, num_updates=37949, lr=0.00016233, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11295, train_wall=10217 | |
| epoch 005: 2520 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51933, ups=3, wpb=15175.167, bsz=561.368, num_updates=37959, lr=0.000162309, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11298, train_wall=10219 | |
| epoch 005: 2530 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51927, ups=3, wpb=15173.903, bsz=561.109, num_updates=37969, lr=0.000162288, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11301, train_wall=10222 | |
| epoch 005: 2540 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51925, ups=3, wpb=15174.096, bsz=561.325, num_updates=37979, lr=0.000162266, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11304, train_wall=10225 | |
| epoch 005: 2550 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51927, ups=3, wpb=15174.314, bsz=561.091, num_updates=37989, lr=0.000162245, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11307, train_wall=10227 | |
| epoch 005: 2560 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51924, ups=3, wpb=15174.690, bsz=561.115, num_updates=37999, lr=0.000162224, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11310, train_wall=10230 | |
| epoch 005: 2570 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51924, ups=3, wpb=15174.173, bsz=561.064, num_updates=38009, lr=0.000162202, gnorm=0.476, clip=0.000, oom=0.000, loss_scale=8.000, wall=11313, train_wall=10233 | |
| epoch 005: 2580 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51919, ups=3, wpb=15174.126, bsz=561.175, num_updates=38019, lr=0.000162181, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11316, train_wall=10236 | |
| epoch 005: 2590 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51919, ups=3, wpb=15173.906, bsz=560.781, num_updates=38029, lr=0.00016216, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11319, train_wall=10238 | |
| epoch 005: 2600 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51920, ups=3, wpb=15174.802, bsz=560.923, num_updates=38039, lr=0.000162138, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11322, train_wall=10241 | |
| epoch 005: 2610 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51921, ups=3, wpb=15175.271, bsz=560.827, num_updates=38049, lr=0.000162117, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11325, train_wall=10244 | |
| epoch 005: 2620 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51925, ups=3, wpb=15176.363, bsz=560.467, num_updates=38059, lr=0.000162096, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11327, train_wall=10246 | |
| epoch 005: 2630 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51923, ups=3, wpb=15175.675, bsz=560.487, num_updates=38069, lr=0.000162074, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11330, train_wall=10249 | |
| epoch 005: 2640 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51917, ups=3, wpb=15174.728, bsz=560.703, num_updates=38079, lr=0.000162053, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11333, train_wall=10252 | |
| epoch 005: 2650 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51916, ups=3, wpb=15174.144, bsz=560.634, num_updates=38089, lr=0.000162032, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11336, train_wall=10255 | |
| epoch 005: 2660 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51917, ups=3, wpb=15174.271, bsz=560.797, num_updates=38099, lr=0.000162011, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11339, train_wall=10257 | |
| epoch 005: 2670 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51914, ups=3, wpb=15173.857, bsz=560.665, num_updates=38109, lr=0.000161989, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11342, train_wall=10260 | |
| epoch 005: 2680 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51913, ups=3, wpb=15173.052, bsz=560.468, num_updates=38119, lr=0.000161968, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11345, train_wall=10263 | |
| epoch 005: 2690 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51914, ups=3, wpb=15174.116, bsz=560.384, num_updates=38129, lr=0.000161947, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11348, train_wall=10265 | |
| epoch 005: 2700 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51909, ups=3, wpb=15174.014, bsz=560.883, num_updates=38139, lr=0.000161926, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11351, train_wall=10268 | |
| epoch 005: 2710 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51909, ups=3, wpb=15173.776, bsz=561.065, num_updates=38149, lr=0.000161904, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11354, train_wall=10271 | |
| epoch 005: 2720 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51912, ups=3, wpb=15174.690, bsz=561.003, num_updates=38159, lr=0.000161883, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11357, train_wall=10274 | |
| epoch 005: 2730 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51910, ups=3, wpb=15174.820, bsz=561.049, num_updates=38169, lr=0.000161862, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11360, train_wall=10276 | |
| epoch 005: 2740 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51907, ups=3, wpb=15174.092, bsz=561.150, num_updates=38179, lr=0.000161841, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11363, train_wall=10279 | |
| epoch 005: 2750 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51906, ups=3, wpb=15173.883, bsz=561.143, num_updates=38189, lr=0.00016182, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11366, train_wall=10282 | |
| epoch 005: 2760 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51906, ups=3, wpb=15174.658, bsz=560.950, num_updates=38199, lr=0.000161798, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11369, train_wall=10284 | |
| epoch 005: 2770 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51905, ups=3, wpb=15174.785, bsz=560.987, num_updates=38209, lr=0.000161777, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11372, train_wall=10287 | |
| epoch 005: 2780 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51903, ups=3, wpb=15174.519, bsz=561.194, num_updates=38219, lr=0.000161756, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11374, train_wall=10290 | |
| epoch 005: 2790 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51902, ups=3, wpb=15174.901, bsz=561.247, num_updates=38229, lr=0.000161735, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11377, train_wall=10293 | |
| epoch 005: 2800 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51901, ups=3, wpb=15174.741, bsz=560.885, num_updates=38239, lr=0.000161714, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11380, train_wall=10295 | |
| epoch 005: 2810 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51901, ups=3, wpb=15174.359, bsz=560.783, num_updates=38249, lr=0.000161693, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11383, train_wall=10298 | |
| epoch 005: 2820 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51901, ups=3, wpb=15173.996, bsz=560.539, num_updates=38259, lr=0.000161671, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11386, train_wall=10301 | |
| epoch 005: 2830 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51898, ups=3, wpb=15174.150, bsz=560.427, num_updates=38269, lr=0.00016165, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11389, train_wall=10303 | |
| epoch 005: 2840 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51902, ups=3, wpb=15175.250, bsz=560.127, num_updates=38279, lr=0.000161629, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11392, train_wall=10306 | |
| epoch 005: 2850 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51903, ups=3, wpb=15175.654, bsz=560.067, num_updates=38289, lr=0.000161608, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11395, train_wall=10309 | |
| epoch 005: 2860 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51901, ups=3, wpb=15175.070, bsz=559.701, num_updates=38299, lr=0.000161587, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11398, train_wall=10312 | |
| epoch 005: 2870 / 8862 loss=4.084, nll_loss=2.431, ppl=5.39, wps=51899, ups=3, wpb=15174.658, bsz=559.830, num_updates=38309, lr=0.000161566, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11401, train_wall=10314 | |
| epoch 005: 2880 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51892, ups=3, wpb=15174.609, bsz=560.497, num_updates=38319, lr=0.000161545, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11404, train_wall=10317 | |
| epoch 005: 2890 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51898, ups=3, wpb=15176.404, bsz=560.404, num_updates=38329, lr=0.000161524, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11407, train_wall=10320 | |
| epoch 005: 2900 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51897, ups=3, wpb=15176.287, bsz=560.670, num_updates=38339, lr=0.000161503, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11410, train_wall=10323 | |
| epoch 005: 2910 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51894, ups=3, wpb=15175.181, bsz=560.294, num_updates=38349, lr=0.000161482, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11413, train_wall=10325 | |
| epoch 005: 2920 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51892, ups=3, wpb=15174.568, bsz=560.068, num_updates=38359, lr=0.000161461, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11416, train_wall=10328 | |
| epoch 005: 2930 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51887, ups=3, wpb=15173.473, bsz=560.377, num_updates=38369, lr=0.000161439, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11419, train_wall=10331 | |
| epoch 005: 2940 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51886, ups=3, wpb=15173.155, bsz=560.530, num_updates=38379, lr=0.000161418, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11421, train_wall=10333 | |
| epoch 005: 2950 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51886, ups=3, wpb=15173.088, bsz=560.575, num_updates=38389, lr=0.000161397, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11424, train_wall=10336 | |
| epoch 005: 2960 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51881, ups=3, wpb=15172.257, bsz=560.600, num_updates=38399, lr=0.000161376, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11427, train_wall=10339 | |
| epoch 005: 2970 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51884, ups=3, wpb=15173.350, bsz=560.705, num_updates=38409, lr=0.000161355, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11430, train_wall=10342 | |
| epoch 005: 2980 / 8862 loss=4.083, nll_loss=2.431, ppl=5.39, wps=51884, ups=3, wpb=15173.577, bsz=560.657, num_updates=38419, lr=0.000161334, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11433, train_wall=10344 | |
| epoch 005: 2990 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51881, ups=3, wpb=15173.601, bsz=560.741, num_updates=38429, lr=0.000161313, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11436, train_wall=10347 | |
| epoch 005: 3000 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51880, ups=3, wpb=15174.244, bsz=560.744, num_updates=38439, lr=0.000161292, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11439, train_wall=10350 | |
| epoch 005: 3010 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51878, ups=3, wpb=15173.989, bsz=560.444, num_updates=38449, lr=0.000161271, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11442, train_wall=10353 | |
| epoch 005: 3020 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51880, ups=3, wpb=15175.113, bsz=560.543, num_updates=38459, lr=0.00016125, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11445, train_wall=10355 | |
| epoch 005: 3030 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15174.355, bsz=560.388, num_updates=38469, lr=0.00016123, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11448, train_wall=10358 | |
| epoch 005: 3040 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15174.292, bsz=560.174, num_updates=38479, lr=0.000161209, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11451, train_wall=10361 | |
| epoch 005: 3050 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15174.184, bsz=559.806, num_updates=38489, lr=0.000161188, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11454, train_wall=10363 | |
| epoch 005: 3060 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15174.656, bsz=559.762, num_updates=38499, lr=0.000161167, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11457, train_wall=10366 | |
| epoch 005: 3070 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15174.079, bsz=559.859, num_updates=38509, lr=0.000161146, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11460, train_wall=10369 | |
| epoch 005: 3080 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15175.160, bsz=560.177, num_updates=38519, lr=0.000161125, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11463, train_wall=10372 | |
| epoch 005: 3090 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51874, ups=3, wpb=15175.758, bsz=560.207, num_updates=38529, lr=0.000161104, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11466, train_wall=10374 | |
| epoch 005: 3100 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51873, ups=3, wpb=15175.284, bsz=560.039, num_updates=38539, lr=0.000161083, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11469, train_wall=10377 | |
| epoch 005: 3110 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51874, ups=3, wpb=15176.123, bsz=560.257, num_updates=38549, lr=0.000161062, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11472, train_wall=10380 | |
| epoch 005: 3120 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51875, ups=3, wpb=15176.213, bsz=560.146, num_updates=38559, lr=0.000161041, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11474, train_wall=10382 | |
| epoch 005: 3130 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51873, ups=3, wpb=15175.997, bsz=560.038, num_updates=38569, lr=0.00016102, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11477, train_wall=10385 | |
| epoch 005: 3140 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51875, ups=3, wpb=15176.582, bsz=560.130, num_updates=38579, lr=0.000160999, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11480, train_wall=10388 | |
| epoch 005: 3150 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51875, ups=3, wpb=15176.462, bsz=559.934, num_updates=38589, lr=0.000160979, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11483, train_wall=10391 | |
| epoch 005: 3160 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51875, ups=3, wpb=15176.493, bsz=559.952, num_updates=38599, lr=0.000160958, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11486, train_wall=10393 | |
| epoch 005: 3170 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51875, ups=3, wpb=15176.543, bsz=560.086, num_updates=38609, lr=0.000160937, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11489, train_wall=10396 | |
| epoch 005: 3180 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51873, ups=3, wpb=15175.767, bsz=559.912, num_updates=38619, lr=0.000160916, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11492, train_wall=10399 | |
| epoch 005: 3190 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51873, ups=3, wpb=15175.838, bsz=560.178, num_updates=38629, lr=0.000160895, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11495, train_wall=10401 | |
| epoch 005: 3200 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51873, ups=3, wpb=15176.199, bsz=559.935, num_updates=38639, lr=0.000160874, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11498, train_wall=10404 | |
| epoch 005: 3210 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51874, ups=3, wpb=15176.363, bsz=559.801, num_updates=38649, lr=0.000160854, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11501, train_wall=10407 | |
| epoch 005: 3220 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15177.408, bsz=559.804, num_updates=38659, lr=0.000160833, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11504, train_wall=10410 | |
| epoch 005: 3230 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15176.949, bsz=559.864, num_updates=38669, lr=0.000160812, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11507, train_wall=10412 | |
| epoch 005: 3240 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15177.409, bsz=559.807, num_updates=38679, lr=0.000160791, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11510, train_wall=10415 | |
| epoch 005: 3250 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51871, ups=3, wpb=15176.232, bsz=560.022, num_updates=38689, lr=0.00016077, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11513, train_wall=10418 | |
| epoch 005: 3260 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51871, ups=3, wpb=15175.837, bsz=560.076, num_updates=38699, lr=0.00016075, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11515, train_wall=10420 | |
| epoch 005: 3270 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51874, ups=3, wpb=15176.510, bsz=559.927, num_updates=38709, lr=0.000160729, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11518, train_wall=10423 | |
| epoch 005: 3280 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51875, ups=3, wpb=15176.889, bsz=559.690, num_updates=38719, lr=0.000160708, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11521, train_wall=10426 | |
| epoch 005: 3290 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15177.171, bsz=559.596, num_updates=38729, lr=0.000160687, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11524, train_wall=10429 | |
| epoch 005: 3300 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51877, ups=3, wpb=15177.131, bsz=559.491, num_updates=38739, lr=0.000160667, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11527, train_wall=10431 | |
| epoch 005: 3310 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51876, ups=3, wpb=15177.335, bsz=559.606, num_updates=38749, lr=0.000160646, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11530, train_wall=10434 | |
| epoch 005: 3320 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51873, ups=3, wpb=15176.619, bsz=559.749, num_updates=38759, lr=0.000160625, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11533, train_wall=10437 | |
| epoch 005: 3330 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51876, ups=3, wpb=15177.310, bsz=559.765, num_updates=38769, lr=0.000160604, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11536, train_wall=10439 | |
| epoch 005: 3340 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51877, ups=3, wpb=15177.702, bsz=559.636, num_updates=38779, lr=0.000160584, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11539, train_wall=10442 | |
| epoch 005: 3350 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51874, ups=3, wpb=15176.928, bsz=559.487, num_updates=38789, lr=0.000160563, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11542, train_wall=10445 | |
| epoch 005: 3360 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51867, ups=3, wpb=15175.378, bsz=559.605, num_updates=38799, lr=0.000160542, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11545, train_wall=10448 | |
| epoch 005: 3370 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51863, ups=3, wpb=15173.669, bsz=559.513, num_updates=38809, lr=0.000160522, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11548, train_wall=10450 | |
| epoch 005: 3380 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51864, ups=3, wpb=15174.186, bsz=559.546, num_updates=38819, lr=0.000160501, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11551, train_wall=10453 | |
| epoch 005: 3390 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51859, ups=3, wpb=15174.008, bsz=559.684, num_updates=38829, lr=0.00016048, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11554, train_wall=10456 | |
| epoch 005: 3400 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51862, ups=3, wpb=15174.814, bsz=559.800, num_updates=38839, lr=0.00016046, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11557, train_wall=10458 | |
| epoch 005: 3410 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51862, ups=3, wpb=15174.576, bsz=559.899, num_updates=38849, lr=0.000160439, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11559, train_wall=10461 | |
| epoch 005: 3420 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51862, ups=3, wpb=15174.200, bsz=559.614, num_updates=38859, lr=0.000160418, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11562, train_wall=10464 | |
| epoch 005: 3430 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51864, ups=3, wpb=15174.512, bsz=559.585, num_updates=38869, lr=0.000160398, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11565, train_wall=10466 | |
| epoch 005: 3440 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51865, ups=3, wpb=15174.688, bsz=559.756, num_updates=38879, lr=0.000160377, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11568, train_wall=10469 | |
| epoch 005: 3450 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51862, ups=3, wpb=15173.695, bsz=559.886, num_updates=38889, lr=0.000160357, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11571, train_wall=10472 | |
| epoch 005: 3460 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51863, ups=3, wpb=15173.628, bsz=559.609, num_updates=38899, lr=0.000160336, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11574, train_wall=10475 | |
| epoch 005: 3470 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51866, ups=3, wpb=15174.082, bsz=559.770, num_updates=38909, lr=0.000160315, gnorm=0.478, clip=0.000, oom=0.000, loss_scale=8.000, wall=11577, train_wall=10477 | |
| epoch 005: 3480 / 8862 loss=4.083, nll_loss=2.430, ppl=5.39, wps=51867, ups=3, wpb=15173.914, bsz=559.786, num_updates=38919, lr=0.000160295, gnorm=0.478, clip=0.000, oom=0.000, loss_scale=8.000, wall=11580, train_wall=10480 | |
| epoch 005: 3490 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51869, ups=3, wpb=15174.426, bsz=559.766, num_updates=38929, lr=0.000160274, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11583, train_wall=10483 | |
| epoch 005: 3500 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51870, ups=3, wpb=15173.995, bsz=559.842, num_updates=38939, lr=0.000160254, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11586, train_wall=10485 | |
| epoch 005: 3510 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51873, ups=3, wpb=15174.349, bsz=559.836, num_updates=38949, lr=0.000160233, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11588, train_wall=10488 | |
| epoch 005: 3520 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51876, ups=3, wpb=15174.913, bsz=559.982, num_updates=38959, lr=0.000160212, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11591, train_wall=10491 | |
| epoch 005: 3530 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51878, ups=3, wpb=15175.714, bsz=560.054, num_updates=38969, lr=0.000160192, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11594, train_wall=10493 | |
| epoch 005: 3540 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51880, ups=3, wpb=15175.814, bsz=559.955, num_updates=38979, lr=0.000160171, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11597, train_wall=10496 | |
| epoch 005: 3550 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51878, ups=3, wpb=15175.168, bsz=560.369, num_updates=38989, lr=0.000160151, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11600, train_wall=10499 | |
| epoch 005: 3560 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51877, ups=3, wpb=15174.467, bsz=560.465, num_updates=38999, lr=0.00016013, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11603, train_wall=10501 | |
| epoch 005: 3570 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51875, ups=3, wpb=15173.637, bsz=560.468, num_updates=39009, lr=0.00016011, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11606, train_wall=10504 | |
| epoch 005: 3580 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51873, ups=3, wpb=15173.200, bsz=560.769, num_updates=39019, lr=0.000160089, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11609, train_wall=10507 | |
| epoch 005: 3590 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51869, ups=3, wpb=15172.943, bsz=561.441, num_updates=39029, lr=0.000160069, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11612, train_wall=10510 | |
| epoch 005: 3600 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51873, ups=3, wpb=15173.611, bsz=561.360, num_updates=39039, lr=0.000160048, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11615, train_wall=10512 | |
| epoch 005: 3610 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51877, ups=3, wpb=15174.561, bsz=561.360, num_updates=39049, lr=0.000160028, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11618, train_wall=10515 | |
| epoch 005: 3620 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51878, ups=3, wpb=15174.732, bsz=561.644, num_updates=39059, lr=0.000160007, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11621, train_wall=10518 | |
| epoch 005: 3630 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51881, ups=3, wpb=15175.451, bsz=561.487, num_updates=39069, lr=0.000159987, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11623, train_wall=10520 | |
| epoch 005: 3640 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51882, ups=3, wpb=15175.071, bsz=561.481, num_updates=39079, lr=0.000159966, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11626, train_wall=10523 | |
| epoch 005: 3650 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51886, ups=3, wpb=15175.946, bsz=561.273, num_updates=39089, lr=0.000159946, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11629, train_wall=10526 | |
| epoch 005: 3660 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51886, ups=3, wpb=15175.779, bsz=561.283, num_updates=39099, lr=0.000159925, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11632, train_wall=10528 | |
| epoch 005: 3670 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51890, ups=3, wpb=15176.631, bsz=561.231, num_updates=39109, lr=0.000159905, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11635, train_wall=10531 | |
| epoch 005: 3680 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51891, ups=3, wpb=15176.559, bsz=561.056, num_updates=39119, lr=0.000159884, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11638, train_wall=10534 | |
| epoch 005: 3690 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51889, ups=3, wpb=15175.581, bsz=561.017, num_updates=39129, lr=0.000159864, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11641, train_wall=10536 | |
| epoch 005: 3700 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51890, ups=3, wpb=15175.320, bsz=560.914, num_updates=39139, lr=0.000159844, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11644, train_wall=10539 | |
| epoch 005: 3710 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51886, ups=3, wpb=15175.059, bsz=561.548, num_updates=39149, lr=0.000159823, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11647, train_wall=10542 | |
| epoch 005: 3720 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51887, ups=3, wpb=15175.571, bsz=561.804, num_updates=39159, lr=0.000159803, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11650, train_wall=10545 | |
| epoch 005: 3730 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51886, ups=3, wpb=15174.659, bsz=561.741, num_updates=39169, lr=0.000159782, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11653, train_wall=10547 | |
| epoch 005: 3740 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51889, ups=3, wpb=15175.270, bsz=561.777, num_updates=39179, lr=0.000159762, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11656, train_wall=10550 | |
| epoch 005: 3750 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51887, ups=3, wpb=15175.252, bsz=562.088, num_updates=39189, lr=0.000159742, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11658, train_wall=10553 | |
| epoch 005: 3760 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51890, ups=3, wpb=15175.839, bsz=562.055, num_updates=39199, lr=0.000159721, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11661, train_wall=10555 | |
| epoch 005: 3770 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51892, ups=3, wpb=15176.146, bsz=562.104, num_updates=39209, lr=0.000159701, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11664, train_wall=10558 | |
| epoch 005: 3780 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51895, ups=3, wpb=15176.913, bsz=562.099, num_updates=39219, lr=0.00015968, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11667, train_wall=10561 | |
| epoch 005: 3790 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51897, ups=3, wpb=15177.259, bsz=562.163, num_updates=39229, lr=0.00015966, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11670, train_wall=10563 | |
| epoch 005: 3800 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51896, ups=3, wpb=15176.499, bsz=562.075, num_updates=39239, lr=0.00015964, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11673, train_wall=10566 | |
| epoch 005: 3810 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51899, ups=3, wpb=15176.959, bsz=562.019, num_updates=39249, lr=0.000159619, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11676, train_wall=10569 | |
| epoch 005: 3820 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51899, ups=3, wpb=15176.679, bsz=561.958, num_updates=39259, lr=0.000159599, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11679, train_wall=10571 | |
| epoch 005: 3830 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51900, ups=3, wpb=15176.544, bsz=561.911, num_updates=39269, lr=0.000159579, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11682, train_wall=10574 | |
| epoch 005: 3840 / 8862 loss=4.082, nll_loss=2.430, ppl=5.39, wps=51897, ups=3, wpb=15175.627, bsz=561.770, num_updates=39279, lr=0.000159558, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11685, train_wall=10577 | |
| epoch 005: 3850 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51901, ups=3, wpb=15176.292, bsz=561.641, num_updates=39289, lr=0.000159538, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=8.000, wall=11687, train_wall=10579 | |
| epoch 005: 3860 / 8862 loss=4.082, nll_loss=2.429, ppl=5.39, wps=51902, ups=3, wpb=15176.223, bsz=561.552, num_updates=39299, lr=0.000159518, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11690, train_wall=10582 | |
| epoch 005: 3870 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51903, ups=3, wpb=15176.392, bsz=561.540, num_updates=39309, lr=0.000159498, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11693, train_wall=10585 | |
| epoch 005: 3880 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51905, ups=3, wpb=15176.533, bsz=561.459, num_updates=39319, lr=0.000159477, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11696, train_wall=10588 | |
| epoch 005: 3890 / 8862 loss=4.082, nll_loss=2.429, ppl=5.38, wps=51905, ups=3, wpb=15176.546, bsz=561.711, num_updates=39329, lr=0.000159457, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11699, train_wall=10590 | |
| epoch 005: 3900 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51906, ups=3, wpb=15176.868, bsz=561.958, num_updates=39339, lr=0.000159437, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11702, train_wall=10593 | |
| epoch 005: 3910 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51906, ups=3, wpb=15176.786, bsz=562.072, num_updates=39349, lr=0.000159416, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11705, train_wall=10596 | |
| epoch 005: 3920 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51903, ups=3, wpb=15175.575, bsz=561.942, num_updates=39359, lr=0.000159396, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11708, train_wall=10598 | |
| epoch 005: 3930 / 8862 loss=4.081, nll_loss=2.429, ppl=5.38, wps=51906, ups=3, wpb=15176.020, bsz=561.828, num_updates=39369, lr=0.000159376, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11711, train_wall=10601 | |
| epoch 005: 3940 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51907, ups=3, wpb=15176.058, bsz=561.638, num_updates=39379, lr=0.000159356, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11714, train_wall=10604 | |
| epoch 005: 3950 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51910, ups=3, wpb=15176.381, bsz=561.616, num_updates=39389, lr=0.000159335, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11717, train_wall=10606 | |
| epoch 005: 3960 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51912, ups=3, wpb=15176.633, bsz=561.511, num_updates=39399, lr=0.000159315, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11719, train_wall=10609 | |
| epoch 005: 3970 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51915, ups=3, wpb=15177.518, bsz=561.386, num_updates=39409, lr=0.000159295, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11722, train_wall=10612 | |
| epoch 005: 3980 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51915, ups=3, wpb=15176.943, bsz=561.168, num_updates=39419, lr=0.000159275, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11725, train_wall=10614 | |
| epoch 005: 3990 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51916, ups=3, wpb=15176.947, bsz=561.301, num_updates=39429, lr=0.000159255, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11728, train_wall=10617 | |
| epoch 005: 4000 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51916, ups=3, wpb=15177.086, bsz=561.254, num_updates=39439, lr=0.000159234, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11731, train_wall=10620 | |
| epoch 005: 4010 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51919, ups=3, wpb=15177.798, bsz=561.334, num_updates=39449, lr=0.000159214, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11734, train_wall=10622 | |
| epoch 005: 4020 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51921, ups=3, wpb=15178.010, bsz=561.267, num_updates=39459, lr=0.000159194, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11737, train_wall=10625 | |
| epoch 005: 4030 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51922, ups=3, wpb=15178.082, bsz=561.223, num_updates=39469, lr=0.000159174, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11740, train_wall=10628 | |
| epoch 005: 4040 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51924, ups=3, wpb=15178.245, bsz=561.251, num_updates=39479, lr=0.000159154, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11743, train_wall=10630 | |
| epoch 005: 4050 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51926, ups=3, wpb=15178.456, bsz=561.177, num_updates=39489, lr=0.000159134, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11746, train_wall=10633 | |
| epoch 005: 4060 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51927, ups=3, wpb=15178.507, bsz=561.089, num_updates=39499, lr=0.000159113, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11748, train_wall=10636 | |
| epoch 005: 4070 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51928, ups=3, wpb=15178.450, bsz=560.900, num_updates=39509, lr=0.000159093, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11751, train_wall=10638 | |
| epoch 005: 4080 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51930, ups=3, wpb=15178.618, bsz=560.847, num_updates=39519, lr=0.000159073, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11754, train_wall=10641 | |
| epoch 005: 4090 / 8862 loss=4.080, nll_loss=2.428, ppl=5.38, wps=51928, ups=3, wpb=15178.394, bsz=561.273, num_updates=39529, lr=0.000159053, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11757, train_wall=10644 | |
| epoch 005: 4100 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51933, ups=3, wpb=15179.152, bsz=561.120, num_updates=39539, lr=0.000159033, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11760, train_wall=10647 | |
| epoch 005: 4110 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51929, ups=3, wpb=15178.928, bsz=561.596, num_updates=39549, lr=0.000159013, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11763, train_wall=10649 | |
| epoch 005: 4120 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51929, ups=3, wpb=15178.761, bsz=561.644, num_updates=39559, lr=0.000158993, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11766, train_wall=10652 | |
| epoch 005: 4130 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51930, ups=3, wpb=15178.645, bsz=561.408, num_updates=39569, lr=0.000158973, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11769, train_wall=10655 | |
| epoch 005: 4140 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51933, ups=3, wpb=15179.042, bsz=561.178, num_updates=39579, lr=0.000158953, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11772, train_wall=10657 | |
| epoch 005: 4150 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51935, ups=3, wpb=15179.394, bsz=561.224, num_updates=39589, lr=0.000158933, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11775, train_wall=10660 | |
| epoch 005: 4160 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51936, ups=3, wpb=15179.069, bsz=561.171, num_updates=39599, lr=0.000158912, gnorm=0.477, clip=0.000, oom=0.000, loss_scale=16.000, wall=11778, train_wall=10663 | |
| epoch 005: 4170 / 8862 loss=4.081, nll_loss=2.428, ppl=5.38, wps=51937, ups=3, wpb=15179.435, bsz=561.205, num_updates= |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment