borisdayma · June 12, 2020 18:34
diff --git a/gistfile1.txt b/gistfile1.txt
 ```
 boris@Desk-Ubuntu:~/Projects/tests$ python ../transformers/examples/text-classification/run_tf_glue.py --model_name_or_path bert-base-cased --task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --num_train_epochs 3.0 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir --logging_dir log --evaluate_during_training --eval_steps 50 --logging_steps 10
 06/12/2020 13:16:33 - INFO - transformers.training_args_tf -   Tensorflow: setting up strategy
 2020-06-12 13:16:33.776937: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
 2020-06-12 13:16:33.798135: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:33.798688: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
 pciBusID: 0000:01:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
 coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
 2020-06-12 13:16:33.798870: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
 2020-06-12 13:16:33.800300: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
 2020-06-12 13:16:33.801603: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
 2020-06-12 13:16:33.801954: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
 2020-06-12 13:16:33.803576: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
 2020-06-12 13:16:33.804394: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
 2020-06-12 13:16:33.807845: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
 2020-06-12 13:16:33.808030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:33.808639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:33.809191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0
 06/12/2020 13:16:33 - INFO - __main__ -   n_gpu: 1, distributed training: False, 16-bits training: False
 06/12/2020 13:16:33 - INFO - __main__ -   Training/evaluation parameters TFTrainingArguments(output_dir='/tmp/MRPC/', overwrite_output_dir=True, do_train=True, do_eval=True, do_predict=False, evaluate_during_training=True, per_device_train_batch_size=32, per_device_eval_batch_size=8, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, learning_rate=2e-05, weight_decay=0.0, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, warmup_steps=0, logging_dir='log', logging_first_step=False, logging_steps=10, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level='O1', local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, dataloader_drop_last=False, tpu_name=None, eval_steps=50, debug=False)
 06/12/2020 13:16:33 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json from cache at /home/boris/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391
 06/12/2020 13:16:33 - INFO - transformers.configuration_utils -   Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": "mrpc",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 28996
 }

 06/12/2020 13:16:34 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json from cache at /home/boris/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.9da767be51e1327499df13488672789394e2ca38b877837e52618a67d7002391
 06/12/2020 13:16:34 - INFO - transformers.configuration_utils -   Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 28996
 }

 06/12/2020 13:16:34 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /home/boris/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
 06/12/2020 13:16:35 - INFO - transformers.modeling_tf_utils -   loading weights file https://cdn.huggingface.co/bert-base-cased-tf_model.h5 from cache at /home/boris/.cache/torch/transformers/17e64dc7dc200314bc70dd8198010773501bcabb65a493c1ae7183b8c9a5b1ff.908e74db1113031d6827eb22808cf370b0aeded6e6ac20d0f07af0a334e195cc.h5
 2020-06-12 13:16:35.215122: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
 2020-06-12 13:16:35.237139: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 3299770000 Hz
 2020-06-12 13:16:35.237442: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560b206828a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
 2020-06-12 13:16:35.237463: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
 2020-06-12 13:16:35.237700: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.238413: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
 pciBusID: 0000:01:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
 coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
 2020-06-12 13:16:35.238454: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
 2020-06-12 13:16:35.238472: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
 2020-06-12 13:16:35.238489: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
 2020-06-12 13:16:35.238507: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
 2020-06-12 13:16:35.238523: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
 2020-06-12 13:16:35.238539: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
 2020-06-12 13:16:35.238555: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
 2020-06-12 13:16:35.238639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.239366: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.240021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0
 2020-06-12 13:16:35.240056: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
 2020-06-12 13:16:35.338437: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
 2020-06-12 13:16:35.338462: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 
 2020-06-12 13:16:35.338468: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N 
 2020-06-12 13:16:35.338667: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.339274: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.339849: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
 2020-06-12 13:16:35.340362: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5)
 2020-06-12 13:16:35.341869: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560b24c7d2b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
 2020-06-12 13:16:35.341882: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce RTX 2080 Ti, Compute Capability 7.5
 2020-06-12 13:16:35.877674: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
 06/12/2020 13:16:36 - INFO - transformers.modeling_tf_utils -   Layers of TFBertForSequenceClassification not initialized from pretrained model: ['classifier', 'dropout_37']
 06/12/2020 13:16:36 - INFO - transformers.modeling_tf_utils -   Layers from pretrained model not used in TFBertForSequenceClassification: ['nsp___cls', 'mlm___cls']
 06/12/2020 13:16:36 - INFO - absl -   Load dataset info from /home/boris/tensorflow_datasets/glue/mrpc/1.0.0
 06/12/2020 13:16:36 - INFO - absl -   Reusing dataset glue (/home/boris/tensorflow_datasets/glue/mrpc/1.0.0)
 06/12/2020 13:16:36 - INFO - absl -   Constructing tf.data.Dataset for split train, from /home/boris/tensorflow_datasets/glue/mrpc/1.0.0
 06/12/2020 13:16:37 - INFO - transformers.data.processors.glue -   Using label list ['0', '1'] for task mrpc
 06/12/2020 13:16:37 - INFO - transformers.data.processors.glue -   Using output mode classification for task mrpc
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   guid: 1680
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1109, 6742, 187, 24985, 1209, 2496, 1112, 24628, 25166, 1116, 117, 6205, 1111, 2554, 1104, 1763, 1447, 119, 102, 1109, 187, 24985, 2496, 1112, 24628, 25166, 1116, 117, 2232, 1113, 1565, 8089, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=0)
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   guid: 1456
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 13568, 1190, 1406, 3029, 1104, 25941, 112, 188, 3813, 1156, 1435, 1121, 1543, 17306, 1105, 2526, 1170, 1103, 3060, 2107, 7897, 4779, 1110, 2063, 119, 102, 13568, 1190, 1406, 3029, 1104, 25941, 112, 188, 3813, 1156, 1435, 1121, 1543, 17306, 1105, 2526, 1170, 1103, 3060, 2107, 7897, 4779, 1110, 2335, 117, 11577, 1343, 5028, 4597, 112, 189, 1962, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=0)
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   guid: 3017
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 8454, 118, 2268, 14779, 109, 12620, 119, 128, 1550, 1107, 1157, 1963, 1314, 1214, 1105, 1355, 1113, 1106, 4821, 109, 1969, 1495, 119, 128, 1550, 119, 102, 8454, 118, 2268, 117, 6317, 153, 2349, 118, 1492, 117, 14779, 109, 12620, 119, 128, 1550, 1107, 1157, 1148, 5138, 1105, 1355, 1113, 1106, 1321, 1107, 109, 1969, 1495, 119, 128, 1550, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   guid: 2896
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1109, 1617, 1248, 3861, 2686, 1274, 112, 189, 1511, 3736, 1121, 1412, 2053, 1120, 3291, 8223, 22540, 119, 102, 1109, 1214, 118, 2403, 2849, 1202, 1136, 1511, 3736, 1121, 3291, 8223, 22540, 6701, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   guid: 499
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 8848, 126, 119, 126, 1110, 1907, 2786, 1107, 1103, 1244, 1311, 1105, 1803, 117, 1111, 170, 2547, 3945, 1104, 1164, 109, 1367, 117, 5689, 119, 102, 8848, 126, 119, 126, 1110, 1208, 1907, 1107, 1103, 158, 119, 156, 119, 1105, 1803, 1194, 6998, 3518, 20470, 1231, 25421, 1116, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=0)
 06/12/2020 13:16:38 - INFO - absl -   Load dataset info from /home/boris/tensorflow_datasets/glue/mrpc/1.0.0
 06/12/2020 13:16:38 - INFO - absl -   Reusing dataset glue (/home/boris/tensorflow_datasets/glue/mrpc/1.0.0)
 06/12/2020 13:16:38 - INFO - absl -   Constructing tf.data.Dataset for split validation, from /home/boris/tensorflow_datasets/glue/mrpc/1.0.0
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   Using label list ['0', '1'] for task mrpc
 06/12/2020 13:16:38 - INFO - transformers.data.processors.glue -   Using output mode classification for task mrpc
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   guid: 3155
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1109, 1437, 112, 188, 8354, 4634, 1503, 118, 3861, 18155, 1679, 2934, 1118, 170, 24585, 119, 102, 1109, 1419, 1163, 1142, 20968, 18155, 1118, 170, 24585, 170, 2934, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   guid: 2472
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 12008, 14791, 20452, 10652, 6005, 117, 3614, 117, 9315, 1103, 7791, 1120, 1103, 9720, 2410, 1298, 16358, 20080, 4396, 1187, 1131, 1144, 1151, 1690, 1111, 1317, 1201, 117, 1163, 1123, 1401, 117, 3162, 20452, 8265, 18078, 119, 102, 1109, 7159, 1108, 2856, 9031, 1121, 12008, 14791, 20452, 10652, 6005, 117, 3614, 117, 1120, 1103, 9720, 2410, 118, 1298, 16358, 20080, 4396, 1187, 1131, 1144, 2077, 1111, 1317, 1201, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   guid: 3584
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1109, 2025, 117, 1502, 6356, 1107, 1103, 4897, 22175, 15240, 2713, 117, 1110, 2620, 1106, 1145, 6058, 1106, 3612, 117, 1157, 5752, 1163, 119, 102, 1109, 2025, 117, 3303, 1113, 1103, 16570, 1104, 4297, 14105, 117, 1108, 1217, 1502, 2052, 1107, 1103, 4897, 22175, 15240, 2713, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=0)
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   guid: 3523
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1135, 1145, 3272, 170, 1434, 118, 1107, 151, 14962, 2137, 6746, 10909, 6829, 1200, 1177, 1115, 1344, 118, 3476, 151, 14962, 2137, 6746, 2962, 1169, 1129, 1215, 1443, 1515, 1106, 17812, 1126, 2509, 1619, 11451, 119, 102, 1109, 156, 1495, 1658, 19598, 12882, 1144, 170, 1434, 118, 1107, 151, 14962, 2137, 6746, 10909, 6829, 1200, 117, 1111, 1859, 117, 1177, 1115, 1344, 118, 3476, 151, 14962, 2137, 6746, 2962, 1169, 1129, 4631, 1443, 1126, 2509, 1619, 11451, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   *** Example ***
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   guid: 1782
 06/12/2020 13:16:39 - INFO - transformers.data.processors.glue -   features: InputFeatures(input_ids=[101, 1987, 2392, 150, 1665, 1658, 21649, 117, 4947, 112, 188, 1266, 3995, 117, 1163, 1191, 1103, 3850, 1125, 1151, 8318, 2206, 4947, 1156, 1138, 5366, 1167, 1104, 1117, 3575, 4226, 119, 102, 1987, 2392, 150, 1665, 1658, 21649, 117, 1103, 1266, 112, 188, 15175, 117, 1163, 1125, 1103, 3850, 1151, 8318, 1106, 4947, 2206, 117, 1119, 1156, 1138, 5366, 1167, 1104, 1117, 3575, 3053, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], attention_mask=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], token_type_ids=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label=1)
 06/12/2020 13:16:39 - INFO - transformers.trainer_tf -   ***** Running training *****
 06/12/2020 13:16:39 - INFO - transformers.trainer_tf -     Num examples = 3668
 06/12/2020 13:16:39 - INFO - transformers.trainer_tf -     Num Epochs = 3
 06/12/2020 13:16:39 - INFO - transformers.trainer_tf -     Total optimization steps = 115
 WARNING:tensorflow:From /home/boris/Projects/temp/transformers/src/transformers/trainer_tf.py:355: StrategyBase.experimental_run_v2 (from tensorflow.python.distribute.distribute_lib) is deprecated and will be removed in a future version.
 Instructions for updating:
 renamed to `run`
 06/12/2020 13:16:40 - WARNING - tensorflow -   From /home/boris/Projects/temp/transformers/src/transformers/trainer_tf.py:355: StrategyBase.experimental_run_v2 (from tensorflow.python.distribute.distribute_lib) is deprecated and will be removed in a future version.
 Instructions for updating:
 renamed to `run`
 WARNING:tensorflow:From /home/boris/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/ops/resource_variable_ops.py:1813: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
 Instructions for updating:
 If using Keras pass *_constraint arguments to layers.
 06/12/2020 13:16:45 - WARNING - tensorflow -   From /home/boris/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/ops/resource_variable_ops.py:1813: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
 Instructions for updating:
 If using Keras pass *_constraint arguments to layers.
 /home/boris/miniconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  warnings.warn(
 06/12/2020 13:17:19 - INFO - transformers.trainer_tf -   Epoch 1 Step 10 Train Loss 0.6540
 06/12/2020 13:17:22 - INFO - transformers.trainer_tf -   Epoch 1 Step 20 Train Loss 0.5929
 06/12/2020 13:17:25 - INFO - transformers.trainer_tf -   Epoch 1 Step 30 Train Loss 0.5432
 06/12/2020 13:17:28 - INFO - transformers.trainer_tf -   Epoch 1 Step 40 Train Loss 0.5231
 06/12/2020 13:17:31 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:17:31 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:17:34 - INFO - transformers.trainer_tf -   Epoch 1 Step 50 Validation Metrics {'eval_eval_loss': 0.45943707, 'eval_eval_acc': 0.7132352941176471, 'eval_eval_f1': 0.8245877061469266, 'eval_eval_acc_and_f1': 0.7689115001322868, 'learning_rate': 1.1304346e-05}
 06/12/2020 13:17:34 - INFO - transformers.trainer_tf -   Epoch 1 Step 50 Train Loss 0.5019
 06/12/2020 13:17:38 - INFO - transformers.trainer_tf -   Epoch 1 Step 60 Train Loss 0.5071
 06/12/2020 13:17:41 - INFO - transformers.trainer_tf -   Epoch 1 Step 70 Train Loss 0.5175
 06/12/2020 13:17:44 - INFO - transformers.trainer_tf -   Epoch 1 Step 80 Train Loss 0.5219
 06/12/2020 13:17:47 - INFO - transformers.trainer_tf -   Epoch 1 Step 90 Train Loss 0.5415
 06/12/2020 13:17:51 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:17:51 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:17:52 - INFO - transformers.trainer_tf -   Epoch 1 Step 100 Validation Metrics {'eval_eval_loss': 0.47651273, 'eval_eval_acc': 0.7671568627450981, 'eval_eval_f1': 0.8470209339774558, 'eval_eval_acc_and_f1': 0.807088898361277, 'learning_rate': 2.6086961e-06}
 06/12/2020 13:17:52 - INFO - transformers.trainer_tf -   Epoch 1 Step 100 Train Loss 0.4664
 06/12/2020 13:17:55 - INFO - transformers.trainer_tf -   Epoch 1 Step 110 Train Loss 0.5018
 06/12/2020 13:18:07 - INFO - transformers.trainer_tf -   Epoch 2 Step 120 Train Loss 0.5442
 06/12/2020 13:18:11 - INFO - transformers.trainer_tf -   Epoch 2 Step 130 Train Loss 0.4833
 06/12/2020 13:18:14 - INFO - transformers.trainer_tf -   Epoch 2 Step 140 Train Loss 0.5221
 06/12/2020 13:18:17 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:18:17 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:18:19 - INFO - transformers.trainer_tf -   Epoch 2 Step 150 Validation Metrics {'eval_eval_loss': 0.45965827, 'eval_eval_acc': 0.7671568627450981, 'eval_eval_f1': 0.8484848484848485, 'eval_eval_acc_and_f1': 0.8078208556149733, 'learning_rate': 0.0}
 06/12/2020 13:18:19 - INFO - transformers.trainer_tf -   Epoch 2 Step 150 Train Loss 0.4236
 06/12/2020 13:18:22 - INFO - transformers.trainer_tf -   Epoch 2 Step 160 Train Loss 0.6103
 06/12/2020 13:18:25 - INFO - transformers.trainer_tf -   Epoch 2 Step 170 Train Loss 0.4055
 06/12/2020 13:18:28 - INFO - transformers.trainer_tf -   Epoch 2 Step 180 Train Loss 0.4773
 06/12/2020 13:18:32 - INFO - transformers.trainer_tf -   Epoch 2 Step 190 Train Loss 0.6860
 06/12/2020 13:18:35 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:18:35 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:18:36 - INFO - transformers.trainer_tf -   Epoch 2 Step 200 Validation Metrics {'eval_eval_loss': 0.4596582, 'eval_eval_acc': 0.7671568627450981, 'eval_eval_f1': 0.8484848484848485, 'eval_eval_acc_and_f1': 0.8078208556149733, 'learning_rate': 0.0}
 06/12/2020 13:18:36 - INFO - transformers.trainer_tf -   Epoch 2 Step 200 Train Loss 0.4985
 06/12/2020 13:18:40 - INFO - transformers.trainer_tf -   Epoch 2 Step 210 Train Loss 0.5482
 06/12/2020 13:18:43 - INFO - transformers.trainer_tf -   Epoch 2 Step 220 Train Loss 0.5836
 06/12/2020 13:18:46 - INFO - transformers.trainer_tf -   Epoch 2 Step 230 Train Loss 0.3954
 06/12/2020 13:18:59 - INFO - transformers.trainer_tf -   Epoch 3 Step 240 Train Loss 0.5232
 06/12/2020 13:19:02 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:19:02 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:19:03 - INFO - transformers.trainer_tf -   Epoch 3 Step 250 Validation Metrics {'eval_eval_loss': 0.4596582, 'eval_eval_acc': 0.7671568627450981, 'eval_eval_f1': 0.8484848484848485, 'eval_eval_acc_and_f1': 0.8078208556149733, 'learning_rate': 0.0}
 06/12/2020 13:19:03 - INFO - transformers.trainer_tf -   Epoch 3 Step 250 Train Loss 0.4859
 06/12/2020 13:19:06 - INFO - transformers.trainer_tf -   Epoch 3 Step 260 Train Loss 0.5354
 06/12/2020 13:19:10 - INFO - transformers.trainer_tf -   Epoch 3 Step 270 Train Loss 0.5821
 06/12/2020 13:19:13 - INFO - transformers.trainer_tf -   Epoch 3 Step 280 Train Loss 0.5463
 06/12/2020 13:19:16 - INFO - transformers.trainer_tf -   Epoch 3 Step 290 Train Loss 0.3933
 06/12/2020 13:19:20 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:19:20 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:19:21 - INFO - transformers.trainer_tf -   Epoch 3 Step 300 Validation Metrics {'eval_eval_loss': 0.4596582, 'eval_eval_acc': 0.7671568627450981, 'eval_eval_f1': 0.8484848484848485, 'eval_eval_acc_and_f1': 0.8078208556149733, 'learning_rate': 0.0}
 06/12/2020 13:19:21 - INFO - transformers.trainer_tf -   Epoch 3 Step 300 Train Loss 0.5546
 06/12/2020 13:19:24 - INFO - transformers.trainer_tf -   Epoch 3 Step 310 Train Loss 0.5301
 06/12/2020 13:19:28 - INFO - transformers.trainer_tf -   Epoch 3 Step 320 Train Loss 0.5231
 06/12/2020 13:19:31 - INFO - transformers.trainer_tf -   Epoch 3 Step 330 Train Loss 0.5541
 06/12/2020 13:19:34 - INFO - transformers.trainer_tf -   Epoch 3 Step 340 Train Loss 0.4751
 06/12/2020 13:19:36 - INFO - transformers.trainer_tf -   Saving model in /tmp/MRPC/
 06/12/2020 13:19:36 - INFO - transformers.configuration_utils -   Configuration saved in /tmp/MRPC/config.json
 06/12/2020 13:19:37 - INFO - transformers.modeling_tf_utils -   Model weights saved in /tmp/MRPC/tf_model.h5
 06/12/2020 13:19:37 - INFO - __main__ -   *** Evaluate ***
 06/12/2020 13:19:37 - INFO - transformers.trainer_tf -   ***** Running Evaluation *****
 06/12/2020 13:19:37 - INFO - transformers.trainer_tf -     Batch size = 8
 06/12/2020 13:19:39 - INFO - __main__ -   ***** Eval results *****
 06/12/2020 13:19:39 - INFO - __main__ -     eval_loss = 0.45965827
 06/12/2020 13:19:39 - INFO - __main__ -     eval_acc = 0.7671568627450981
 06/12/2020 13:19:39 - INFO - __main__ -     eval_f1 = 0.8484848484848485
 06/12/2020 13:19:39 - INFO - __main__ -     eval_acc_and_f1 = 0.8078208556149733
 ```