@sergei-mironov
Created October 26, 2019 22:01
Global assignment followed by an error
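# Fine-tune a BERT classifier (uncased_L-12_H-768_A-12) on a GLUE task (MRPC by default)
# with TensorFlow 2.x, reading TFRecord inputs prepared with the TF official models tooling.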
import os
import json
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
assert tf.version.VERSION.startswith('2.')
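# Restrict GPU usage to a single virtual device capped at roughly 5.5 GB of memory.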
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_virtual_device_configuration(
    gpus[0],
    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5.5*1024)])
from official.nlp.bert.input_pipeline import decode_record
from official.nlp.bert_models import classifier_model
from official.nlp.bert_modeling import BertConfig
from official.nlp.optimization import create_optimizer
from keras_radam import RAdam
from typing import Optional
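# Task selection and filesystem layout; all paths are rooted at MRCNLP_ROOT if set,
# otherwise at the current directory.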
task_name = 'MRPC'
is_glue = True
bert_model = 'uncased_L-12_H-768_A-12'
bert_dir = os.environ.get('MRCNLP_ROOT','.')+'/_datasets/tf20/uncased_L-12_H-768_A-12'
bert_config_file = bert_dir+'/'+'bert_config.json'
bert_ckpt_files = bert_dir+'/'+'bert_model.ckpt'
input_tfrecord_path = os.environ.get('MRCNLP_ROOT','.') + \
    '/_datasets/' + ("glue_" if is_glue else "") + task_name + '_tfrecord'
input_meta_data_path = input_tfrecord_path+'/'+task_name+'_meta_data'
train_data_path = input_tfrecord_path+'/'+task_name+'_train.tf_record'
eval_data_path = input_tfrecord_path+'/'+task_name+'_eval.tf_record'
work_dir = os.environ.get('MRCNLP_ROOT','.')+'/_logs/bert_model_dir'
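# Small helper for reading JSON files (the BERT config and the task metadata).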
def _read_json_file(json_file:str)->dict:
    with tf.io.gfile.GFile(json_file, "r") as reader:
        text = reader.read()
    return json.loads(text)
bert_config = BertConfig.from_dict(_read_json_file(bert_config_file))
task_config = _read_json_file(input_meta_data_path)
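# Derived sizes: 95% of the training records are used for training, the remaining 5% for validation.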
batch_size = 8
train_batch_size = batch_size
eval_batch_size = batch_size
train_data_size = int(task_config['train_data_size']*0.95)
valid_data_size = int(task_config['train_data_size'])-train_data_size
eval_data_size = task_config['eval_data_size']
train_steps_per_epoch = int(train_data_size / train_batch_size)
valid_steps_per_epoch = int(valid_data_size / train_batch_size)
eval_steps_per_epoch = int(eval_data_size / eval_batch_size)
train_epoches = 3
train_warmup_steps = int(train_epoches * train_data_size * 0.1 / train_batch_size)
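# Model is a plain holder object; build() attaches the compiled Keras classifier (model)
# and the underlying BERT core model (bert_model) to it.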
class Model:
    model:tf.keras.Model
    def __init__(self):
        pass

def dataset(data_path):
    # Parse a GLUE TFRecord file into (features, label) pairs expected by the classifier.
    max_seq_length = task_config['max_seq_length']
    d = tf.data.TFRecordDataset(data_path)
    name_to_features = {
        'input_ids': tf.io.FixedLenFeature([max_seq_length], tf.int64),
        'input_mask': tf.io.FixedLenFeature([max_seq_length], tf.int64),
        'segment_ids': tf.io.FixedLenFeature([max_seq_length], tf.int64),
        'label_ids': tf.io.FixedLenFeature([], tf.int64),
        'is_real_example': tf.io.FixedLenFeature([], tf.int64),
    }
    d = d.map(lambda record: decode_record(record, name_to_features))
    def _select_data_from_record(record):
        x = {
            'input_word_ids': record['input_ids'],
            'input_mask': record['input_mask'],
            'input_type_ids': record['segment_ids']
        }
        y = record['label_ids']
        return (x, y)
    d = d.map(_select_data_from_record)
    return d
def dataset_train():
    # Split the training TFRecord into a repeated, batched training part
    # and a small held-out validation part.
    d = dataset(train_data_path)
    d = d.shuffle(100)
    dtrain = d.take(train_data_size)
    dtrain = dtrain.repeat()
    dtrain = dtrain.batch(train_batch_size, drop_remainder=True)
    dtrain = dtrain.prefetch(1024)
    dvalid = d.skip(train_data_size)
    dvalid = dvalid.batch(train_batch_size, drop_remainder=False)
    return dtrain, dvalid

def dataset_eval():
    d = dataset(eval_data_path)
    d = d.batch(eval_batch_size, drop_remainder=False)
    d = d.prefetch(1024)
    return d
def build(m):
    # Build the BERT classifier, restore the pretrained checkpoint into the BERT core,
    # and run one evaluation pass.
    num_labels = task_config['num_labels']
    max_seq_length = task_config['max_seq_length']
    keras_model, m.bert_model = classifier_model(bert_config, tf.float32,
                                                 num_labels, max_seq_length)
    output = tf.keras.layers.Activation('softmax')(keras_model.outputs[0])
    model = tf.keras.Model(inputs=keras_model.inputs, outputs=output)
    optimizer = create_optimizer(2e-5, train_steps_per_epoch*train_epoches, train_warmup_steps)
    model.compile(optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    m.model = model
    # print('Before loading checkpoint')
    # m.model.evaluate(dataset_eval(), steps=eval_steps_per_epoch)
    # print(m.bert_model.get_layer('bert_model')._layers[3].kernel)
    checkpoint = tf.train.Checkpoint(model=m.bert_model)
    checkpoint.restore(bert_ckpt_files).assert_consumed()
    # print(m.bert_model.get_layer('bert_model')._layers[3].kernel)
    #
    print('After loading checkpoint')
    m.model.evaluate(dataset_eval(), steps=eval_steps_per_epoch)
    return m
def train(m):
    print('Training')
    m.dt, m.dv = dataset_train()
    m.model.fit(
        m.dt,
        steps_per_epoch=train_steps_per_epoch,
        validation_data=m.dv,
        validation_steps=valid_steps_per_epoch,
        epochs=train_epoches)
    print('After training')
    m.de = dataset_eval()
    m.model.evaluate(m.de, steps=eval_steps_per_epoch)
    return m
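# Global (module-level) assignment: the default Model instance that run() builds and trains in place.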
M:Optional[Model]=Model()
def run(m=M):
    build(m)
    train(m)

if __name__ == '__main__':
    run()