Skip to content

Instantly share code, notes, and snippets.

View hanneshapke's full-sized avatar

Hannes Hapke hanneshapke

View GitHub Profile
bert_layer = load_bert_layer()
pooled_output, _ = bert_layer(
[input_word_ids,
input_mask,
input_type_ids
]
)
feature_spec = tf_transform_output.transformed_feature_spec()
feature_spec.pop(_LABEL_KEY)
inputs = {
key: tf.keras.layers.Input(
shape=(max_seq_length),
name=key,
dtype=tf.int32)
for key in feature_spec.keys()
}
tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(fn_args.train_files, tf_transform_output, 32)
eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, 32)
...
model.fit(
train_dataset,
validation_data=eval_dataset,
...
)
{
'input_mask': array(
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
'input_type_ids': array(
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
'input_word_ids': array(
[ 101, 2023, 3319, 3397, 27594, 2545, 2005, 2216, 2040, ..., 2014, 102]),
'label': array([0])
}
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = get_model(tf_transform_output=tf_transform_output)
def run_fn(fn_args: TrainerFnArgs):
tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(
fn_args.train_files, tf_transform_output, 32)
eval_dataset = _input_fn(
fn_args.eval_files, tf_transform_output, 32)
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = get_model(tf_transform_output=tf_transform_output)
input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)
feature_spec = tf_transform_output.transformed_feature_spec()
feature_spec.pop(_LABEL_KEY)
inputs = {key: tf.keras.layers.Input(shape=(max_seq_length),
name=key, dtype=tf.int32)
for key in feature_spec.keys()}
input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)
input_type_ids = tf.zeros_like(input_mask)
def preprocessing_fn(inputs):
def tokenize_text(text, sequence_length=MAX_SEQ_LEN):
...
return tf.reshape(tokens, [-1, sequence_length])
def preprocess_bert_input(text, segment_id=0):
input_word_ids = tokenize_text(text)
...
return (