Hannes Hapke hanneshapke

Machine Learning @digits. Google Developer Expert for Machine Learning. Co-author of ML publications: https://bit.ly/hannes-hapke-books

hanneshapke / bert_data_structure.py

Created March 9, 2020 18:03

hanneshapke / mirrored_strategy.py

Created March 9, 2020 18:01

	# Create the distribution strategy first, then build the model inside its
	# scope so the model construction happens under the strategy.
	mirrored_strategy = tf.distribute.MirroredStrategy()
	with mirrored_strategy.scope():
	    model = get_model(tf_transform_output=tf_transform_output)

hanneshapke / run_fn.py

Created March 9, 2020 18:00

	def run_fn(fn_args: TrainerFnArgs):
	    """Build the train/eval datasets and the model for a training run.

	    Args:
	        fn_args: Trainer arguments; this snippet reads its
	            ``transform_output``, ``train_files`` and ``eval_files``
	            attributes.
	    """
	    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

	    # Both datasets are built by the same helper with the same third
	    # argument (32) -- presumably the batch size; confirm against _input_fn.
	    train_dataset = _input_fn(
	        fn_args.train_files, tf_transform_output, 32)
	    eval_dataset = _input_fn(
	        fn_args.eval_files, tf_transform_output, 32)

	    # The model is constructed inside the strategy scope.
	    mirrored_strategy = tf.distribute.MirroredStrategy()
	    with mirrored_strategy.scope():
	        model = get_model(tf_transform_output=tf_transform_output)
	    # NOTE(review): the visible snippet ends here; the datasets and model
	    # are not used yet, so the remainder of the function is presumably cut
	    # off by the preview.

hanneshapke / cast_between_tft_tfhub.py

Created March 9, 2020 18:00

	# Cast each of the three model inputs to int32, in one pass over the
	# feature names.
	input_word_ids, input_mask, input_type_ids = (
	    tf.cast(inputs[feature_name], dtype=tf.int32)
	    for feature_name in ("input_word_ids", "input_mask", "input_type_ids")
	)

hanneshapke / model_architecture.py

Last active March 20, 2020 18:43

	# Start from the transformed feature spec and remove the label entry so
	# that only the model input features remain.
	feature_spec = tf_transform_output.transformed_feature_spec()
	feature_spec.pop(_LABEL_KEY)

	# One int32 Keras Input per remaining feature. The shape is written as an
	# explicit one-element tuple: "(max_seq_length)" is just a parenthesised
	# int, not a tuple.
	inputs = {
	    key: tf.keras.layers.Input(
	        shape=(max_seq_length,), name=key, dtype=tf.int32)
	    for key in feature_spec
	}

	# NOTE(review): the Inputs above are already declared as int32, so these
	# casts look like no-ops in this snippet -- confirm before removing.
	input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
	input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
	input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)

hanneshapke / input_type_ids.py

Created March 9, 2020 17:50

# Build input_type_ids as an all-zero tensor with the same shape and dtype as
# input_mask (presumably single-segment input, so every position gets type
# id 0 -- confirm).
input_type_ids = tf.zeros_like(input_mask)

hanneshapke / preprocessing_fn.py

Created March 9, 2020 17:48

	def preprocessing_fn(inputs):

	def tokenize_text(text, sequence_length=MAX_SEQ_LEN):
	...
	return tf.reshape(tokens, [-1, sequence_length])

	def preprocess_bert_input(text, segment_id=0):
	input_word_ids = tokenize_text(text)
	...
	return (

hanneshapke / adding_of_CLS_and_SEP_tokens.py

Created March 9, 2020 17:47

adding_of_CLS_and_SEP_tokens

	# Ids of the special tokens placed at the start and end of every sequence
	# (presumably the [CLS]/[SEP] ids of the vocabulary in use -- confirm).
	CLS_ID = tf.constant(101, dtype=tf.int64)
	SEP_ID = tf.constant(102, dtype=tf.int64)

	# A single column per row of `text`, filled with the respective token id.
	bookend_shape = [tf.shape(text)[0], 1]
	start_tokens = tf.fill(bookend_shape, CLS_ID)
	end_tokens = tf.fill(bookend_shape, SEP_ID)

	# Keep at most sequence_length - 2 tokens per row so the two special
	# tokens still fit, then wrap each row with them.
	tokens = tf.concat(
	    [start_tokens, tokens[:, :sequence_length - 2], end_tokens], axis=1)

hanneshapke / call_berttokenizer.py

Created March 9, 2020 17:45

# Tokenize the input text with the configured tokenizer.
# NOTE(review): if bert_tokenizer is a tensorflow_text BertTokenizer, the
# result is presumably a ragged tensor of token ids -- confirm before slicing.
tokens = bert_tokenizer.tokenize(text)

hanneshapke / partial_setup_of_berttokenizer_part_3.py

Created March 9, 2020 16:59

	# Configure the tokenizer: the vocabulary is supplied via
	# `vocab_file_path`, tokens are emitted as int64 values, and lower-casing
	# follows `do_lower_case`.
	bert_tokenizer = text.BertTokenizer(
	    vocab_lookup_table=vocab_file_path,
	    token_out_type=tf.int64,
	    lower_case=do_lower_case,
	)