import tensorflow as tf

class Spa2EngTranslator(tf.keras.Model):
    def __init__(self, eng_text_processor, spa_text_processor, unit=512):
        super().__init__()
        # __init__ is empty in the original gist; the layers below are reconstructed from their usage in call().
        self.eng_text_processor = eng_text_processor
        self.spa_text_processor = spa_text_processor
        self.spa_embedding = tf.keras.layers.Embedding(
            spa_text_processor.vocabulary_size(), unit, mask_zero=True)
        self.spa_rnn = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(unit, return_sequences=True, return_state=True))

    def call(self, eng_text, spa_text):
        spa_tokens = self.spa_text_processor(spa_text)  # Shape: (batch, Ts)
        spa_vectors = self.spa_embedding(spa_tokens)  # Shape: (batch, Ts, unit)
        spa_rnn_out, fhstate, fcstate, bhstate, bcstate = self.spa_rnn(spa_vectors)  # Shape: (batch, Ts, 2 * unit), then (batch, unit) x 4
        spa_hstate = tf.concat([fhstate, bhstate], -1)  # Shape: (batch, 2 * unit)
        spa_cstate = tf.concat([fcstate, bcstate], -1)  # Shape: (batch, 2 * unit)
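For context, the two text processors are expected to be TextVectorization layers adapted to each language's corpus. A minimal construction sketch, with hypothetical toy corpora standing in for the real sentence pairs:

import tensorflow as tf

spa_corpus = tf.constant(['hola mundo', 'buenos dias'])   # hypothetical toy data
eng_corpus = tf.constant(['hello world', 'good morning'])

spa_text_processor = tf.keras.layers.TextVectorization(max_tokens=5000)
spa_text_processor.adapt(spa_corpus)
eng_text_processor = tf.keras.layers.TextVectorization(max_tokens=5000)
eng_text_processor.adapt(eng_corpus)

translator = Spa2EngTranslator(eng_text_processor, spa_text_processor, unit=512)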
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

def train(epochs, model, batch=64, shuffle=1000):
    # Per-token loss with no reduction, so padding can be masked out manually.
    loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True,
        reduction=tf.keras.losses.Reduction.NONE)
    opt = tf.keras.optimizers.Adam()
    losses = []
    # `dataset` is the sentence-pair dataset assumed to be defined at module scope.
    ds = dataset.shuffle(shuffle).batch(batch).cache()
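    # --- Continuation sketch: the gist ends above. This loop is illustrative and
    # --- assumes the dataset yields (spa, eng) string pairs and that
    # --- model(eng, spa) returns per-token logits aligned with the target tokens.
    for epoch in range(epochs):
        epoch_losses = []
        for spa, eng in ds:
            with tf.GradientTape() as tape:
                logits = model(eng, spa)
                targets = model.eng_text_processor(eng)[:, 1:]  # drop the leading start token
                mask = tf.cast(targets != 0, tf.float32)        # ignore padding positions
                loss = tf.reduce_sum(loss_fcn(targets, logits) * mask) / tf.reduce_sum(mask)
            grads = tape.gradient(loss, model.trainable_variables)
            opt.apply_gradients(zip(grads, model.trainable_variables))
            epoch_losses.append(loss.numpy())
        losses.append(np.mean(epoch_losses))
        print(f'epoch {epoch + 1}: loss {losses[-1]:.4f}')
    plt.plot(losses)
    plt.xlabel('epoch')
    plt.ylabel('masked cross-entropy')
    return losses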
def translate(spa_text, model, max_seq=100):
    # Encode the Spanish input exactly as Spa2EngTranslator.call does.
    spa_tokens = model.spa_text_processor([spa_text])  # Shape: (1, Ts)
    spa_vectors = model.spa_embedding(spa_tokens, training=False)  # Shape: (1, Ts, unit)
    spa_rnn_out, fhstate, fcstate, bhstate, bcstate = model.spa_rnn(spa_vectors, training=False)  # Shape: (1, Ts, 2 * unit), then (1, unit) x 4
    spa_hstate = tf.concat([fhstate, bhstate], -1)
    spa_cstate = tf.concat([fcstate, bcstate], -1)
    state = [spa_hstate, spa_cstate]  # initial decoder state
    # Inverts token ids back to vocabulary strings. The gist cuts off mid-call;
    # the arguments below are the usual ones for inverting a TextVectorization layer.
    index_from_string = tf.keras.layers.StringLookup(
        vocabulary=model.eng_text_processor.get_vocabulary(),
        mask_token='', invert=True)
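    # --- Continuation sketch: the gist ends above. The greedy decoding loop
    # --- below is illustrative only; it assumes the model also exposes
    # --- eng_embedding, eng_rnn (an LSTM returning state), attention, and
    # --- out_dense layers -- these names are hypothetical, not from the original.
    tokens = model.eng_text_processor(['[START]'])[:, :1]  # seed token, Shape: (1, 1)
    words = []
    for _ in range(max_seq):
        vectors = model.eng_embedding(tokens, training=False)
        out, hstate, cstate = model.eng_rnn(vectors, initial_state=state, training=False)
        state = [hstate, cstate]
        context = model.attention([out, spa_rnn_out], training=False)
        logits = model.out_dense(context, training=False)  # Shape: (1, 1, vocab)
        tokens = tf.argmax(logits, axis=-1)                # greedy pick of the next token
        word = index_from_string(tokens)[0, 0].numpy().decode()
        if word == '[END]':
            break
        words.append(word)
    return ' '.join(words)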
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import tensorflow as tf

def plot_attention(attention, spa, eng):
    # `standardize` is the shared text-cleaning helper used by the text processors.
    spa = standardize(spa).numpy().decode().split()
    eng = standardize(eng).numpy().decode().split()[1:]  # drop the leading [START]-style token
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)
    attention = tf.squeeze(attention).numpy()  # Shape: (Teng, Ts)
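    # --- Continuation sketch: the gist ends above. The usual ending draws the
    # --- weights as a heatmap with source words on x and output words on y.
    ax.matshow(attention, cmap='viridis')
    ax.set_xticklabels([''] + spa, rotation=90)
    ax.set_yticklabels([''] + eng)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.set_xlabel('Input (Spanish)')
    ax.set_ylabel('Output (English)')
    plt.show()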
from nltk.corpus import stopwords

def lower_remove_punctuation_lemmatize_remove_stopwords(txt):
    txt = lower_remove_punctuation_lemmatize(txt)
    stop_words = set(stopwords.words('english'))
    words = [w for w in txt.split() if w not in stop_words]
    return ' '.join(words)

train = preprocess(train_dataset, lower_remove_punctuation_lemmatize_remove_stopwords)
test = preprocess(test_dataset, lower_remove_punctuation_lemmatize_remove_stopwords)
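As a quick sanity check, the full pipeline reduces a sentence to its lowercased, lemmatized content words (this assumes the NLTK 'stopwords' and 'wordnet' corpora have been fetched via nltk.download):

import nltk
nltk.download('stopwords')
nltk.download('wordnet')

print(lower_remove_punctuation_lemmatize_remove_stopwords("The cats are running fast!"))
# -> 'cat running fast'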
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

def lower_remove_punctuation_stem(txt):
    txt = lower_remove_punctuation(txt)
    ps = PorterStemmer()
    words = [ps.stem(w) for w in txt.split()]
    return ' '.join(words)

def lower_remove_punctuation_lemmatize(txt):
    # Body reconstructed to mirror the stemming variant above.
    txt = lower_remove_punctuation(txt)
    wnl = WordNetLemmatizer()
    words = [wnl.lemmatize(w) for w in txt.split()]
    return ' '.join(words)
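For reference, the two normalizers behave quite differently: stemming truncates words to crude roots, while lemmatization maps them to dictionary forms (WordNet data required):

print(lower_remove_punctuation_stem("Studies are running"))       # -> 'studi are run'
print(lower_remove_punctuation_lemmatize("Studies are running"))  # -> 'study are running'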
import re

def lower_remove_punctuation(txt):
    txt = txt.lower()
    # Strip every character that is not a word character or whitespace.
    return re.sub(r'[^\w\s]', '', txt)

train = preprocess(train_dataset, lower_remove_punctuation)
test = preprocess(test_dataset, lower_remove_punctuation)
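The regex keeps only word characters and whitespace, so punctuation vanishes while spacing and digits survive; for example:

print(lower_remove_punctuation("Hello, World! It's 2023."))  # -> 'hello world its 2023'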
def lower(txt):
    return txt.lower()

train = preprocess(train_dataset, lower)
test = preprocess(test_dataset, lower)
import tensorflow as tf

def preprocess(dataset, fn):
    # Apply a plain-Python text-cleaning function to every example, then
    # rebuild a tf.data pipeline from the cleaned strings and labels.
    features = []
    labels = []
    for example, label in dataset:
        features.append(fn(example.numpy().decode('utf-8')))
        labels.append(label.numpy())
    features_dataset = tf.data.Dataset.from_tensor_slices(features)
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((features_dataset, labels_dataset))
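A minimal round trip with a toy in-memory dataset (illustrative values only):

toy = tf.data.Dataset.from_tensor_slices(
    (tf.constant(['Great movie!', 'Awful plot...']), tf.constant([1, 0])))
for text, label in preprocess(toy, lower_remove_punctuation):
    print(text.numpy().decode(), label.numpy())
# great movie 1
# awful plot 0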
import tensorflow_datasets as tfds
import tensorflow as tf

dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

def experiment(train, test):
    VOCAB_SIZE = 1000
    # standardize=None: the text has already been cleaned by preprocess().
    encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
        max_tokens=VOCAB_SIZE, standardize=None)
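    # --- Continuation sketch: the gist ends above. A typical completion adapts
    # --- the encoder on the cleaned training text and trains a small
    # --- embedding + bidirectional-LSTM classifier (hyperparameters illustrative).
    encoder.adapt(train.batch(64).map(lambda text, label: text))
    model = tf.keras.Sequential([
        encoder,
        tf.keras.layers.Embedding(VOCAB_SIZE, 64, mask_zero=True),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)])
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=tf.keras.optimizers.Adam(1e-4),
                  metrics=['accuracy'])
    return model.fit(train.batch(64), epochs=10, validation_data=test.batch(64))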