Skip to content

Instantly share code, notes, and snippets.

@Shreyz-max
Created March 15, 2021 11:29
Show Gist options
  • Save Shreyz-max/b2385e5ff38c48cf59f3e87d668ab293 to your computer and use it in GitHub Desktop.
Save Shreyz-max/b2385e5ff38c48cf59f3e87d668ab293 to your computer and use it in GitHub Desktop.
loading data using custom generator
def load_datatest(
    train_path,
    epochs=100,
    x_data=x_data,
    tokenizer=tokenizer,
    num_decoder_tokens=1500,
    training_list=train_list,
    batch_size=32,
    maxlen=10,
):
    """Generate ``([encoder_input, decoder_input], decoder_target)`` batches.

    Each entry of ``training_list`` is indexed as ``(caption, video_id, ...)``.
    Captions are tokenized, padded/truncated to ``maxlen`` and encoded with
    ``to_categorical``; the encoded sequence minus its last step becomes the
    decoder input and the sequence minus its first step becomes the decoder
    target (teacher forcing). The encoder input is ``x_data[video_id]``.

    NOTE(review): ``pad_sequences`` and ``to_categorical`` are presumably the
    Keras utilities and ``tokenizer`` a Keras ``Tokenizer`` -- confirm against
    the file's imports.

    Args:
        train_path: Unused by this function; kept for interface compatibility.
        epochs: Number of full passes over ``training_list``.
        x_data: Mapping indexed by video id that returns the encoder features.
        tokenizer: Object providing ``texts_to_sequences``.
        num_decoder_tokens: Class count passed to ``to_categorical``.
        training_list: Iterable of entries where ``entry[0]`` is the caption
            and ``entry[1]`` is the video id.
        batch_size: Number of samples per yielded batch.
        maxlen: Length captions are padded/truncated to.

    Yields:
        ``([encoder_input, decoder_input], decoder_target)`` where each item
        is a NumPy array stacking exactly ``batch_size`` samples.

    Notes:
        Samples left over at the end of an epoch are not dropped; they are
        carried into the first batch of the next epoch. Whatever is left
        after the final epoch (fewer than ``batch_size`` samples) is never
        yielded.
    """
    # Separate the video ids from the video captions.
    video_ids = []
    captions = []
    for cap in training_list:
        captions.append(cap[0])
        video_ids.append(cap[1])

    # Convert the captions to tokens and pad them to equal sizes. The ragged
    # token lists are handed to pad_sequences directly: wrapping them in
    # np.array() first raises ValueError on NumPy >= 1.24 and is not needed.
    train_sequences = tokenizer.texts_to_sequences(captions)
    train_sequences = pad_sequences(
        train_sequences, padding='post', truncating='post', maxlen=maxlen
    )

    encoder_input_data = []
    decoder_input_data = []
    decoder_target_data = []

    for _ in range(epochs):
        for video_id, sequence in zip(video_ids, train_sequences):
            encoder_input_data.append(x_data[video_id])
            y = to_categorical(sequence, num_decoder_tokens)
            # Decoder sees steps [0, T-1) and must predict steps [1, T).
            decoder_input_data.append(y[:-1])
            decoder_target_data.append(y[1:])

            if len(encoder_input_data) == batch_size:
                encoder_input = np.array(encoder_input_data)
                decoder_input = np.array(decoder_input_data)
                decoder_target = np.array(decoder_target_data)
                # Start fresh buffers before handing the batch to the caller.
                encoder_input_data = []
                decoder_input_data = []
                decoder_target_data = []
                yield ([encoder_input, decoder_input], decoder_target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment