Created March 15, 2021 11:29
Loading data using a custom generator
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


def load_datatest(train_path, epochs=100, x_data=x_data, tokenizer=tokenizer,
                  num_decoder_tokens=1500, training_list=train_list,
                  batch_size=32, maxlen=10):
    """Yield ([encoder_input, decoder_input], decoder_target) batches."""
    encoder_input_data = []
    decoder_input_data = []
    decoder_target_data = []
    videoId = []
    videoSeq = []
    # separate the video ids and the video captions
    for cap in training_list:
        caption = cap[0]
        videoId.append(cap[1])
        videoSeq.append(caption)
    # convert the captions to token sequences and pad them to equal length
    train_sequences = tokenizer.texts_to_sequences(videoSeq)
    train_sequences = pad_sequences(train_sequences, padding='post',
                                    truncating='post', maxlen=maxlen)
    filesize = len(train_sequences)
    n = 0
    for i in range(epochs):
        for idx in range(filesize):
            n += 1
            # encoder input: precomputed video features looked up by video id
            encoder_input_data.append(x_data[videoId[idx]])
            # one-hot encode the caption; the decoder input is the caption
            # shifted one step relative to the decoder target (teacher forcing)
            y = to_categorical(train_sequences[idx], num_decoder_tokens)
            decoder_input_data.append(y[:-1])
            decoder_target_data.append(y[1:])
            if n == batch_size:
                encoder_input = np.array(encoder_input_data)
                decoder_input = np.array(decoder_input_data)
                decoder_target = np.array(decoder_target_data)
                # reset the buffers for the next batch
                encoder_input_data = []
                decoder_input_data = []
                decoder_target_data = []
                n = 0
                yield ([encoder_input, decoder_input], decoder_target)
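A minimal usage sketch (not part of the gist): assuming x_data maps video ids to precomputed feature arrays, train_list holds (caption, videoId) pairs, and model is an already compiled Keras encoder-decoder whose inputs match the shapes produced above, the generator can be passed directly to model.fit with an explicit steps_per_epoch, since it loops over the data itself and never signals the end of an epoch. The path 'training_data' is hypothetical; train_path is not actually used inside the generator.

train_generator = load_datatest('training_data', epochs=150,
                                x_data=x_data, tokenizer=tokenizer,
                                num_decoder_tokens=1500,
                                training_list=train_list,
                                batch_size=32, maxlen=10)

# steps_per_epoch is required because the generator yields batches indefinitely
model.fit(train_generator,
          steps_per_epoch=len(train_list) // 32,
          epochs=150)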