Skip to content

Instantly share code, notes, and snippets.

# create word embeddings
import spacy
# Load the large English spaCy pipeline; its word vectors are the source of the embeddings.
nlp = spacy.load('en_core_web_lg')
# Width of one embedding row (presumably the vector size of the pipeline above -- confirm).
embedding_dimension = 300
# Zero-filled table with one row per vocabulary index and one column per embedding component.
embedding_matrix = np.zeros(shape=(vocab_len, embedding_dimension))
# iterate over every word in the vocabulary and get its corresponding vector
for word, index in tokenizer.word_index.items():
import keras
def create_model(max_caption_length, vocab_length):
# sub network for handling the image feature part
input_layer1 = keras.Input(shape=(18432))
feature1 = keras.layers.Dropout(0.2)(input_layer1)
feature2 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature1)
feature3 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature2)
feature4 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature3)
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
# generator function to generate inputs for model
def create_trianing_data(captions, images, tokenizer, max_caption_length, vocab_len, photos_per_batch):
X1, X2, y = list(), list(), list()
n=0
# loop through every image
# Vocabulary size and longest caption length; the latter is used for padding.
# One extra slot is added on top of the distinct words the tokenizer counted
# (presumably for a reserved padding index -- confirm against the tokenizer in use).
vocab_len = 1 + len(tokenizer.word_counts)
print(f"Vocabulary length - {vocab_len}")
# Caption length is measured in single-space-separated pieces.
max_caption_len = max(len(caption.split(" ")) for caption in all_captions)
print(f"Maximum length of caption - {max_caption_len}")
import spacy
# English pipeline with the tagger, parser and NER components switched off.
# The full package name is spelled out because spaCy v3+ rejects the old 'en'
# shortcut link (error E941); this assumes 'en' pointed at the small English
# model, which is what the shortcut linked to by default -- confirm for this environment.
nlp = spacy.load('en_core_web_sm', disable=['tagger', 'parser', 'ner'])
# tokenize every caption, remove punctuation, lowercase everything
for key, value in train_image_captions.items():
ls = []
for v in value:
doc = nlp(v)
new_v = " "
for token in doc:
# Load the captions file into a two-column (image, caption) table.
import pandas as pd

# Read captions.txt and normalise the header names (trim whitespace, lowercase).
captions = (
    pd.read_csv('/kaggle/input/flickr8k/captions.txt', sep=",")
    .rename(columns=lambda column_name: column_name.strip().lower())
)
# Keep only the part of each image file name that precedes its first ".".
captions['image'] = captions['image'].map(lambda file_name: file_name.split(".")[0])
# Restrict the table to the two columns used downstream.
captions = captions[['image', 'caption']]
# Wrap every caption in the <start> / <end> marker tokens.
captions['caption'] = "<start> " + captions['caption'] + " <end>"
import tensorflow as tf
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.models import Model
# Build ResNet50 with ImageNet weights and without its top (classification) layers.
feature_extractor = ResNet50(include_top=False, weights='imagenet')
# Re-wire it so that the output is taken from the second-to-last layer of that network.
feature_extractor_new = Model(
    inputs=feature_extractor.input,
    outputs=feature_extractor.layers[-2].output,
)
# Print the layer-by-layer overview of the truncated model.
feature_extractor_new.summary()
import tensorflow as tf
from keras.preprocessing import image
import numpy as np
# function to extract features from image
def extract_image_features():
model = tf.keras.models.Sequential()
# import statements
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
# loading training data
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
# Flatten the output of the layers already in the CNN.
cnn.add(tf.keras.layers.Flatten())
# Fully connected ReLU layers: 128 units, then 64 units.
for hidden_units in (128, 64):
    cnn.add(tf.keras.layers.Dense(hidden_units, activation='relu'))
# Output layer: 6 neurons for 6 different classes. Softmax is the activation
# used for multiclass classification; for binary classification use sigmoid.
cnn.add(tf.keras.layers.Dense(6, activation='softmax'))