This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create word embeddings
import spacy

# Load the large English spaCy pipeline.
nlp = spacy.load('en_core_web_lg')

# Width of every embedding row.
# NOTE(review): presumably matches the vector size of en_core_web_lg -- confirm.
embedding_dimension = 300
# Start with one all-zero row per vocabulary index; `vocab_len` and `np` are
# expected to be defined earlier in the notebook.
embedding_matrix = np.zeros((vocab_len, embedding_dimension))
# travel through every word in vocabulary and get its corresponding vector | |
for word, index in tokenizer.word_index.items(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras | |
def create_model(max_caption_length, vocab_length): | |
# sub network for handling the image feature part | |
input_layer1 = keras.Input(shape=(18432)) | |
feature1 = keras.layers.Dropout(0.2)(input_layer1) | |
feature2 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature1) | |
feature3 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature2) | |
feature4 = keras.layers.Dense(max_caption_length*4, activation='relu')(feature3) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.preprocessing.sequence import pad_sequences | |
from keras.utils import to_categorical | |
# generator function to generate inputs for model | |
def create_trianing_data(captions, images, tokenizer, max_caption_length, vocab_len, photos_per_batch): | |
X1, X2, y = list(), list(), list() | |
n=0 | |
# loop through every image |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compute length of vocabulary and maximum length of a caption (for padding)
# NOTE(review): the +1 presumably leaves room for a reserved index 0 -- confirm
# against the tokenizer in use.
vocab_len = len(tokenizer.word_counts) + 1
print(f"Vocabulary length - {vocab_len}")
# Longest caption, counted in single-space-separated tokens.
max_caption_len = max(len(x.split(" ")) for x in all_captions)
print(f"Maximum length of caption - {max_caption_len}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy | |
nlp = spacy.load('en', disable=['tagger', 'parser', 'ner']) | |
# tokenize every caption, remove punctuation, lowercase everything
for key, value in train_image_captions.items(): | |
ls = [] | |
for v in value: | |
doc = nlp(v) | |
new_v = " " | |
for token in doc: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# loading captions from captions file
import pandas as pd

# loading captions.txt (comma-separated)
captions = pd.read_csv('/kaggle/input/flickr8k/captions.txt', sep=",")
# Normalise the header names: strip surrounding whitespace and lowercase them.
captions = captions.rename(columns=lambda x: x.strip().lower())
# Keep only the part of each image name that precedes the first ".".
captions['image'] = captions['image'].apply(lambda x: x.split(".")[0])
# Restrict the frame to the two columns used afterwards.
captions = captions[['image', 'caption']]
# adding <start> and <end> to every caption
captions['caption'] = "<start> " + captions['caption'] + " <end>"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.models import Model
from keras.preprocessing import image

# load the ResNet50 Model with ImageNet weights and without its top layers
feature_extractor = ResNet50(weights='imagenet', include_top=False)
# Build a model that shares ResNet50's input but stops at the output of its
# second-to-last layer.
feature_extractor_new = Model(feature_extractor.input, feature_extractor.layers[-2].output)
# Print the layer-by-layer summary of the truncated model.
feature_extractor_new.summary()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from keras.preprocessing import image | |
import numpy as np | |
# function to extract features from image | |
def extract_image_features(): | |
model = tf.keras.models.Sequential() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import statements | |
import tensorflow as tf | |
from keras.preprocessing.image import ImageDataGenerator | |
import numpy as np | |
# loading training data | |
train_datagen = ImageDataGenerator( | |
rescale=1./255, | |
shear_range=0.2, | |
zoom_range=0.2, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# adding a flatten layer to CNN
cnn.add(tf.keras.layers.Flatten())
# adding fully connected layers
cnn.add(tf.keras.layers.Dense(128, activation='relu'))
cnn.add(tf.keras.layers.Dense(64, activation='relu'))
# output layer -> 6 neurons for 6 different classes
# softmax is the activation used for multiclass classification;
# for binary classification use sigmoid as the activation function instead
cnn.add(tf.keras.layers.Dense(6, activation='softmax'))