Last active
May 14, 2020 11:47
-
-
Save gauravbansal98/22720008a175b01ecb7174c20fe14895 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# map an integer to a word
def word_for_id(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``integer``.

    Args:
        integer: The index to look up; compared with ``==`` against each
            value of ``tokenizer.word_index``.
        tokenizer: Any object exposing a ``word_index`` mapping of
            word -> index.

    Returns:
        The first word (in the mapping's iteration order) whose index
        matches, or ``None`` when no entry matches.
    """
    # word_index maps word -> index, so the reverse lookup is a scan.
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)
# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length):
    """Generate a description for an image, one word at a time.

    Starting from the seed text ``'startseq'``, each step encodes the text
    produced so far, pads it to ``max_length``, asks the model for a
    prediction, and appends the word mapped from the ``argmax`` of that
    prediction. Generation stops after ``max_length`` steps, when the
    predicted index has no word in the tokenizer, or once ``'endseq'`` has
    been appended.

    Args:
        model: Object whose ``predict([photo, sequence], verbose=0)`` returns
            the next-word prediction.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
        photo: Image input passed to ``model.predict`` unchanged
            (presumably a pre-extracted feature array; confirm with caller).
        max_length: Maximum number of generation steps; also used as the
            ``maxlen`` for padding.

    Returns:
        The generated text, which always begins with ``'startseq'`` and ends
        with ``'endseq'`` if that word was predicted.
    """
    # seed the generation process
    in_text = 'startseq'
    # generate at most max_length words
    for _ in range(max_length):
        # integer encode the text generated so far
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad the encoded input to the fixed length
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict the next word
        prediction = model.predict([photo, sequence], verbose=0)
        # take the index with the highest predicted value
        word_id = argmax(prediction)
        # map that index back to a word
        word = word_for_id(word_id, tokenizer)
        # stop if the index cannot be mapped to a word
        if word is None:
            break
        # append as input for generating the next word
        in_text += ' ' + word
        # stop once the end-of-sequence marker is predicted
        if word == 'endseq':
            break
    return in_text
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment