Created
May 13, 2020 12:35
-
-
Save gauravbansal98/dafdafb8a51137ed85b402326a8af589 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load clean descriptions into memory
def load_clean_descriptions(filename, dataset):
    """Load the descriptions of the images listed in ``dataset``.

    The text returned by ``load_doc(filename)`` is processed line by line.
    Each non-blank line is split on whitespace: the first token is taken as
    the image id and the remaining tokens as the words of one description.

    Args:
        filename: Passed unchanged to ``load_doc``.
        dataset: Container of image ids to keep; only membership (``in``)
            is used.

    Returns:
        A dict mapping each kept image id to a list of its descriptions,
        each wrapped as ``'startseq <words> endseq'``.
    """
    # load document
    doc = load_doc(filename)
    descriptions = dict()
    for line in doc.split('\n'):
        # split line by white space
        tokens = line.split()
        # A blank line (e.g. the one produced by a trailing newline) has no
        # image id; skip it instead of failing on tokens[0].
        if not tokens:
            continue
        # split id from description
        image_id, image_desc = tokens[0], tokens[1:]
        # skip images not in the set
        if image_id not in dataset:
            continue
        # wrap description in tokens
        desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
        # store, creating the per-image list on first use
        descriptions.setdefault(image_id, []).append(desc)
    return descriptions
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment