Created
May 13, 2020 12:17
-
-
Save gauravbansal98/9155a41cb98f35d814264e7b1ca59a57 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # extract descriptions for images | |
def load_descriptions(doc):
    """Group image descriptions by image id.

    The text is split into lines on the newline character and every line
    is split on whitespace. The first token of a line is the image
    identifier; the remaining tokens, re-joined with single spaces, form
    the description. Everything from the first '.' onward is dropped from
    the identifier (for example 'img1.jpg#0' becomes 'img1').

    Args:
        doc: Text holding one "<image id> <description>" entry per line.

    Returns:
        A dict mapping each image id to the list of its descriptions, in
        the order in which they appear in ``doc``.
    """
    mapping = dict()
    # process lines
    for line in doc.split('\n'):
        # split line by white space
        tokens = line.split()
        # Skip lines shorter than two characters, and also whitespace-only
        # lines: those produce no tokens, so indexing tokens[0] below would
        # raise IndexError.
        if len(line) < 2 or not tokens:
            continue
        # take the first token as the image id, the rest as the description
        image_id, image_desc = tokens[0], tokens[1:]
        # remove filename extension (and anything after it) from image id
        image_id = image_id.split('.')[0]
        # convert description tokens back to string
        image_desc = ' '.join(image_desc)
        # create the list on first sight of this id, then store description
        mapping.setdefault(image_id, []).append(image_desc)
    return mapping
# Build the image-id -> descriptions mapping from `doc` (expected to be
# defined earlier in the script) and report how many image ids it holds.
descriptions = load_descriptions(doc)
print('Loaded: %d ' % (len(descriptions),))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment