Gaurav Bansal gauravbansal98

gauravbansal98 / load_clean_descriptions

Created May 13, 2020 12:35

	# load clean descriptions into memory
	# (fragment: this listing ends before the function body is complete)
	# filename: passed to load_doc(); the returned text is split on '\n'
	# dataset: not referenced in the visible part of the function
	def load_clean_descriptions(filename, dataset):
	# load document
	doc = load_doc(filename)
	descriptions = dict()
	for line in doc.split('\n'):
	# split line by white space
	tokens = line.split()
	# split id from description
	# NOTE(review): a blank line yields no tokens, so tokens[0] would raise
	# IndexError; no guard is visible here -- confirm the input has no blank lines
	image_id, image_desc = tokens[0], tokens[1:]

gauravbansal98 / load_set

Created May 13, 2020 12:33

gauravbansal98 / description

Created May 13, 2020 12:29

	2252123185_487f21e336 bunch on people are seated in stadium
	2252123185_487f21e336 crowded stadium is full of people watching an event
	2252123185_487f21e336 crowd of people fill up packed stadium
	2252123185_487f21e336 crowd sitting in an indoor stadium
	2252123185_487f21e336 stadium full of people watch game
	...

gauravbansal98 / save_descriptions

Created May 13, 2020 12:22

	# save descriptions to file, one per line
	def save_descriptions(descriptions, filename):
	    """Write every description to a text file, one per line.

	    Each output line has the form ``"<key> <description>"``. Lines are
	    joined with ``'\\n'`` and no trailing newline is written, so an empty
	    mapping produces an empty file.

	    Args:
	        descriptions: Mapping from a string key to an iterable of
	            description strings.
	        filename: Path of the file to create or overwrite.
	    """
	    lines = []
	    for key, desc_list in descriptions.items():
	        lines.extend(key + ' ' + desc for desc in desc_list)
	    # The context manager closes the handle even if write() raises.
	    with open(filename, 'w') as file:
	        file.write('\n'.join(lines))

gauravbansal98 / to_vocabulary

Created May 13, 2020 12:20

	# convert the loaded descriptions into a vocabulary of words
	def to_vocabulary(descriptions):
	    """Return the set of distinct words used across all descriptions.

	    Args:
	        descriptions: Mapping whose values are iterables of description
	            strings; the keys are not used.

	    Returns:
	        A set holding every whitespace-separated token found in any
	        description. Empty when there are no descriptions.
	    """
	    vocabulary = set()
	    # Plain loops instead of a comprehension: update() is called only for
	    # its side effect, so there is no list worth building.
	    for desc_list in descriptions.values():
	        for desc in desc_list:
	            vocabulary.update(desc.split())
	    return vocabulary

	# build the vocabulary from the loaded descriptions
	vocabulary = to_vocabulary(descriptions)

gauravbansal98 / clean_description

Created May 13, 2020 12:18

	import string

	# clean the description strings held in descriptions
	# (fragment: this listing ends before the function body is complete)
	# descriptions: mapping whose values are indexable sequences of strings
	def clean_descriptions(descriptions):
	# prepare translation table for removing punctuation
	# (maps every character in string.punctuation to None; its use is not
	# visible in this fragment)
	table = str.maketrans('', '', string.punctuation)
	for key, desc_list in descriptions.items():
	for i in range(len(desc_list)):
	desc = desc_list[i]
	# tokenize
	desc = desc.split()

gauravbansal98 / load_description

Created May 13, 2020 12:17

	# extract descriptions for images
	# (fragment: this listing ends before the function body is complete)
	# doc: text that is split on '\n' and processed one line at a time
	def load_descriptions(doc):
	mapping = dict()
	# process lines
	for line in doc.split('\n'):
	# split line by white space
	tokens = line.split()
	# NOTE(review): this tests the character length of the raw line, not
	# len(tokens), so a line holding a single token of two or more characters
	# is not skipped -- confirm that is intended
	if len(line) < 2:
	continue
	# take the first token as the image id, the rest as the description

gauravbansal98 / load_description

Created May 13, 2020 12:17

gauravbansal98 / load_doc

Created May 13, 2020 12:15

gauravbansal98 / load_doc

Created May 13, 2020 12:14