MarksCode · March 8, 2016 06:07
diff --git a/thing.py b/thing.py
 import string

 ######## Begin code which needs to be modified ##########

 # Implementation 1
 class MyChainDict(object):
 	def __init__(self):
 		self.dictSize = 10000
 		self.mydict = []
 		for i in range(self.dictSize):
 			self.mydict.append([])
 		return
 	
 	def insert(self, key, value=None):
 		bucketNum = self.hashFunc(key)
 		doesContain = self.contains(key)
 		if not doesContain:
 			self.mydict[bucketNum].append([key, value])
 			return
 		for x in self.mydict[bucketNum]:
 			if (key in x):
 				x[1] = value
 				return

 	def contains(self, key):
 		bucketNum = self.hashFunc(key);
 		doesContain = False
 		for x in self.mydict[bucketNum]:
 			if (key in x):
 				doesContain = True
 		return (doesContain)

 	def value(self, key):
 		bucketNum = self.hashFunc(key);
 		doesContain = self.contains(key)
 		if not doesContain:
 			return False
 		else:
 			for x in self.mydict[bucketNum]:
 				if (key in x):
 					return x[1]
 			
 	def delete(self, key):
 		bucketNum = self.hashFunc(key)
 		index = 0
 		for x in self.mydict[bucketNum]:
 			if (key == x[0]):
 				self.mydict[bucketNum].pop(index)
 			else:
 				index = index + 1
 		return

 	def get_key_values(self):
 		myArray = []
 		for x in self.mydict:
 				for y in x:
 					myArray.append((y[0], y[1]))
 		return myArray
 	
 	def dump(self):
 		for i in self.mydict:
 			for x in i:
 				print (x)
 		
 			
 	def hashFunc(self, key):
 		return hash(key)%self.dictSize;
 			
 			
 # Implementation 2
 class MyOpenLinearDict(object):
 	def __init__(self):
 		self.dictSize = 500000
 		self.mydict = []
 		for i in range(self.dictSize):
 			self.mydict.append(["q", -1])
 		return
 	
 	def insert(self, key, value=None):
 		iterations = 0
 		bucketNum = self.hashFunc(key)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
 			bucketNum = bucketNum + 1
 			iterations = iterations + 1
 			if (bucketNum > self.dictSize-1):
 				bucketNum = 0
 		self.mydict[bucketNum] = [key, value]

 	def contains(self, key):
 		iterations = 0
 		bucketNum = self.hashFunc(key)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
 			bucketNum = bucketNum + 1
 			iterations = iterations + 1
 			if (bucketNum > self.dictSize-1):
 				bucketNum = 0
 		if (self.mydict[bucketNum][0] == key):
 			return True
 		else:
 			return False

 	def value(self, key):
 		bucketNum = self.hashFunc(key)
 		iterations = 0
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
 			bucketNum = bucketNum + 1
 			iterations = iterations + 1
 			if (bucketNum > self.dictSize-1):
 				bucketNum = 0
 		if (self.mydict[bucketNum][0] == key):
 			return self.mydict[bucketNum][1]

 	def delete(self, key):
 		bucketNum = self.hashFunc(key)
 		iterations = 0
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
 			bucketNum = bucketNum + 1
 			iterations = iterations + 1
 			if (bucketNum > self.dictSize-1):
 				bucketNum = 0
 		if (self.mydict[bucketNum][0] == key):
 			self.mydict[bucketNum] = ["Deleted", -1]
 		return

 	def get_key_values(self):
 		myArray = []
 		for x in self.mydict:
 			if (x[0] != "q"):
 				myArray.append((x[0], x[1]))
 		return myArray
 	
 	def dump(self):
 		for x in self.mydict:
 			print (x[0], x[1])
 	
 	def hashFunc(self, key):
 		return hash(key)%self.dictSize
 	
 # Implementation 3
 class MyOpenQuadDict(object):
 	def __init__(self):
 		self.dictSize = 500000
 		self.mydict = []
 		for i in range(self.dictSize):
 			self.mydict.append(["q", -1])
 		return
 	
 	def insert(self, key, value=None):
 		iterations = 0
 		bucketNum = self.hashFunc(key, iterations)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
 			iterations = iterations + 1
 			bucketNum = self.hashFunc(key, iterations)
 		self.mydict[bucketNum] = [key, value]

 	def contains(self, key):
 		iterations = 0
 		bucketNum = self.hashFunc(key, iterations)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
 			iterations = iterations + 1
 			bucketNum = self.hashFunc(key, iterations)
 		if (self.mydict[bucketNum][0] == key):
 			return True
 		else:
 			return False

 	def value(self, key):
 		iterations = 0
 		bucketNum = self.hashFunc(key, iterations)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
 			iterations = iterations + 1
 			bucketNum = self.hashFunc(key, iterations)
 		if (self.mydict[bucketNum][0] == key):
 			return self.mydict[bucketNum][1]

 	def delete(self, key):
 		iterations = 0
 		bucketNum = self.hashFunc(key, iterations)
 		while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
 			iterations = iterations + 1
 			bucketNum = self.hashFunc(key, iterations)
 		if (self.mydict[bucketNum][0] == key):
 			self.mydict[bucketNum] = ["Deleted", -1]
 		return

 	def get_key_values(self):
 		myArray = []
 		for x in self.mydict:
 			if (x[0] != "q"):
 				myArray.append((x[0], x[1]))
 		return myArray
 	
 	def dump(self):
 		for x in self.mydict:
 			if (x[0] != "q"):
 				print ((x[0], x[1]))
 			
 	def hashFunc(self, key, iteration):
 		return (hash(key)+10*iteration+10*iteration^2)%self.dictSize
 		
 			
 class MySet(object):
 	def __init__(self):
 		self.myset = MyOpenQuadDict()
 		return

 	def insert(self, key):
 		self.myset.insert(key)
 		return
 	
 	def contains(self, key):
 		return self.myset.contains(key)

 	def dump(self):
 		self.myset.dump()

 ######## End code which needs to be modified ##########


 # Store the set of stop words in a set object
 stop_words_file = "stopwords.txt"
 stop_words = MySet()

 with open(stop_words_file) as f:
 	for l in f:
 		stop_words.insert(l.strip())


 # Download file from https://snap.stanford.edu/data/finefoods.txt.gz        
 # Remember to gunzip before using
 review_file = "foods.txt"

 review_words = []
 for i in range(5):
 	review_words.append(MyOpenQuadDict())
 	
 with open(review_file, encoding='LATIN-1') as f:
 	lines = f.readlines()
 	idx = 0
 	num_lines = len(lines) - 2
 	while idx < num_lines:
 		while not lines[idx].startswith("review/score"):
 			idx = idx + 1

 		# Jump through hoops to satisfy the Unicode encodings 
 		l = lines[idx].encode('ascii', 'ignore').decode('ascii')
 		l = l.strip().split(":")[1].strip()

 		# Extract rating
 		rating = int(float(l))
 		while not lines[idx].startswith("review/text"):
 			idx = idx + 1
 			
 		l = lines[idx].encode('ascii', 'ignore').decode('ascii').strip().lower()
 		text = l.split(":")[1].strip()
 		text = text.translate(str.maketrans("","", string.punctuation))

 		# Extract words in the text 
 		words = text.split(" ")
 		# words = [x.strip(string.punctuation) for x in text.split(" ")]
 		for w in words:
 			if len(w) and not stop_words.contains(w):
 				if review_words[rating - 1].contains(w):
 					count = review_words[rating - 1].value(w) + 1
 				else:
 					count = 1
 				review_words[rating - 1].insert(w, count)

 # Print out the top words for each of the five ratings 

 for i in range(5):
 	freq_words = sorted(review_words[i].get_key_values(), key=lambda tup: tup[1], reverse=True)
 	print(i+1, freq_words[0:20])
	import string

	######## Begin code which needs to be modified ##########

	# Implementation 1
	class MyChainDict(object):
	def __init__(self):
	self.dictSize = 10000
	self.mydict = []
	for i in range(self.dictSize):
	self.mydict.append([])
	return

	def insert(self, key, value=None):
	bucketNum = self.hashFunc(key)
	doesContain = self.contains(key)
	if not doesContain:
	self.mydict[bucketNum].append([key, value])
	return
	for x in self.mydict[bucketNum]:
	if (key in x):
	x[1] = value
	return

	def contains(self, key):
	bucketNum = self.hashFunc(key);
	doesContain = False
	for x in self.mydict[bucketNum]:
	if (key in x):
	doesContain = True
	return (doesContain)

	def value(self, key):
	bucketNum = self.hashFunc(key);
	doesContain = self.contains(key)
	if not doesContain:
	return False
	else:
	for x in self.mydict[bucketNum]:
	if (key in x):
	return x[1]

	def delete(self, key):
	bucketNum = self.hashFunc(key)
	index = 0
	for x in self.mydict[bucketNum]:
	if (key == x[0]):
	self.mydict[bucketNum].pop(index)
	else:
	index = index + 1
	return

	def get_key_values(self):
	myArray = []
	for x in self.mydict:
	for y in x:
	myArray.append((y[0], y[1]))
	return myArray

	def dump(self):
	for i in self.mydict:
	for x in i:
	print (x)


	def hashFunc(self, key):
	return hash(key)%self.dictSize;


	# Implementation 2
	class MyOpenLinearDict(object):
	def __init__(self):
	self.dictSize = 500000
	self.mydict = []
	for i in range(self.dictSize):
	self.mydict.append(["q", -1])
	return

	def insert(self, key, value=None):
	iterations = 0
	bucketNum = self.hashFunc(key)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
	bucketNum = bucketNum + 1
	iterations = iterations + 1
	if (bucketNum > self.dictSize-1):
	bucketNum = 0
	self.mydict[bucketNum] = [key, value]

	def contains(self, key):
	iterations = 0
	bucketNum = self.hashFunc(key)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
	bucketNum = bucketNum + 1
	iterations = iterations + 1
	if (bucketNum > self.dictSize-1):
	bucketNum = 0
	if (self.mydict[bucketNum][0] == key):
	return True
	else:
	return False

	def value(self, key):
	bucketNum = self.hashFunc(key)
	iterations = 0
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
	bucketNum = bucketNum + 1
	iterations = iterations + 1
	if (bucketNum > self.dictSize-1):
	bucketNum = 0
	if (self.mydict[bucketNum][0] == key):
	return self.mydict[bucketNum][1]

	def delete(self, key):
	bucketNum = self.hashFunc(key)
	iterations = 0
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q" and iterations <= self.dictSize-1):
	bucketNum = bucketNum + 1
	iterations = iterations + 1
	if (bucketNum > self.dictSize-1):
	bucketNum = 0
	if (self.mydict[bucketNum][0] == key):
	self.mydict[bucketNum] = ["Deleted", -1]
	return

	def get_key_values(self):
	myArray = []
	for x in self.mydict:
	if (x[0] != "q"):
	myArray.append((x[0], x[1]))
	return myArray

	def dump(self):
	for x in self.mydict:
	print (x[0], x[1])

	def hashFunc(self, key):
	return hash(key)%self.dictSize

	# Implementation 3
	class MyOpenQuadDict(object):
	def __init__(self):
	self.dictSize = 500000
	self.mydict = []
	for i in range(self.dictSize):
	self.mydict.append(["q", -1])
	return

	def insert(self, key, value=None):
	iterations = 0
	bucketNum = self.hashFunc(key, iterations)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
	iterations = iterations + 1
	bucketNum = self.hashFunc(key, iterations)
	self.mydict[bucketNum] = [key, value]

	def contains(self, key):
	iterations = 0
	bucketNum = self.hashFunc(key, iterations)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
	iterations = iterations + 1
	bucketNum = self.hashFunc(key, iterations)
	if (self.mydict[bucketNum][0] == key):
	return True
	else:
	return False

	def value(self, key):
	iterations = 0
	bucketNum = self.hashFunc(key, iterations)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
	iterations = iterations + 1
	bucketNum = self.hashFunc(key, iterations)
	if (self.mydict[bucketNum][0] == key):
	return self.mydict[bucketNum][1]

	def delete(self, key):
	iterations = 0
	bucketNum = self.hashFunc(key, iterations)
	while (self.mydict[bucketNum][0] != key and self.mydict[bucketNum][0] != "q"):
	iterations = iterations + 1
	bucketNum = self.hashFunc(key, iterations)
	if (self.mydict[bucketNum][0] == key):
	self.mydict[bucketNum] = ["Deleted", -1]
	return

	def get_key_values(self):
	myArray = []
	for x in self.mydict:
	if (x[0] != "q"):
	myArray.append((x[0], x[1]))
	return myArray

	def dump(self):
	for x in self.mydict:
	if (x[0] != "q"):
	print ((x[0], x[1]))

	def hashFunc(self, key, iteration):
	return (hash(key)+10iteration+10iteration^2)%self.dictSize


	class MySet(object):
	def __init__(self):
	self.myset = MyOpenQuadDict()
	return

	def insert(self, key):
	self.myset.insert(key)
	return

	def contains(self, key):
	return self.myset.contains(key)

	def dump(self):
	self.myset.dump()

	######## End code which needs to be modified ##########


	# Store the set of stop words in a set object
	stop_words_file = "stopwords.txt"
	stop_words = MySet()

	with open(stop_words_file) as f:
	for l in f:
	stop_words.insert(l.strip())


	# Download file from https://snap.stanford.edu/data/finefoods.txt.gz
	# Remember to gunzip before using
	review_file = "foods.txt"

	review_words = []
	for i in range(5):
	review_words.append(MyOpenQuadDict())

	with open(review_file, encoding='LATIN-1') as f:
	lines = f.readlines()
	idx = 0
	num_lines = len(lines) - 2
	while idx < num_lines:
	while not lines[idx].startswith("review/score"):
	idx = idx + 1

	# Jump through hoops to satisfy the Unicode encodings
	l = lines[idx].encode('ascii', 'ignore').decode('ascii')
	l = l.strip().split(":")[1].strip()

	# Extract rating
	rating = int(float(l))
	while not lines[idx].startswith("review/text"):
	idx = idx + 1

	l = lines[idx].encode('ascii', 'ignore').decode('ascii').strip().lower()
	text = l.split(":")[1].strip()
	text = text.translate(str.maketrans("","", string.punctuation))

	# Extract words in the text
	words = text.split(" ")
	# words = [x.strip(string.punctuation) for x in text.split(" ")]
	for w in words:
	if len(w) and not stop_words.contains(w):
	if review_words[rating - 1].contains(w):
	count = review_words[rating - 1].value(w) + 1
	else:
	count = 1
	review_words[rating - 1].insert(w, count)

	# Print out the top words for each of the five ratings

	for i in range(5):
	freq_words = sorted(review_words[i].get_key_values(), key=lambda tup: tup[1], reverse=True)
	print(i+1, freq_words[0:20])
No results found