Training Language Model on Breitbart Comments
import collections

import numpy as np
from keras.utils import Sequence


class BreitbartCommentsSequence(Sequence):
    def __init__(self, comments, vocab_size, previous_words, batch_size):
        # Keep only the vocab_size most frequent words across all comments
        all_comments_text = ' '.join(comments)
        all_words = all_comments_text.split(" ")
        vocabulary = collections.Counter(all_words)
        self.vocabulary = [word for word, count in vocabulary.most_common(vocab_size)]
        self.previous_words = previous_words
        self.batch_size = batch_size
        self.comments = comments

    def __len__(self):
        # Number of full batches per epoch
        return len(self.comments) // self.batch_size

    def __getitem__(self, idx):
        # TODO: preprocess the batch of comments into model inputs and targets
        batch = self.comments[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = []  # TODO: context windows of self.previous_words words each
        batch_y = []  # TODO: the word that follows each context window
        return np.array(batch_x), np.array(batch_y)
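The preprocessing left as a TODO above is not implemented in the gist. A minimal sketch of one way it could work follows, assuming integer-encoded context windows and a shared out-of-vocabulary index; the helper name encode_comments and the encoding scheme are illustrative, not part of the original code.

import numpy as np

def encode_comments(batch, vocabulary, previous_words):
    # Hypothetical helper (not in the gist): turn raw comment strings into
    # (context window, next word) pairs of vocabulary indices.
    word_to_index = {word: i for i, word in enumerate(vocabulary)}
    oov_index = len(vocabulary)  # shared index for out-of-vocabulary words
    contexts, targets = [], []
    for comment in batch:
        indices = [word_to_index.get(word, oov_index) for word in comment.split(" ")]
        # Slide a window of previous_words words over the comment;
        # the word right after each window is the prediction target.
        for start in range(len(indices) - previous_words):
            contexts.append(indices[start:start + previous_words])
            targets.append(indices[start + previous_words])
    return np.array(contexts), np.array(targets)

In practice the word_to_index map would be built once in __init__ rather than per batch; it is rebuilt inside the helper here only to keep the sketch self-contained.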
import pandas

# Load the scraped Breitbart comments from a pickled DataFrame
comments = pandas.read_pickle("breitbart.pickle")
comment_texts = comments['comment_text']

VOCAB_SIZE = 5000
BATCH_SIZE = 16
PREV_WORDS = 16

train_generator = BreitbartCommentsSequence(comment_texts, VOCAB_SIZE, PREV_WORDS, BATCH_SIZE)
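The gist stops after constructing the generator; no model or training call is included. A minimal sketch of how the Sequence could be consumed with the Keras API of that era (fit_generator) follows, assuming integer context inputs and a softmax over the vocabulary (plus one slot for the out-of-vocabulary index from the sketch above); the architecture and hyperparameters are illustrative, not from the original.

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

# Hypothetical next-word model; layer sizes are illustrative only.
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE + 1, output_dim=128, input_length=PREV_WORDS))
model.add(LSTM(256))
model.add(Dense(VOCAB_SIZE + 1, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# fit_generator was the Keras 2 (2017-era) entry point for Sequence-based training
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=10)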