a-agmon · June 29, 2020 04:09
diff --git a/s_token.py b/s_token.py

 # Fit a tokenizer on the target feature column and expose its word index.
 # VOCAB_SIZE is the value handed to the tokenizer as its num_words limit.
 VOCAB_SIZE = 750
 # take just the target feature: every row, only the FEAT_FIELD column
 # (presumably `sequences` is a pandas DataFrame - confirm against the caller)
 clean_sequences = sequences.loc[:,FEAT_FIELD]
 # create a tokenizer limited to VOCAB_SIZE 'words' -
 # each retained word is represented by a number
 # NOTE(review): if this is the Keras Tokenizer, num_words keeps only the
 # (num_words - 1) most frequent words when texts are encoded - confirm
 tokenizer = Tokenizer(num_words=VOCAB_SIZE)
 # fit the tokenizer on our data
 tokenizer.fit_on_texts(clean_sequences)

 # keep the tokenizer's word_index (presumably a word -> integer mapping)
 # NOTE(review): with Keras, word_index is not truncated to num_words - confirm
 dictionary = tokenizer.word_index

	# Fit a tokenizer on the target feature column and expose its word index.
	# VOCAB_SIZE is the value handed to the tokenizer as its num_words limit.
	VOCAB_SIZE = 750
	# take just the target feature: every row, only the FEAT_FIELD column
	# (presumably `sequences` is a pandas DataFrame - confirm against the caller)
	clean_sequences = sequences.loc[:,FEAT_FIELD]
	# create a tokenizer limited to VOCAB_SIZE 'words' -
	# each retained word is represented by a number
	# NOTE(review): if this is the Keras Tokenizer, num_words keeps only the
	# (num_words - 1) most frequent words when texts are encoded - confirm
	tokenizer = Tokenizer(num_words=VOCAB_SIZE)
	# fit the tokenizer on our data
	tokenizer.fit_on_texts(clean_sequences)

	# keep the tokenizer's word_index (presumably a word -> integer mapping)
	# NOTE(review): with Keras, word_index is not truncated to num_words - confirm
	dictionary = tokenizer.word_index