analyticsindiamagazine · December 2, 2019 03:26
diff --git a/testing_prep.py b/testing_prep.py
 # TESTING: run the three input-building helpers on a single sample and
 # print each result together with its length.
 max_seq_length = 128  # sequence length handed to every get_* helper below

 # Tokenize the first STORY entry of `train`, then wrap the tokens in the
 # "[CLS]" ... "[SEP]" markers.
 s1 = train['STORY'].iloc[0]
 stokens1 = ["[CLS]"] + tokenizer.tokenize(s1) + ["[SEP]"]
 # NOTE(review): nothing here truncates stokens1 to max_seq_length --
 # presumably the helpers cope with longer inputs; confirm against them.

 input_ids1 = get_ids(stokens1, tokenizer, max_seq_length)
 input_masks1 = get_masks(stokens1, max_seq_length)
 input_segments1 = get_segments(stokens1, max_seq_length)

 # One report line per helper output: label, length, then the values.
 # The labels are reproduced exactly as the script has always printed them.
 for _label, _sequence in (
     ("IDS # len:", input_ids1),
     ("MASKS # len:", input_masks1),
     ("SEGEMNTS # len:", input_segments1),
 ):
     print(_label, len(_sequence), " ::: ", _sequence)
	# TESTING: run the three input-building helpers on a single sample and
	# print each result together with its length.
	max_seq_length = 128  # sequence length handed to every get_* helper below

	# Tokenize the first STORY entry of `train`, then wrap the tokens in the
	# "[CLS]" ... "[SEP]" markers.
	s1 = train['STORY'].iloc[0]
	stokens1 = ["[CLS]"] + tokenizer.tokenize(s1) + ["[SEP]"]
	# NOTE(review): nothing here truncates stokens1 to max_seq_length --
	# presumably the helpers cope with longer inputs; confirm against them.

	input_ids1 = get_ids(stokens1, tokenizer, max_seq_length)
	input_masks1 = get_masks(stokens1, max_seq_length)
	input_segments1 = get_segments(stokens1, max_seq_length)

	# One report line per helper output: label, length, then the values.
	# The labels are reproduced exactly as the script has always printed them.
	for _label, _sequence in (
		("IDS # len:", input_ids1),
		("MASKS # len:", input_masks1),
		("SEGEMNTS # len:", input_segments1),
	):
		print(_label, len(_sequence), " ::: ", _sequence)