kratsg · July 19, 2021 20:34
diff --git a/README.md b/README.md
diff --git a/split.py b/split.py
 $ cat split.py 
 # Split every line of the text file at `path` into fragments of at most `n`
 # whitespace-separated words and append the fragments, one per line, to
 # new_parsed.txt.
 # NOTE(review): `re` and `word_tokenize` are not used anywhere in this script;
 # the imports are kept untouched in case other code relies on them.
 import re
 from nltk.tokenize import word_tokenize

 path = '2021-01-28 Machine Learning.txt'
 n = 10  # number of words in each fragment

 # `with` closes both handles even if an error occurs part-way through, so the
 # appended output is always flushed to disk. The input is opened first so a
 # missing input file does not leave behind a freshly created, empty output.
 with open(path, 'r') as f, open("new_parsed.txt", "a+") as output:
 	for x in f:
 		# Split once per line; str.split() with no argument also discards
 		# the trailing newline and any repeated whitespace.
 		words = x.split()
 		# Stepping by n gives every fragment exactly n words except possibly
 		# the last one. range() is empty for a blank line (nothing is
 		# written) and never produces an empty trailing slice, so no stray
 		# blank line is emitted when the word count is a multiple of n.
 		for start in range(0, len(words), n):
 			# join() avoids the trailing space that incremental
 			# concatenation would leave at the end of each fragment.
 			output.write(' '.join(words[start:start + n]) + '\n')
	$ cat split.py
	# Split every line of the text file at `path` into fragments of at most `n`
	# whitespace-separated words and append the fragments, one per line, to
	# new_parsed.txt.
	# NOTE(review): `re` and `word_tokenize` are not used anywhere in this script;
	# the imports are kept untouched in case other code relies on them.
	import re
	from nltk.tokenize import word_tokenize

	path = '2021-01-28 Machine Learning.txt'
	n = 10  # number of words in each fragment

	# `with` closes both handles even if an error occurs part-way through, so the
	# appended output is always flushed to disk. The input is opened first so a
	# missing input file does not leave behind a freshly created, empty output.
	with open(path, 'r') as f, open("new_parsed.txt", "a+") as output:
		for x in f:
			# Split once per line; str.split() with no argument also discards
			# the trailing newline and any repeated whitespace.
			words = x.split()
			# Stepping by n gives every fragment exactly n words except possibly
			# the last one. range() is empty for a blank line (nothing is
			# written) and never produces an empty trailing slice, so no stray
			# blank line is emitted when the word count is a multiple of n.
			for start in range(0, len(words), n):
				# join() avoids the trailing space that incremental
				# concatenation would leave at the end of each fragment.
				output.write(' '.join(words[start:start + n]) + '\n')