Last active
January 17, 2018 16:44
-
-
Save Madhivarman/f968077a44f3080cbaf8c44d3c4e85ae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Sentence segmentation, means, to split a given paragraph of text into sentences, by identifying the sentence boundaries. | |
In many cases, a full stop is all that is required to identify the end of a sentence, but the task is not all that simple. | |
This is an open ended challenge to which there are no perfect solutions. Try to break up given paragraphs into text into | |
individual sentences. Even if you don't manage to segment the text perfectly, the more sentences you identify and display | |
correctly, the more you will score.""" | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
class TextSegmentation():
    """Split a POS-tagged paragraph into individual sentences.

    Tokens carrying the Penn Treebank tag '.' (the sentence-final
    punctuation tag, which covers '.', '!' and '?') are treated as
    sentence boundaries.
    """

    def segmentation(self, text, pos_tagging):
        """Return the sentences found in pos_tagging as a list of strings.

        text        -- the original input text (unused by the algorithm;
                       kept for interface compatibility with callers)
        pos_tagging -- list of (token, tag) pairs, e.g. from nltk.pos_tag
        """
        # Count boundary tokens up front so the summary matches the split.
        count = sum(1 for _, tag in pos_tagging if tag == '.')
        print("Number of sentences:{}".format(count))
        # Accumulate tokens and flush at each boundary tag. This replaces
        # the old "stopped" sentinel scheme, which (a) broke whenever the
        # text itself contained the word "stopped", (b) never split
        # sentences ending in '!' or '?' because only the literal '.'
        # character was replaced, and (c) left a trailing empty string
        # and stray spaces in the result.
        sentences = []
        current = []
        for token, tag in pos_tagging:
            if tag == '.':
                if current:
                    sentences.append(" ".join(current))
                    current = []
            else:
                current.append(token)
        # Keep any trailing words that lack closing punctuation.
        if current:
            sentences.append(" ".join(current))
        print("Splitted Sentence is:{}\n".format(sentences))
        return sentences

    def printoutput(self, result):
        """Print each sentence in result, numbered from 1."""
        print("Final result is:")
        print("-------------------------------------------------------------")
        for number, sent in enumerate(result, 1):
            print(number, sent)
def main(text):
    """Tokenize *text* with NLTK and return its part-of-speech tags.

    Returns a list of (token, tag) pairs as produced by nltk.pos_tag.
    """
    return nltk.pos_tag(word_tokenize(text))
def isconditionistrue(user_text, pos):
    """Check the input against the challenge's size constraints.

    user_text -- the raw input text
    pos       -- the list of (token, tag) pairs for that text

    Returns "1" when the text has at most 10000 characters AND at most
    1000 tagged tokens, "0" otherwise.

    The original looped over individual characters and tested
    ``len(w) <= 10000`` on a single character (always 1), and fell
    through returning ``None`` for empty input; the intent was clearly
    the total text length, checked once.
    """
    if len(user_text) <= 10000 and len(pos) <= 1000:
        return "1"
    return "0"
if __name__ == '__main__':
    # Read the paragraph to segment from stdin (Python 2: raw_input).
    text = raw_input("Enter your text here:\n")
    # POS-tag the text so sentence-final punctuation carries the tag '.';
    # returns a list of (token, tag) pairs.
    pos_tagging = main(text)
    #print(pos_tagging)
    # Enforce the challenge's size constraints before segmenting;
    # isconditionistrue returns the string "1" on success.
    if(isconditionistrue(text,pos_tagging) == '1'):
        obj = TextSegmentation()
        split = obj.segmentation(text,pos_tagging)
        obj.printoutput(split)
    else:
        print("Certain constraints failed")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment