anmolj7 · November 20, 2018 21:02
diff --git a/extractiveAlgorithm.py b/extractiveAlgorithm.py
 #This code is written and tested in python 2.7
 #The library NLTK has to be installed first.
 import re, nltk, heapq 
 class Summary:
 	"""Frequency-based extractive summarizer built on the NLTK tokenizers."""
 	def summary(self, article_text, n=5, max_sentence_len=30):
 		"""Return up to ``n`` top-scoring sentences of ``article_text``.

 		Words are counted case-insensitively after non-letter characters
 		and English stopwords are removed, and every count is divided by
 		the largest count. A sentence's score is the sum of the normalized
 		counts of its tokens. Sentences with ``max_sentence_len`` or more
 		space-separated words are never scored.

 		Args:
 			article_text: Text to summarize.
 			n: Maximum number of sentences in the summary.
 			max_sentence_len: Sentences whose space-split word count
 				reaches this value are skipped.

 		Returns:
 			The selected sentences, highest score first, joined by single
 			spaces; ``''`` when the text is empty or has no scorable words.
 		"""
 		# Nothing to summarize: bail out before any tokenizer is invoked.
 		if not article_text or not article_text.strip():
 			return ''
 		# Keep letters only and lowercase them, so the frequency table uses
 		# the same casing as the lowercased tokens looked up during scoring.
 		formatted_article_text = re.sub('[^a-zA-Z]', ' ', article_text).lower()
 		formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)
 		sentence_list = nltk.sent_tokenize(article_text)
 		# A set gives constant-time stopword checks inside the loop.
 		stopwords = set(nltk.corpus.stopwords.words('english'))
 		word_freq = {}
 		for word in nltk.word_tokenize(formatted_article_text):
 			if word not in stopwords:
 				word_freq[word] = word_freq.get(word, 0) + 1
 		# Only stopwords / non-letters were present: max() would raise.
 		if not word_freq:
 			return ''
 		# float() keeps the division exact under Python 2 as well.
 		maximum_freq = float(max(word_freq.values()))
 		word_freq = {word: count / maximum_freq for word, count in word_freq.items()}
 		sentence_scores = {}
 		for sent in sentence_list:
 			# The length filter depends on the sentence only, so test it once.
 			if len(sent.split(' ')) >= max_sentence_len:
 				continue
 			for word in nltk.word_tokenize(sent.lower()):
 				if word in word_freq:
 					sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word]
 		summary_sents = heapq.nlargest(n, sentence_scores, key=sentence_scores.get)
 		return ' '.join(summary_sents)

 # Interactive entry point: read an article from stdin and print its summary.
 # Guarded so that importing this module does not block on a prompt.
 if __name__ == '__main__':
 	try:
 		read_line = raw_input  # Python 2
 	except NameError:
 		read_line = input  # Python 3 renamed raw_input to input
 	article_text = read_line('Enter The Input Article Text: ')
 	S = Summary()
 	print(S.summary(article_text))
	#This code is written and tested in python 2.7
	#The library NLTK has to be installed first.
	import re, nltk, heapq
	class Summary:
		"""Frequency-based extractive summarizer built on the NLTK tokenizers."""
		def summary(self, article_text, n=5, max_sentence_len=30):
			"""Return up to ``n`` top-scoring sentences of ``article_text``.

			Words are counted case-insensitively after non-letter characters
			and English stopwords are removed, and every count is divided by
			the largest count. A sentence's score is the sum of the normalized
			counts of its tokens. Sentences with ``max_sentence_len`` or more
			space-separated words are never scored.

			Args:
				article_text: Text to summarize.
				n: Maximum number of sentences in the summary.
				max_sentence_len: Sentences whose space-split word count
					reaches this value are skipped.

			Returns:
				The selected sentences, highest score first, joined by single
				spaces; ``''`` when the text is empty or has no scorable words.
			"""
			# Nothing to summarize: bail out before any tokenizer is invoked.
			if not article_text or not article_text.strip():
				return ''
			# Keep letters only and lowercase them, so the frequency table uses
			# the same casing as the lowercased tokens looked up during scoring.
			formatted_article_text = re.sub('[^a-zA-Z]', ' ', article_text).lower()
			formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)
			sentence_list = nltk.sent_tokenize(article_text)
			# A set gives constant-time stopword checks inside the loop.
			stopwords = set(nltk.corpus.stopwords.words('english'))
			word_freq = {}
			for word in nltk.word_tokenize(formatted_article_text):
				if word not in stopwords:
					word_freq[word] = word_freq.get(word, 0) + 1
			# Only stopwords / non-letters were present: max() would raise.
			if not word_freq:
				return ''
			# float() keeps the division exact under Python 2 as well.
			maximum_freq = float(max(word_freq.values()))
			word_freq = {word: count / maximum_freq for word, count in word_freq.items()}
			sentence_scores = {}
			for sent in sentence_list:
				# The length filter depends on the sentence only, so test it once.
				if len(sent.split(' ')) >= max_sentence_len:
					continue
				for word in nltk.word_tokenize(sent.lower()):
					if word in word_freq:
						sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word]
			summary_sents = heapq.nlargest(n, sentence_scores, key=sentence_scores.get)
			return ' '.join(summary_sents)

	# Interactive entry point: read an article from stdin and print its summary.
	# Guarded so that importing this module does not block on a prompt.
	if __name__ == '__main__':
		try:
			read_line = raw_input  # Python 2
		except NameError:
			read_line = input  # Python 3 renamed raw_input to input
		article_text = read_line('Enter The Input Article Text: ')
		S = Summary()
		print(S.summary(article_text))