TutorialDoctor · May 5, 2020 10:36
diff --git a/student.py b/student.py
 import urllib,pprint
 from bs4 import BeautifulSoup

 def train(url):
 	"""Fetch a web page and pretty-print the "X is Y" statements in its text.

 	The page at ``url`` is downloaded, its <script> and <style> elements are
 	removed, and the remaining text is split into sentences on '.'.
 	Every sentence containing ' is ' contributes one dictionary entry: the
 	text before the first ' is ' becomes the key, and the text between the
 	first and second ' is ' (or the end of the sentence) becomes the value.
 	A later sentence with the same key replaces the earlier entry.

 	Args:
 		url: Address of the page to read.

 	Returns:
 		None. The dictionary is printed, followed by 'COMPLETED TRAINING'.
 	"""
 	# Imported here so the function runs on Python 3 (urllib.request) as well
 	# as on Python 2, where urlopen lives directly in urllib.
 	try:
 		from urllib.request import urlopen
 	except ImportError:
 		from urllib import urlopen
 	response = urlopen(url)
 	try:
 		html = response.read()
 	finally:
 		# Release the connection even if reading fails.
 		response.close()
 	# NOTE(review): no parser is named here, so the result may depend on
 	# which parsers are installed; consider passing one explicitly.
 	soup = BeautifulSoup(html)
 	# Script and style content is not page text, so drop it before extraction.
 	for element in soup(["script", "style"]):
 		element.extract()
 	raw_text = soup.get_text()
 	# Trim every line, then break lines on double spaces so each phrase
 	# stands on its own.
 	stripped_lines = (line.strip() for line in raw_text.splitlines())
 	phrases = (phrase.strip() for line in stripped_lines for phrase in line.split("  "))
 	# Discard empty phrases and put one phrase on each line.
 	text = '\n'.join(phrase for phrase in phrases if phrase)
 	statements = {}
 	for sentence in text.split('.'):
 		if ' is ' not in sentence:
 			continue
 		# Split key and value on the same ' is ' separator; splitting the key
 		# on bare 'is' cut it short inside words such as "This" or "history".
 		parts = sentence.split(' is ')
 		statements[parts[0].strip()] = parts[1].strip()
 	pprint.PrettyPrinter(depth=2).pprint(statements)
 	print('COMPLETED TRAINING')

 # Run the extraction on the Telugu language Wikipedia article (mobile site).
 # This call sits at module level, so it executes whenever the file is run.
 train("https://en.m.wikipedia.org/wiki/Telugu_language")


 """
 # PYTHONISTA VERSION
 import urllib.request,pprint
 from bs4 import BeautifulSoup

 def train(url):
 	html=urllib.request.urlopen(url).read()
 	soup = BeautifulSoup(html,'html5lib')
 	# kill all script and style elements
 	for script in soup(["script", "style"]):
 		script.extract()    # rip it out
 	text = soup.get_text()
 	# break into lines and remove leading and trailing space on each
 	lines = (line.strip() for line in text.splitlines())
 	# break multi-headlines into a line each
 	chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
 	# drop blank lines
 	text = '\n'.join(chunk for chunk in chunks if chunk)
 	
 	dic={}
 	#dic=set()
 	y=text.split('.')
 	#print(y)
 	for i in y:
 		if ' is ' in i:
 			#dic[i.split('is')[0].strip()]=i.split('is')[1].strip() #
 			dic.update({i.split('is')[0].strip():i.split(' is ')[1].strip()}) #unique?
 	pprint.PrettyPrinter(depth=2).pprint(dic)
 	print('COMPLETED TRAINING')

 train("https://en.m.wikipedia.org/wiki/Telugu_language")
 """
	import urllib,pprint
	from bs4 import BeautifulSoup

	def train(url):
		"""Fetch a web page and pretty-print the "X is Y" statements in its text.

		The page at ``url`` is downloaded, its <script> and <style> elements are
		removed, and the remaining text is split into sentences on '.'.
		Every sentence containing ' is ' contributes one dictionary entry: the
		text before the first ' is ' becomes the key, and the text between the
		first and second ' is ' (or the end of the sentence) becomes the value.
		A later sentence with the same key replaces the earlier entry.

		Args:
			url: Address of the page to read.

		Returns:
			None. The dictionary is printed, followed by 'COMPLETED TRAINING'.
		"""
		# Imported here so the function runs on Python 3 (urllib.request) as well
		# as on Python 2, where urlopen lives directly in urllib.
		try:
			from urllib.request import urlopen
		except ImportError:
			from urllib import urlopen
		response = urlopen(url)
		try:
			html = response.read()
		finally:
			# Release the connection even if reading fails.
			response.close()
		# NOTE(review): no parser is named here, so the result may depend on
		# which parsers are installed; consider passing one explicitly.
		soup = BeautifulSoup(html)
		# Script and style content is not page text, so drop it before extraction.
		for element in soup(["script", "style"]):
			element.extract()
		raw_text = soup.get_text()
		# Trim every line, then break lines on DOUBLE spaces so each phrase
		# stands on its own. A single-space separator would put every word on
		# its own line and no sentence could ever contain ' is '.
		stripped_lines = (line.strip() for line in raw_text.splitlines())
		phrases = (phrase.strip() for line in stripped_lines for phrase in line.split("  "))
		# Discard empty phrases and put one phrase on each line.
		text = '\n'.join(phrase for phrase in phrases if phrase)
		statements = {}
		for sentence in text.split('.'):
			if ' is ' not in sentence:
				continue
			# Split key and value on the same ' is ' separator; splitting the key
			# on bare 'is' cut it short inside words such as "This" or "history".
			parts = sentence.split(' is ')
			statements[parts[0].strip()] = parts[1].strip()
		pprint.PrettyPrinter(depth=2).pprint(statements)
		print('COMPLETED TRAINING')

	# Run the extraction on the Telugu language Wikipedia article (mobile site).
	train("https://en.m.wikipedia.org/wiki/Telugu_language")


	"""
	# PYTHONISTA VERSION
	import urllib.request,pprint
	from bs4 import BeautifulSoup

	def train(url):
	html=urllib.request.urlopen(url).read()
	soup = BeautifulSoup(html,'html5lib')
	# kill all script and style elements
	for script in soup(["script", "style"]):
	script.extract() # rip it out
	text = soup.get_text()
	# break into lines and remove leading and trailing space on each
	lines = (line.strip() for line in text.splitlines())
	# break multi-headlines into a line each
	chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
	# drop blank lines
	text = '\n'.join(chunk for chunk in chunks if chunk)

	dic={}
	#dic=set()
	y=text.split('.')
	#print(y)
	for i in y:
	if ' is ' in i:
	#dic[i.split('is')[0].strip()]=i.split('is')[1].strip() #
	dic.update({i.split('is')[0].strip():i.split(' is ')[1].strip()}) #unique?
	pprint.PrettyPrinter(depth=2).pprint(dic)
	print('COMPLETED TRAINING')

	train("https://en.m.wikipedia.org/wiki/Telugu_language")
	"""
No results found