sharvaridhote · January 10, 2021 23:21
diff --git a/scraper.py b/scraper.py
 # crawling website
 def getLinks(url):
     """Return the distinct ``href`` values of the anchor tags found at *url*.

     The page is fetched with ``urlopen`` and parsed with BeautifulSoup.
     Each href is returned exactly as written in the markup (relative links
     are not resolved), once only, in order of first appearance.

     Args:
         url: Address of the page to crawl.

     Returns:
         A list of unique href values; empty if the page has no links.

     Errors raised while fetching or parsing the page propagate to the
     caller. If collecting the links fails part-way, a message is printed
     and the links gathered so far are returned.
     """
     # Release the HTTP connection as soon as the document has been parsed.
     with urlopen(url) as html_page:
         # Name the parser explicitly so the result does not depend on which
         # optional parsers happen to be installed (and bs4 does not warn).
         soup = BeautifulSoup(html_page, "html.parser")

     total_pages = []
     seen = set()  # O(1) duplicate check; the list keeps first-seen order
     try:
         for link in soup.find_all('a', href=True):
             href = link.get('href')
             if href not in seen:
                 seen.add(href)
                 total_pages.append(href)
     except Exception:
         # Best effort: keep what was collected, but do not swallow
         # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
         print("An exception occurred")
     return total_pages
    
 # Crawl the Featured Articles index and report how many distinct links it has.
 total_links = getLinks(
     "https://en.wikipedia.org/wiki/Wikipedia:Featured_articles"
 )
 print(len(total_links))
	# crawling website
	def getLinks(url):
	    """Return the distinct ``href`` values of the anchor tags found at *url*.

	    The page is fetched with ``urlopen`` and parsed with BeautifulSoup.
	    Each href is returned exactly as written in the markup (relative links
	    are not resolved), once only, in order of first appearance.

	    Args:
	        url: Address of the page to crawl.

	    Returns:
	        A list of unique href values; empty if the page has no links.

	    Errors raised while fetching or parsing the page propagate to the
	    caller. If collecting the links fails part-way, a message is printed
	    and the links gathered so far are returned.
	    """
	    # Release the HTTP connection as soon as the document has been parsed.
	    with urlopen(url) as html_page:
	        # Name the parser explicitly so the result does not depend on which
	        # optional parsers happen to be installed (and bs4 does not warn).
	        soup = BeautifulSoup(html_page, "html.parser")

	    total_pages = []
	    seen = set()  # O(1) duplicate check; the list keeps first-seen order
	    try:
	        for link in soup.find_all('a', href=True):
	            href = link.get('href')
	            if href not in seen:
	                seen.add(href)
	                total_pages.append(href)
	    except Exception:
	        # Best effort: keep what was collected, but do not swallow
	        # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
	        print("An exception occurred")
	    return total_pages

	# Crawl the Featured Articles index and report how many distinct links it has.
	total_links = getLinks(
	    "https://en.wikipedia.org/wiki/Wikipedia:Featured_articles"
	)
	print(len(total_links))
No results found