calthoff · December 4, 2019 03:29
diff --git a/scraper_file.py b/scraper_file.py
 import urllib.request
 from bs4 import BeautifulSoup


 class Scraper:
    """Collect hyperlinks from a single web page.

    Attributes:
        site: URL of the page to fetch; handed unchanged to
            ``urllib.request.urlopen``.
    """

    def __init__(self, site):
        """Remember the URL of the page to scrape."""
        self.site = site

    def scrape(self, output_path="output.txt"):
        """Fetch the page and record every link whose href contains 'html'.

        Each matching href is printed (preceded by a newline) and written
        on its own line to ``output_path``, which is truncated first.

        Args:
            output_path: File the links are written to. Defaults to
                ``output.txt`` in the current working directory.

        Raises:
            urllib.error.URLError: If the page cannot be fetched.
        """
        # Release the HTTP connection as soon as the body has been read.
        with urllib.request.urlopen(self.site) as response:
            html = response.read()
        sp = BeautifulSoup(html, 'html.parser')
        # Explicit UTF-8 so non-ASCII hrefs do not depend on the locale.
        with open(output_path, "w", encoding="utf-8") as f:
            for tag in sp.find_all('a'):
                url = tag.get('href')
                # Anchors without an href yield None; skip those.
                if url and 'html' in url:
                    print("\n" + url)
                    f.write(url + "\n")

 # Script entry: scrape the Google News front page.
 Scraper(site='https://news.google.com/').scrape()
	import urllib.request
	from bs4 import BeautifulSoup


	class Scraper:
	    """Collect hyperlinks from a single web page.

	    Attributes:
	        site: URL of the page to fetch; handed unchanged to
	            ``urllib.request.urlopen``.
	    """

	    def __init__(self, site):
	        """Remember the URL of the page to scrape."""
	        self.site = site

	    def scrape(self, output_path="output.txt"):
	        """Fetch the page and record every link whose href contains 'html'.

	        Each matching href is printed (preceded by a newline) and written
	        on its own line to ``output_path``, which is truncated first.

	        Args:
	            output_path: File the links are written to. Defaults to
	                ``output.txt`` in the current working directory.

	        Raises:
	            urllib.error.URLError: If the page cannot be fetched.
	        """
	        # Release the HTTP connection as soon as the body has been read.
	        with urllib.request.urlopen(self.site) as response:
	            html = response.read()
	        sp = BeautifulSoup(html, 'html.parser')
	        # Explicit UTF-8 so non-ASCII hrefs do not depend on the locale.
	        with open(output_path, "w", encoding="utf-8") as f:
	            for tag in sp.find_all('a'):
	                url = tag.get('href')
	                # Anchors without an href yield None; skip those.
	                if url and 'html' in url:
	                    print("\n" + url)
	                    f.write(url + "\n")

	# Script entry: scrape the Google News front page.
	Scraper(site='https://news.google.com/').scrape()
No results found