LordGhostX · January 14, 2021 15:29
diff --git a/quote-scraper.py b/quote-scraper.py
 import requests
 from bs4 import BeautifulSoup


 def get_quotes(tag, page):
    """Scrape one page of quotes for a tag from quotes.toscrape.com.

    Args:
        tag: Tag name, interpolated into the URL path.
        page: Page number, interpolated into the URL path.

    Returns:
        A list of dicts with the keys "text", "author" and "tags" (a list of
        tag strings). The list is empty when the response status is not 200
        or when the page holds no quote blocks.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # Without a timeout a stalled connection would block the scraper forever.
    r = requests.get(
        f"http://quotes.toscrape.com/tag/{tag}/page/{page}/", timeout=30
    )
    if r.status_code != 200:
        return []

    quotes = []
    quotes_page = BeautifulSoup(r.text, "html.parser")
    for block in quotes_page.find_all("div", {"class": "quote"}):
        try:
            tag_links = block.find("div", {"class": "tags"}).find_all("a")
            quotes.append({
                # [1:-1] drops the first and last character of the span text
                # (presumably the quotation marks wrapping the quote).
                "text": block.find("span").text.strip()[1:-1],
                "author": block.find("small").text.strip(),
                "tags": [link.text.strip() for link in tag_links],
            })
        except AttributeError:
            # find() returned None for a missing element; skip this
            # malformed quote block and keep the rest of the page.
            continue

    return quotes


 def main(tag, start, end):
    """Scrape quotes for a tag over a page range and save them as CSV.

    Args:
        tag: Tag to scrape; also used in the output file name.
        start: First page number to fetch.
        end: Last page number to fetch (inclusive), or -1 to keep fetching
            consecutive pages until one yields no quotes.

    The result is written to "{tag}-quotes-{start}-{end}.csv" in the current
    directory as semicolon-delimited rows under the header
    "author;text;tags", with each quote's tags joined by commas. In the
    unlimited mode, ``end`` in the file name is the last page that yielded
    quotes.
    """
    import csv  # local import: only this function needs the csv module

    quotes = []

    if end == -1:
        page = start
        while True:
            page_quotes = get_quotes(tag, page)
            if not page_quotes:
                break
            quotes.extend(page_quotes)
            page += 1
        # Step back to the last page that actually returned quotes.
        end = page - 1
    else:
        for page in range(start, end + 1):
            quotes.extend(get_quotes(tag, page))

    out_name = f"{tag}-quotes-{start}-{end}.csv"
    # newline="" is what the csv module expects; UTF-8 keeps non-ASCII quote
    # text from failing on platforms with a narrower default encoding.
    with open(out_name, "w", newline="", encoding="utf-8") as f:
        # csv.writer quotes any field containing ';', '"' or a newline, so
        # such text no longer corrupts the column layout.
        writer = csv.writer(f, delimiter=";", lineterminator="\n")
        writer.writerow(["author", "text", "tags"])
        writer.writerows(
            [q["author"], q["text"], ",".join(q["tags"])] for q in quotes
        )


 if __name__ == "__main__":
    # Gather the scraping parameters interactively; int() raises ValueError
    # if a page prompt is answered with something that is not an integer.
    chosen_tag = input("Enter quotes tag you want to scrape e.g love, life: ")
    first_page = int(input("Enter page to start scraping from e.g 1, 5, 3: "))
    last_page = int(input("Enter page to stop scraping from (-1 means unlimited): "))

    main(tag=chosen_tag, start=first_page, end=last_page)
	import requests
	from bs4 import BeautifulSoup


	def get_quotes(tag, page):
	    """Fetch a single tag page from quotes.toscrape.com and parse its quotes.

	    Args:
	        tag: Tag name placed in the URL path.
	        page: Page number placed in the URL path.

	    Returns:
	        A list of dicts, each with "text", "author" and "tags" (list of
	        strings). Empty if the status code is not 200 or no quote blocks
	        are found.

	    Raises:
	        requests.exceptions.RequestException: on connection failure or
	            when the timeout expires.
	    """
	    quotes = []

	    # A timeout prevents an unresponsive server from hanging the script.
	    r = requests.get(
	        f"http://quotes.toscrape.com/tag/{tag}/page/{page}/", timeout=30
	    )
	    if r.status_code == 200:
	        quotes_page = BeautifulSoup(r.text, "html.parser")
	        for quote_div in quotes_page.find_all("div", {"class": "quote"}):
	            try:
	                tags_div = quote_div.find("div", {"class": "tags"})
	                quotes.append({
	                    # Slice off the first and last character of the text
	                    # (presumably the surrounding quotation marks).
	                    "text": quote_div.find("span").text.strip()[1:-1],
	                    "author": quote_div.find("small").text.strip(),
	                    "tags": [a.text.strip() for a in tags_div.find_all("a")],
	                })
	            except AttributeError:
	                # A missing element makes find() return None; skip only
	                # that quote instead of hiding every possible error.
	                continue

	    return quotes


	def main(tag, start, end):
	    """Collect quotes for a tag from a range of pages and write a CSV file.

	    Args:
	        tag: Tag to scrape; also part of the output file name.
	        start: First page number to fetch.
	        end: Last page number to fetch (inclusive); -1 means fetch
	            consecutive pages until one returns no quotes.

	    Output goes to "{tag}-quotes-{start}-{end}.csv" in the current
	    directory: a header row "author;text;tags" followed by one
	    semicolon-delimited row per quote, with tags joined by commas. When
	    ``end`` is -1 the file name uses the last page that returned quotes.
	    """
	    import csv  # imported here because only this function uses it

	    quotes = []

	    if end == -1:
	        end = start
	        while True:
	            new_quotes = get_quotes(tag, end)
	            if not new_quotes:
	                break
	            quotes.extend(new_quotes)
	            end += 1
	        # The loop stopped on an empty page; the previous one was the last.
	        end -= 1
	    else:
	        for page in range(start, end + 1):
	            quotes.extend(get_quotes(tag, page))

	    # UTF-8 avoids encode errors on non-ASCII quote text; newline="" leaves
	    # line endings under the control of the csv writer.
	    with open(
	        f"{tag}-quotes-{start}-{end}.csv", "w", newline="", encoding="utf-8"
	    ) as f:
	        # csv.writer quotes fields that contain ';', '"' or newlines, which
	        # plain string joining would have written as broken rows.
	        writer = csv.writer(f, delimiter=";", lineterminator="\n")
	        writer.writerow(["author", "text", "tags"])
	        for quote in quotes:
	            writer.writerow(
	                [quote["author"], quote["text"], ",".join(quote["tags"])]
	            )


	if __name__ == "__main__":
	    # Prompt for the scraping parameters, then run the scraper. The page
	    # prompts are converted with int(), so non-integer input raises
	    # ValueError.
	    tag = input("Enter quotes tag you want to scrape e.g love, life: ")
	    start = int(input("Enter page to start scraping from e.g 1, 5, 3: "))
	    end = int(input("Enter page to stop scraping from (-1 means unlimited): "))

	    main(tag, start, end)
No results found