Skip to content

Instantly share code, notes, and snippets.

@reefwing
Created March 26, 2023 01:12
Show Gist options
  • Save reefwing/63b2e77acb22e9346a4971ecfdc8ca08 to your computer and use it in GitHub Desktop.
Save reefwing/63b2e77acb22e9346a4971ecfdc8ca08 to your computer and use it in GitHub Desktop.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape every quote and its author from quotes.toscrape.com, following the
# "Next" pagination link from page to page until no such link remains.

# Initialize variables
base_url = "http://quotes.toscrape.com"
current_url = base_url
quotes = []

# Loop through all pages of quotes
while True:
    # Send a request to the current page and get its content.
    # requests has no default timeout, so set one explicitly to avoid
    # hanging forever on a stalled connection.
    response = requests.get(current_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # silently end pagination with a partial result.
    response.raise_for_status()
    content = response.content

    # Parse the content with BeautifulSoup
    soup = BeautifulSoup(content, "html.parser")

    # Find all the quote and author elements on the page
    for quote in soup.find_all("div", class_="quote"):
        text = quote.find("span", class_="text").text
        author = quote.find("small", class_="author").text
        quotes.append({"quote": text, "author": author})

    # Check if there is a "Next" button on the page
    next_button = soup.find("li", class_="next")
    if next_button:
        # If there is a "Next" button, update the current URL to the next page.
        # urljoin resolves the href against the page it was found on, so
        # root-relative, page-relative and absolute links all work (plain
        # string concatenation only handled hrefs starting with "/").
        next_url = next_button.find("a")["href"]
        current_url = urljoin(current_url, next_url)
    else:
        # If there is no "Next" button, break out of the loop
        break

# Print the list of quotes
print(quotes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment