triangletodd · August 14, 2024 02:15
diff --git a/scrape_goodreads_quotes.py b/scrape_goodreads_quotes.py
 #!/usr/bin/env python3
 import json
 import requests
 from bs4 import BeautifulSoup

 # Scrape pages 1-100 of the Goodreads quotes listing and print the collected
 # quotes to stdout as a JSON array of {"quote", "author", "title"} objects.

 # Punctuation used to take the scraped text apart, written as escapes so the
 # literals cannot be corrupted by a mismatched source-file encoding. The
 # previous literals were mojibake (UTF-8 bytes read through a DOS code page);
 # they never occur in the decoded page text, so the split never found a
 # separator and no quote was ever collected.
 ATTRIBUTION_SEPARATOR = "\u2015"  # HORIZONTAL BAR between quote and attribution
 LEFT_CURLY_QUOTE = "\u201c"
 RIGHT_CURLY_QUOTE = "\u201d"

 # requests waits indefinitely when no timeout is given; bound each fetch.
 REQUEST_TIMEOUT = 30  # seconds

 # Loop invariants: built once instead of on every page.
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
 }
 target_class = 'quoteText'

 quotes = []
 for i in range(1, 101):
     url = f'https://www.goodreads.com/quotes?page={i}'
     response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
     soup = BeautifulSoup(response.content, 'html.parser')
     divs = soup.find_all('div', class_=target_class)

     for div in divs:
         parts = div.get_text(strip=True).split(ATTRIBUTION_SEPARATOR)
         if len(parts) < 2:
             # No attribution separator in this div: nothing to parse.
             continue

         quote = (
             parts[0]
             .strip()
             .replace(LEFT_CURLY_QUOTE, '')
             .replace(RIGHT_CURLY_QUOTE, '')
         )

         # Text after the separator is "author" or "author,title"; only the
         # first comma splits, so a title may itself contain commas.
         # partition() yields an empty title when there is no comma.
         author, _, title = parts[1].partition(",")

         quotes.append({
             "quote": quote,
             "author": author.strip(),
             "title": title.strip(),
         })

 json_string = json.dumps(quotes, indent=4)

 print(json_string)
	#!/usr/bin/env python3
	import json
	import requests
	from bs4 import BeautifulSoup

	# Scrape pages 1-100 of the Goodreads quotes listing and print the collected
	# quotes to stdout as a JSON array of {"quote", "author", "title"} objects.
	# The nesting of the loops and branches below is restored; this copy had
	# lost its indentation and could not be parsed.

	# Punctuation used to take the scraped text apart, written as escapes so the
	# literals cannot be corrupted by a mismatched source-file encoding. The
	# previous literals were mojibake (UTF-8 bytes read through a DOS code page);
	# they never occur in the decoded page text, so the split never found a
	# separator and no quote was ever collected.
	ATTRIBUTION_SEPARATOR = "\u2015"  # HORIZONTAL BAR between quote and attribution
	LEFT_CURLY_QUOTE = "\u201c"
	RIGHT_CURLY_QUOTE = "\u201d"

	# requests waits indefinitely when no timeout is given; bound each fetch.
	REQUEST_TIMEOUT = 30  # seconds

	# Loop invariants: built once instead of on every page.
	headers = {
	    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
	}
	target_class = 'quoteText'

	quotes = []
	for i in range(1, 101):
	    url = f'https://www.goodreads.com/quotes?page={i}'
	    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
	    soup = BeautifulSoup(response.content, 'html.parser')
	    divs = soup.find_all('div', class_=target_class)

	    for div in divs:
	        parts = div.get_text(strip=True).split(ATTRIBUTION_SEPARATOR)
	        if len(parts) < 2:
	            # No attribution separator in this div: nothing to parse.
	            continue

	        quote = (
	            parts[0]
	            .strip()
	            .replace(LEFT_CURLY_QUOTE, '')
	            .replace(RIGHT_CURLY_QUOTE, '')
	        )

	        # Text after the separator is "author" or "author,title"; only the
	        # first comma splits, so a title may itself contain commas.
	        # partition() yields an empty title when there is no comma.
	        author, _, title = parts[1].partition(",")

	        quotes.append({
	            "quote": quote,
	            "author": author.strip(),
	            "title": title.strip(),
	        })

	json_string = json.dumps(quotes, indent=4)

	print(json_string)
No results found