Last active
August 5, 2024 22:24
-
-
Save LCPallares/52f1bf4cfa44ae8458484ed1579d5c99 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import csv | |
from datetime import datetime | |
def scrape_amazon_bestsellers():
    """Scrape Amazon's best-selling books page and return one record per book.

    The CSS classes used below are tied to the page markup at the time this
    was written; if Amazon changes its HTML the selectors must be updated.

    Returns:
        list[dict]: One dict per grid item with the keys ``rank``, ``title``,
        ``author``, ``price`` (float), ``score`` (float), ``type_cover``,
        ``numbers_reviews`` (int) and ``date_scraped`` (``YYYY-MM-DD``).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.amazon.com/best-sellers-books-Amazon/zgbs/books/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero books.
    response.raise_for_status()

    # For offline debugging, parse a saved copy of the page instead:
    #     with open('amazon-bestsellers-scraper.html', 'r') as file:
    #         html = file.read()
    #     soup = BeautifulSoup(html, 'html.parser')
    #
    # Parse the body of the *response*; the `requests` module itself has no
    # `content` attribute.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Computed once so every row of a single run carries the same date.
    date_scraped = datetime.now().strftime("%Y-%m-%d")

    books = []
    for item in soup.find_all("div", id="gridItemRoot"):
        rank = item.find("span", class_="zg-bdg-text").text.strip().replace("#", "")
        # The second "a-link-normal" anchor of an item holds the title text.
        title = item.find_all("a", class_="a-link-normal")[1].text.strip()
        author = item.find("div", class_="a-row a-size-small").text.strip()

        # Rating text looks like "<score> <more words>"; keep the leading number.
        score_tag = item.find("span", class_="a-icon-alt")
        score = score_tag.text.split(" ")[0] if score_tag else "0.0"

        # Drop the leading currency symbol and any thousands separators so
        # float() accepts the value.
        price_tag = item.find("span", class_="_cDEzb_p13n-sc-price_3mJ9Z")
        price = price_tag.text.strip()[1:].replace(",", "") if price_tag else "0.0"

        type_cover = item.find("span", class_="a-size-small a-color-secondary a-text-normal").text

        # Review counts use thousands separators; fall back to 0 when the
        # matched text is not purely numeric.
        numbers_reviews = item.find("span", class_="a-size-small").text.replace(",", "")
        numbers_reviews = int(numbers_reviews) if numbers_reviews.isdigit() else 0

        books.append({
            "rank": rank,
            "title": title,
            "author": author,
            "price": float(price),
            "score": float(score),
            "type_cover": type_cover,
            "numbers_reviews": numbers_reviews,
            "date_scraped": date_scraped,
        })

    print(books)
    return books
def save_to_csv(books, filename):
    """Write book records to ``filename`` as a UTF-8 CSV file with a header row.

    Args:
        books: Sequence of dicts; the keys of the first record define the
            columns and their order.
        filename: Path of the CSV file to create or overwrite.

    An empty ``books`` produces an empty file rather than raising
    ``IndexError`` on ``books[0]``.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        if not books:
            # No record to derive a header from; leave the file empty.
            return
        fieldnames = list(books[0].keys())
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(books)
if __name__ == "__main__":
    # Script entry point: scrape the best-seller list, persist it as CSV,
    # then report how many records were written.
    bestsellers = scrape_amazon_bestsellers()
    save_to_csv(books=bestsellers, filename="amazon_bestsellers.csv")
    print(f"Scraped {len(bestsellers)} books and saved to amazon_bestsellers.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.io as pio

# Assumes `fig` is an existing Plotly figure defined elsewhere.
# Render it as an embeddable HTML fragment (no <html> wrapper), loading
# plotly.js from the CDN instead of inlining it.
html_string = pio.to_html(fig, full_html=False, include_plotlyjs='cdn')

# Optional: save the fragment to a file. The encoding is set explicitly so
# non-ASCII text in the chart is written the same way on every platform.
with open('mi_grafico.html', 'w', encoding='utf-8') as f:
    f.write(html_string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment