Created
March 5, 2023 04:59
-
-
Save kazi331/e660659ad84cb10a7dc5d2d37fea4e70 to your computer and use it in GitHub Desktop.
Scrape products from a website with Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from typing import Any, Dict, List, Optional

import requests
from bs4 import BeautifulSoup
# The URL of the website to scrape
url = "https://www.startech.com.bd/laptop-notebook"

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30


def parse_price(price_text: str) -> Optional[int]:
    """Convert a price label to an integer amount.

    The text before the first taka sign (U+09F3) is taken, thousands
    separators (commas) are removed, and the remainder is parsed as an
    integer.

    Args:
        price_text: Raw text of the price element, e.g. "75,000" followed
            by the taka sign.

    Returns:
        The price as an int, or None when the text is not a plain integer
        (for example an empty string or a non-numeric label).
    """
    number_text = price_text.split("\u09f3")[0].replace(",", "").strip()
    try:
        return int(number_text)
    except ValueError:
        # A non-numeric label must not abort the whole scrape.
        return None


def extract_product(product) -> Optional[Dict[str, Any]]:
    """Build the data dictionary for a single product card.

    Args:
        product: A parsed ``div.p-item-inner`` element, as returned by
            ``soup.find_all``.

    Returns:
        A dict with the keys ``name``, ``price``, ``image_url`` and
        ``descs``, or None when the card has no ``h4.p-item-name`` element.
        ``price`` and ``image_url`` are None when the corresponding markup
        is missing or unparsable; ``descs`` is then an empty list.
    """
    # find() returns None for a missing element, so every lookup is checked
    # before it is dereferenced.
    name_tag = product.find("h4", {"class": "p-item-name"})
    if name_tag is None:
        return None
    product_name = name_tag.text.strip()

    # Flatten the <li> items of every <ul> in the short description.
    descs: List[str] = []
    description_box = product.find("div", {"class": "short-description"})
    if description_box is not None:
        for description in description_box.find_all("ul"):
            descs.extend(
                item.text.strip() for item in description.find_all("li")
            )

    # The price is read from the first <span> inside the price container.
    product_price = None
    price_box = product.find("div", {"class": "p-item-price"})
    price_tag = price_box.find("span") if price_box is not None else None
    if price_tag is not None:
        product_price = parse_price(price_tag.text)

    # .get() avoids a KeyError when the <img> has no src attribute.
    image_tag = product.find("img")
    product_image = image_tag.get("src") if image_tag is not None else None

    return {
        "name": product_name,
        "price": product_price,
        "image_url": product_image,
        "descs": descs,
    }


def scrape_products(
    page_url: str = url, timeout: float = REQUEST_TIMEOUT
) -> List[Dict[str, Any]]:
    """Download a listing page and extract every product on it.

    Args:
        page_url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        One dictionary per product card that has a name (see
        ``extract_product`` for the keys).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    product_elements = soup.find_all("div", {"class": "p-item-inner"})
    print(f"Number of products found: {len(product_elements)}")  # Debugging line

    extracted = (extract_product(element) for element in product_elements)
    return [product for product in extracted if product is not None]


def main() -> None:
    """Scrape the default listing page and print the products as JSON."""
    product_data = scrape_products()
    print(f"Number of products scraped: {len(product_data)}")  # Debugging line
    # Convert the product data to a JSON string and print it
    print(json.dumps(product_data))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment