Last active
April 21, 2024 18:32
-
-
Save scrapehero/2a1be61eb28cfa577e379e2b69b31c90 to your computer and use it in GitHub Desktop.
Python 3 Code to scrape prices from ebay.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from pprint import pprint | |
from traceback import format_exc | |
import requests | |
import unicodecsv as csv | |
from lxml import html | |
def parse(brand):
    """Scrape the first eBay search-results page for *brand*.

    The search page is requested up to five times; an attempt counts as
    failed when the request raises a ``requests`` exception (connection
    error, timeout, ...) or the server answers with a non-200 status.

    Args:
        brand: Search term to look up on ebay.com.

    Returns:
        A list of dicts with the keys ``'url'``, ``'title'`` and ``'price'``,
        one per listing found on the page. An empty list is returned when
        every attempt failed or the page body was empty.
    """
    from urllib.parse import quote_plus

    # URL-encode the search term so spaces, '&', '#', etc. cannot corrupt
    # the query string.
    url = 'https://www.ebay.com/sch/i.html?_nkw={0}&_sacat=0'.format(quote_plus(brand))
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}

    # Retries for handling network errors and non-200 responses.
    for _ in range(5):
        print("Retrieving %s" % (url))
        try:
            # TLS verification is left at its secure default, and a timeout
            # keeps a stalled connection from hanging the script forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.exceptions.RequestException as error:
            print("Request failed: %s" % (error))
            continue
        if response.status_code == 200:
            break
    else:
        # Every attempt failed; there is nothing to parse.
        return []

    # lxml refuses to parse an empty document, so bail out early.
    if not response.text.strip():
        return []

    print("Parsing page")
    parser = html.fromstring(response.text)

    # NOTE(review): these selectors depend on eBay's page markup; if no
    # listings are found, verify them against the current page source.
    product_listings = parser.xpath('//li[contains(@id,"results-listing")]')
    raw_result_count = parser.xpath("//h1[contains(@class,'count-heading')]//text()")
    result_count = ''.join(raw_result_count).strip()
    print("Found {0} for {1}".format(result_count, brand))

    scraped_products = []
    for product in product_listings:
        raw_url = product.xpath('.//a[contains(@class,"item__link")]/@href')
        raw_title = product.xpath('.//h3[contains(@class,"item__title")]//text()')
        raw_product_type = product.xpath('.//h3[contains(@class,"item__title")]/span[@class="LIGHT_HIGHLIGHT"]/text()')
        raw_price = product.xpath('.//span[contains(@class,"s-item__price")]//text()')

        # Join the text nodes and collapse runs of whitespace to one space.
        price = ' '.join(' '.join(raw_price).split())
        title = ' '.join(' '.join(raw_title).split())

        # Strip the highlighted label text (if any) from the title.
        product_type = ''.join(raw_product_type)
        title = title.replace(product_type, '').strip()

        scraped_products.append({
            # A listing without a link keeps its row with an empty URL
            # instead of raising IndexError.
            'url': raw_url[0] if raw_url else '',
            'title': title,
            'price': price,
        })
    return scraped_products
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument (the brand to
    # search for), scrapes it, and dumps the listings to a CSV file.
    cli = argparse.ArgumentParser()
    cli.add_argument('brand', help='Brand Name')
    brand = cli.parse_args().brand

    scraped_data = parse(brand)
    if not scraped_data:
        print("No data scraped")
    else:
        print("Writing scraped data to %s-ebay-scraped-data.csv" % (brand))
        # The file is opened in binary mode for the unicodecsv writer
        # imported as `csv`.
        with open('%s-ebay-scraped-data.csv' % (brand), 'wb') as out_file:
            csv_writer = csv.DictWriter(
                out_file,
                fieldnames=["title", "price", "url"],
                quoting=csv.QUOTE_ALL,
            )
            csv_writer.writeheader()
            for row in scraped_data:
                csv_writer.writerow(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@jenaalsup It may not have anything to do with what you are doing. I'm running into the same thing when I run the code, and last year it worked just fine. I have a feeling it has to do with the way the code is parsing the HTML. If you print response.text, you will see that results are definitely being returned. My only suggestion would be to tackle each statement that transforms the data, printing the result after each transformation to see what is happening.