Skip to content

Instantly share code, notes, and snippets.

@ahmedengu
Created June 17, 2018 09:15
Show Gist options
  • Save ahmedengu/fcb42c2abf04d8d636d9fc665e2ce691 to your computer and use it in GitHub Desktop.
Save ahmedengu/fcb42c2abf04d8d636d9fc665e2ce691 to your computer and use it in GitHub Desktop.
Web scrape of Amazon seller reviews using Python
import csv
import re
import sys
import time
import xlrd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
# Scrape the review cards shown on one Amazon profile page and print, for each
# card: product name, ASIN, canonical product link, star text, numeric star
# rating, review comment, review date and review link.

# Pulls the ASIN out of a product URL of the form .../dp/<asin> or
# .../gp/product/<asin>.  Raw string so that "\w" reaches the regex engine
# verbatim instead of relying on an invalid string escape.
ASIN_URL_PATTERN = re.compile(
    r"http[s]?://www.amazon.(\w+)(.*)/(dp|gp/product)/(?P<asin>\w+).*",
    flags=re.IGNORECASE)


def _extract_asin(product_url):
    """Return the ASIN captured from *product_url*, or None if it has none.

    ``product_url`` may be None (an anchor without an ``href``); that is
    treated the same as a URL that does not match.
    """
    match = ASIN_URL_PATTERN.match(product_url or "")
    if match is None:
        return None
    return match.group("asin")


chromeOptions = webdriver.ChromeOptions()
# NOTE(review): presumably 2 means "block", i.e. images are not loaded to
# speed up page loads -- confirm against the Chrome preference docs.
prefs = {"profile.managed_default_content_settings.images": 2}
chromeOptions.add_experimental_option("prefs", prefs)
chromeOptions.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chromeOptions)
try:
    driver.maximize_window()
    amznLink1 = "https://www.amazon.com/gp/profile/amzn1.account.AECVUQ2HIJB47HBFZ2PM5GQ5IPVA"
    driver.get(amznLink1)

    # One element per review card.  Alternative selector previously noted by
    # the author: '//div [@data-story-id]'.
    ProductBlock = driver.find_elements_by_xpath(
        '//div [@class="desktop card profile-at-card undefined reviews-card"]')

    for p in ProductBlock:
        # Every per-card lookup uses a relative XPath (".//").  An XPath that
        # starts with "//" is evaluated against the whole document even when
        # called on an element, which made each card report the first card's
        # values.
        # NOTE(review): .encode() yields bytes, which print as b'...' on
        # Python 3 -- kept as in the original; confirm the target version.
        ProductName = p.find_element_by_xpath(
            './/div [@class="a-section profile-at-product-title-container profile-at-product-box-element"]/span'
        ).text.encode('utf-8', 'ignore')

        product_href = p.find_element_by_xpath(
            './/a [@class="a-link-normal a-text-normal"]').get_attribute("href")
        # Use the captured group, not the match object, so the link below is
        # a real URL; both stay None when the href carries no ASIN.
        asin = _extract_asin(product_href)
        ProductLink = "https://www.amazon.com/dp/" + asin if asin else None

        Stars = p.find_element_by_xpath(
            './/div [@class="a-row profile-at-product-review-stars"]'
        ).text.encode('utf-8', 'ignore')
        # First space-separated token of the icon's alt text.
        Star = p.find_element_by_xpath(
            './/span [@class="a-icon-alt"]').get_attribute('innerHTML').split(' ')[0]
        Comment = p.find_element_by_xpath(
            './/p [@class="a-spacing-small a-spacing-top-mini a-color-base"]'
        ).text.encode('utf-8', 'ignore')
        # The descriptor text is split on ' · ' and the second part is taken
        # as the review date.
        DateofReview = p.find_element_by_xpath(
            './/span [@class="a-profile-descriptor"]').text.split(' · ')[1]
        ReviewLink = p.find_element_by_xpath(
            './/a [@class="a-link-normal profile-at-review-link a-text-normal"]'
        ).get_attribute("href")

        print(ProductName,
              asin,
              ProductLink,
              Stars,
              Star,
              Comment,
              DateofReview,
              ReviewLink
              )
finally:
    # Always shut the browser down, even when a lookup above raises, so no
    # headless Chrome process is left behind.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment