Last active
July 9, 2019 07:07
-
-
Save aijogja/46aa9d1d810f0159cc9679b2938077f2 to your computer and use it in GitHub Desktop.
Selenium Tokopedia Search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
selenium==3.141.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib import parse

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Search parameters read by main().
PENCARIAN = {
    # Text typed into the site's search box.
    'keyword': 'keyword',
    # Text a product's `_2rQtYSxg` element must equal for its title to be
    # printed (presumably the shop name shown on the product card).
    'nama_toko': 'shop name',
    # Pagination limit. Kept as a string because it is compared directly
    # with the `page` value parsed from the URL query string.
    'deep': '10',
}
def main():
    """Search Tokopedia for a keyword and print one shop's product titles.

    Starts Chrome through ``./chromedriver``, submits
    ``PENCARIAN['keyword']`` in the site's search box and then walks the
    result pages. On each page, every product element whose ``_2rQtYSxg``
    child text equals ``PENCARIAN['nama_toko']`` has its ``<h3>`` text
    printed. Paging stops when the URL's ``page`` query value equals
    ``PENCARIAN['deep']`` (that page itself is not listed), or when no
    ``>`` control is present. The browser is always closed on exit.
    """
    driver = webdriver.Chrome(executable_path='./chromedriver')

    def pagination_clicker():
        """Process the current results page.

        Returns:
            True if the ``>`` (next page) control was clicked, False when
            paging is finished.

        Raises:
            StaleElementReferenceException: if the page re-rendered while
                its elements were being read; the caller retries.
        """
        # Waiting for the pagination controls doubles as "wait for the
        # results to render". A result set without any pagination controls
        # times out; treat that as "no next page" instead of crashing.
        try:
            paginations = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located(
                    (By.CLASS_NAME, "GUHElpkt")
                )
            )
        except TimeoutException:
            paginations = []
        products_toko = driver.find_elements_by_class_name('vlEGRFVq')

        # The current page number is only available from the URL query;
        # the first results page carries no `page` parameter.
        qs = parse.parse_qs(parse.urlsplit(driver.current_url).query)
        if 'page' in qs:
            page = qs['page'][0]
            print('Page %s' % page)
            if page == PENCARIAN['deep']:
                print('Selesai')
                return False
        else:
            print('Page 1')

        # List the products that belong to the wanted shop.
        for prd in products_toko:
            product_link = prd.find_element_by_class_name('_2rQtYSxg')
            if product_link.text == PENCARIAN['nama_toko']:
                product_title = prd.find_element_by_tag_name('h3')
                print(product_title.text)

        for pagination in paginations:
            if pagination.text == '>':
                # Click through JavaScript rather than WebElement.click().
                driver.execute_script("arguments[0].click();", pagination)
                # Stop scanning immediately: the remaining elements belong
                # to the page that is being replaced and would go stale.
                return True
        return False

    try:
        driver.get('https://www.tokopedia.com')
        print('Opening the browser ...')
        searchbox_el = driver.find_element_by_id('search-keyword')
        searchbox_el.send_keys(PENCARIAN['keyword'])
        searchbox_el.submit()
        print('Submit the keyword')

        # Iterate instead of recursing so that a long result set or repeated
        # stale-element retries cannot exhaust the call stack, and so that
        # finished pages are never revisited while unwinding.
        while True:
            try:
                has_next = pagination_clicker()
            except StaleElementReferenceException:
                # The DOM changed under us (typically right after a page
                # switch); re-read the page from scratch.
                continue
            if not has_next:
                break
    finally:
        # Always release the chromedriver process and its browser window.
        driver.quit()
# Run the search only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment