Skip to content

Instantly share code, notes, and snippets.

@valuex
Created June 10, 2024 14:58
Show Gist options
  • Save valuex/3c8497584c0d78f6aee1e7144005583e to your computer and use it in GitHub Desktop.
Save valuex/3c8497584c0d78f6aee1e7144005583e to your computer and use it in GitHub Desktop.
1
from playwright.sync_api import sync_playwright
from playwright.sync_api import Page, expect
import re
import time
# def get_exhibit_items()
def _exhibit_row(item):
    """Build one tab-separated output row for a single product card.

    Args:
        item: Playwright ``Locator`` for one ``div[class="col-inner"]`` card.

    Returns:
        A string of four tab-separated fields: the card's ``onclick``
        attribute, the text of its ``<strong>`` element, the text of the
        fourth ``div`` under the card's grandparent, and the ``src`` of the
        second ``<img>`` inside the third such ``div``.  Any field that
        cannot be read is emitted as an empty string, so one malformed card
        neither aborts the run nor inherits values from the previous card.
    """
    # Imported locally: the file's top-level imports do not bring
    # Playwright's base exception class into scope.
    from playwright.sync_api import Error as PlaywrightError

    # get_attribute() returns None when the attribute is absent; normalise
    # to "" so the final join never sees a non-string.
    try:
        item_href = item.get_attribute('onclick') or ""
    except PlaywrightError:
        item_href = ""

    try:
        item_name = item.locator('strong').inner_text()
    except PlaywrightError:
        item_name = ""

    # The image and exhibitor label are located by position among the divs
    # under the card's grandparent element.
    grandparent = item.locator('xpath=../..')
    label_divs = grandparent.locator('div').all()

    try:
        post_img = label_divs[2].locator('img').all()[1]
        post_img_url = post_img.get_attribute('src') or ""
    except (IndexError, PlaywrightError):
        post_img_url = ""

    try:
        exhibit_name = label_divs[3].inner_text()
    except (IndexError, PlaywrightError):
        exhibit_name = ""

    return '\t'.join((item_href, item_name, exhibit_name, post_img_url))


def main():
    """Scrape the JPCA 2024 product listing and print one row per product.

    Opens a visible (non-headless) Chromium window, visits listing pages
    ``page=0`` through ``page=10``, and prints a tab-separated line for every
    ``div[class="col-inner"]`` card found on each page.

    Raises:
        playwright.sync_api.Error: if a page fails to load or the '製品'
            text does not appear within the timeout (navigation errors are
            deliberately not swallowed).
    """
    page_count = 11
    with sync_playwright() as p:
        # timeout=0 disables Playwright's browser-launch timeout.
        browser = p.chromium.launch(headless=False, timeout=0)
        try:
            context = browser.new_context()
            page = context.new_page()
            for ipage in range(page_count):
                page_url = (
                    'https://unifiedsearch.jcdbizmatch.jp/jpca2024/jp/jpca/product'
                    f'?page={ipage}&items=3&order=1'
                )
                # Playwright timeouts are expressed in milliseconds.
                page.goto(page_url, timeout=60000)
                page.wait_for_selector('text=製品', timeout=12000)
                # Fixed pause after the marker text appears; presumably gives
                # the remaining cards time to render -- TODO confirm.
                time.sleep(3)
                for each_item in page.locator('div[class="col-inner"]').all():
                    print(_exhibit_row(each_item))
        finally:
            # Release the browser (and its contexts) even if a page fails.
            browser.close()
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not when imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment