Created
November 9, 2015 07:01
-
-
Save kurozumi/0b37e8f929c540a6e185 to your computer and use it in GitHub Desktop.
【Python】アマゾン・ホビーカテゴリの予約商品のasinと発売日を取得
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""Print ASIN, title and release date of pre-order items from Amazon.co.jp.

Walks the first two pages of one Amazon.co.jp search listing with a headless
PhantomJS browser.  For every result item that carries a ``data-asin``
attribute and a "scheduled for release" label containing a ``YYYY/M/D`` date,
a dict with the keys ``asin``, ``title`` and ``release_data`` is printed and
also collected in the module-level list ``asins``.

NOTE: ``webdriver.PhantomJS`` and the ``find_element(s)_by_*`` helpers are
legacy Selenium APIs; this script needs a Selenium release that still ships
them.
"""
import re

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Search listing to scrape; ``%s`` is replaced by the 1-based page number.
SEARCH_URL = "http://www.amazon.co.jp/s/?node=2277721051&field-enc-merchantbin=AN1VRQENFRJN5&page=%s"

# Matches the <span> whose text contains the "scheduled for release" label.
RELEASE_XPATH = ".//span[contains(text(), '%s')]" % "発売予定"

# Release date as it appears inside the label text, e.g. 2015/11/9.
RELEASE_DATE_RE = re.compile(u"[0-9]{4}/[0-9]{1,2}/[0-9]{1,2}")

# Every record that was printed, in the order encountered.
asins = []

driver = webdriver.PhantomJS()
try:
    for page in range(1, 3):  # pages 1 and 2
        driver.get(SEARCH_URL % page)
        elems = driver.find_elements_by_css_selector("ul#s-results-list-atf li")
        for e in elems:
            data_asin = e.get_attribute("data-asin")
            if not data_asin:
                # <li> without an ASIN is not a product entry.
                continue

            try:
                text = e.find_element_by_xpath(RELEASE_XPATH).text
                title = e.find_element_by_tag_name("h2").text
            except NoSuchElementException:
                # No release label (or no title): not a pre-order item, skip it.
                continue

            release_date = RELEASE_DATE_RE.search(text)
            if release_date is None:
                # Label present but without a recognisable date: skip it.
                continue

            asin = {
                "asin": data_asin,
                "title": title,
                # Key spelling ("release_data") kept as-is so the printed
                # output stays compatible with existing consumers.
                "release_data": release_date.group(),
            }
            asins.append(asin)
            print(asin)
finally:
    # Always shut the browser process down, even if scraping failed midway.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment