Last active: August 29, 2015 14:27
A scrapy spider that implements moving to the next page when the next-page button is driven by JavaScript, plus the script that runs when a next-page number link is clicked on Naver Shopping (네이버 지식쇼핑).
# Source: https://ide.c9.io/redapple/so_18810850
from scrapy.spider import BaseSpider
from scrapy.http import Request, FormRequest
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.spiders import Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor


class MySpider(BaseSpider):
    name = "commu"
    allowed_domains = ["saintbarnabas.hodesiq.com"]
    start_urls = ["http://saintbarnabas.hodesiq.com/joblist.asp?user_id=",
                  ]
    extractor = SgmlLinkExtractor(allow=('\d+'), restrict_xpaths=('*'))

    def parse(self, response):
        try:
            # Follow every numeric link on the page to the item callback.
            links = self.extractor.extract_links(response)
            if links:
                for link in links:
                    yield Request(url=link.url, callback=self.parse_items)
        except:
            pass
        hxs = HtmlXPathSelector(response)
        # The "Next" button is JavaScript-driven, so re-submit the paging form
        # instead of following an href.
        next_links = hxs.select('//td[@align="right"]/a[contains(., "Next")]')
        if next_links:
            freq = FormRequest.from_response(response, formname="frm",
                                             formdata={'move_indicator': 'next'})
            if freq:
                yield freq

    def parse_items(self, response):
        print "parse_items"
// Script captured with the browser dev tools on the Naver Shopping (지식쇼핑) page;
// this is the click handler behind the next-page number links.
this.goPage = function(e){
    if (e<1) {
        this.options.pagingIndex=1;
    } else {
        this.options.pagingIndex=e;
    }
    this.options.frm="NVSHPAG";
    this._ajaxLoad(false);
    jQuery("#_content_area").get(0).scrollIntoView();
};
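The handler only bumps options.pagingIndex, tags the request with frm=NVSHPAG, and re-runs the AJAX load, so a crawler does not have to execute the JavaScript at all: it just needs to reproduce the resulting request with the new page number. A minimal sketch of that idea, assuming (as the spider below does) that the page index can be posted back through the search form as the field "e":

from scrapy.http import FormRequest

def follow_page(response, page_number):
    # Mimic goPage(n) without running JavaScript: re-submit the search form
    # with the next page index. The form name "frm" and the field name "e"
    # are assumptions taken from the spider below, not from the page markup.
    return FormRequest.from_response(
        response,
        formname="frm",
        formdata={"e": str(page_number)},
        dont_click=True,
    )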
import scrapy
from scrapy.http import Request, FormRequest
from wise_shop.items import WiseShopItem


class WiseSpider(scrapy.Spider):
    name = "wise"
    allowed_domains = ["shopping.naver.com"]
    start_urls = [
        "http://shopping.naver.com/search/all_search.nhn?where=all&frm=NVSCTAB&query=%EB%85%B8%ED%8A%B8%EB%B6%81"
    ]
    e = 1

    def parse(self, response):
        for sel in response.xpath('//ul[@class="goods_list"]/li'):
            item = WiseShopItem()
            item['product_name'] = sel.xpath('div[@class="info"]/a/@title').extract()[0].encode('utf-8')
            item['min_price'] = sel.xpath('div[@class="info"]/span[@class="price"]/em/span/text()').extract()[0].encode('utf-8')
            #item['max_price'] = sel.xpath('div[@class="info"]/span[@class="price"][strong]/span/text()').extract()[0].encode('utf-8')
            yield item

        next_page = response.xpath('//div[@class="co_paginate"]/strong/following-sibling::*').extract()[0].encode('utf-8')
        # WiseSpider.e = response.xpath('//div[@class="co_paginate"]/strong/following-sibling::*/text()').extract()[0]
        # The plan was to read the next page number from the pagination block
        # on every page, but the line above kept returning 2, so a class-level
        # counter is incremented instead.
        if next_page:
            WiseSpider.e += 1
            yield FormRequest.from_response(response, formname="frm", formdata={'e': str(WiseSpider.e)})
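An alternative to the class-level counter is to read the next page number back from the pagination block on each response, which also avoids the IndexError that .extract()[0] raises on the last page when no following sibling exists. A sketch of that variant for the parse method, reusing the same co_paginate XPath and assuming the element after the current <strong> is an <a> whose text is the next page number:

    def parse(self, response):
        # ... item extraction as above ...
        next_page = response.xpath(
            '//div[@class="co_paginate"]/strong/following-sibling::a[1]/text()'
        ).extract_first()
        if next_page and next_page.strip().isdigit():
            yield FormRequest.from_response(
                response,
                formname="frm",
                formdata={"e": next_page.strip()},
            )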