matiskay · January 6, 2012 01:19 · bmanojlovic · Jan 25, 2013
diff --git a/README.md b/README.md
diff --git a/spider.py b/spider.py
 def parse(self, response):
        """Follow the links found on one listing page.

        Yields one Request per href matched inside the "rezultatPretrage"
        result blocks (each handled by ``parseCompanyData``). Afterwards, if
        the "stranicePretrage" pager contains a link wrapping an image with
        alt="Next", yields a Request for that link, handled by this method.
        """
        selector = HtmlXPathSelector(response)

        def absolute(href):
            # Resolve a (possibly relative) href against the page's base URL.
            return urljoin_rfc(get_base_url(response), href)

        company_links = selector.select(
            '//div[contains(@class,"rezultatPretrage")]/h2/a/@href'
        ).extract()
        for href in company_links:
            yield Request(absolute(href), callback=self.parseCompanyData)

        # The pager's forward link is identified by its <img alt="Next"> child;
        # '/..' steps back up to the enclosing <a> so its href can be read.
        next_anchor = selector.select(
            '//div[@class="stranicePretrage"]/a/img[@alt="Next"]/..'
        )
        next_links = next_anchor.select('@href').extract()
        if next_links:
            yield Request(absolute(next_links[0]), callback=self.parse)

    def parseCompanyData(self, response):
	def parse(self, response):
		"""Follow the links found on one listing page.

		Yields one Request per href matched inside the "rezultatPretrage"
		result blocks (each handled by ``parseCompanyData``), then, if the
		"stranicePretrage" pager has a link wrapping an image with
		alt="Next", a Request for that link handled by ``parse`` itself.
		"""
		hxs = HtmlXPathSelector(response)
		companies = hxs.select('//div[contains(@class,"rezultatPretrage")]/h2/a/@href').extract()
		for url in companies:
			# hrefs may be relative; resolve them against the page's base URL.
			yield Request(urljoin_rfc(get_base_url(response), url), callback=self.parseCompanyData)

		# Locate the <a> that wraps the "Next" image ('/..' selects the parent)
		# and read its href; extract() returns an empty list when there is none.
		nexturl = hxs.select('//div[@class="stranicePretrage"]/a/img[@alt="Next"]/..').select('@href').extract()
		if nexturl:
			yield Request(urljoin_rfc(get_base_url(response), nexturl[0]), callback=self.parse)

	def parseCompanyData(self, response):