mebusw · May 15, 2017 13:02
diff --git a/scrapy_csx.py b/scrapy_csx.py
 import scrapy

 class CsxCourseSpider(scrapy.Spider):
    """Scrape course listings from the Scrum Alliance course search.

    Crawling starts at the search-results URL in ``start_urls`` (filtered
    with ``type=Csd``) and continues through every pagination anchor that
    carries the ``UnselectedPage`` class. Each table row with more than
    five direct ``td`` text nodes is emitted as a dict with the keys
    ``name``, ``trainer``, ``location`` and ``date``.
    """

    name = "csx"
    start_urls = ['https://www.scrumalliance.org/courses-events/course.aspx?pageCount=50&country=&state=&city=&zip=&type=Csd;&trainer=&language=&startdate=5/15/2017%2012:00:00%20AM&enddate=1/1/1900%2012:00:00%20AM&discount=False&page=1&orderby=StartDate&sortdir=asc&radius=0&view=map']

    # Presumably an example of a single course page URL, kept for reference:
    # https://www.scrumalliance.org/courses-events/courses/csd/us/ohio/columbus/2017/may/201702145-csd

    def parse(self, response):
        """Yield one item per course row, then one request per other page.

        Args:
            response: The downloaded search-results page.

        Yields:
            dict: ``name``, ``trainer``, ``location`` and ``date`` of one
                table row.
            scrapy.Request: A request for each linked results page, handled
                by this same callback.
        """
        for row in response.xpath('*//tr'):
            cells = row.css('td::text').extract()
            # Rows with five or fewer text cells (presumably header or
            # layout rows) cannot supply indexes 3-5 and are skipped.
            if len(cells) <= 5:
                continue

            yield {
                # ``name`` is None when the row has no ``td > a`` link text.
                'name': row.css('td > a::text').extract_first(),
                'trainer': cells[3].strip(),
                'location': cells[4].strip(),
                'date': cells[5].strip(),
            }

        # Select the href attributes directly so anchors without an href are
        # skipped instead of being joined into a request for the page's own
        # base URL.
        for href in response.xpath('//a[@class="UnselectedPage"]/@href').extract():
            if href:
                yield scrapy.Request(response.urljoin(href), callback=self.parse)
	import scrapy

	class CsxCourseSpider(scrapy.Spider):
	    """Scrape course listings from the Scrum Alliance course search.

	    Crawling starts at the search-results URL in ``start_urls`` (filtered
	    with ``type=Csd``) and continues through every pagination anchor that
	    carries the ``UnselectedPage`` class. Each table row with more than
	    five direct ``td`` text nodes is emitted as a dict with the keys
	    ``name``, ``trainer``, ``location`` and ``date``.
	    """

	    name = "csx"
	    start_urls = ['https://www.scrumalliance.org/courses-events/course.aspx?pageCount=50&country=&state=&city=&zip=&type=Csd;&trainer=&language=&startdate=5/15/2017%2012:00:00%20AM&enddate=1/1/1900%2012:00:00%20AM&discount=False&page=1&orderby=StartDate&sortdir=asc&radius=0&view=map']

	    # Presumably an example of a single course page URL, kept for reference:
	    # https://www.scrumalliance.org/courses-events/courses/csd/us/ohio/columbus/2017/may/201702145-csd

	    def parse(self, response):
	        """Yield one item per course row, then one request per other page.

	        Args:
	            response: The downloaded search-results page.

	        Yields:
	            dict: ``name``, ``trainer``, ``location`` and ``date`` of one
	                table row.
	            scrapy.Request: A request for each linked results page, handled
	                by this same callback.
	        """
	        for row in response.xpath('*//tr'):
	            cells = row.css('td::text').extract()
	            # Rows with five or fewer text cells (presumably header or
	            # layout rows) cannot supply indexes 3-5 and are skipped.
	            if len(cells) <= 5:
	                continue

	            yield {
	                # ``name`` is None when the row has no ``td > a`` link text.
	                'name': row.css('td > a::text').extract_first(),
	                'trainer': cells[3].strip(),
	                'location': cells[4].strip(),
	                'date': cells[5].strip(),
	            }

	        # Select the href attributes directly so anchors without an href are
	        # skipped instead of being joined into a request for the page's own
	        # base URL.
	        for href in response.xpath('//a[@class="UnselectedPage"]/@href').extract():
	            if href:
	                yield scrapy.Request(response.urljoin(href), callback=self.parse)