narate · October 22, 2015 10:53
diff --git a/school.py b/school.py
 # -*- coding: utf-8 -*-
 import scrapy
 from thaischool.items import ThaischoolItem

 class SchoolSpider(scrapy.Spider):
    """Crawl the paginated school listing on thaischool.in.th.

    Requests sitemap pages 1 through 235 and yields one ``ThaischoolItem``
    per table row, populated with the keys ``no``, ``name``, ``address``,
    ``phone`` and ``zone``.
    """

    name = "school"
    allowed_domains = ["thaischool.in.th"]
    base_url = 'http://www.thaischool.in.th/sitemap.php?page=%s&school_area=&province_id=&txtsearch='

    # Built with a plain loop on purpose: on Python 3 a comprehension inside
    # a class body cannot see the class-level name ``base_url``.
    start_urls = []
    for page in range(1, 236):
        start_urls.append(base_url % page)

    @staticmethod
    def _first_text(cell, path, default='-'):
        """Return the first string matched by ``path`` under ``cell``.

        ``default`` is returned when the XPath matches nothing, so an empty
        cell never raises ``IndexError``.
        """
        matches = cell.xpath(path).extract()
        return matches[0] if matches else default

    def parse(self, response):
        """Yield one ``ThaischoolItem`` per data row of the listing table.

        The XPath skips the first row of the table (``position()>1``).
        A row with fewer than five ``td`` cells is logged and skipped, and
        any cell without text is stored as ``'-'``.
        """
        rows = response.xpath('/html/body/center/form/table[2]/tr[position()>1]')
        for row in rows:
            cells = row.xpath('td')
            if len(cells) < 5:
                # Indexing a short row would raise IndexError and end this
                # generator, losing every remaining row of the page.
                self.log('Skipping row with %d cell(s) on %s'
                         % (len(cells), response.url))
                continue
            first_text = self._first_text
            yield ThaischoolItem({
                'no': first_text(cells[0], 'text()'),
                # The name is the text of the link inside the second cell.
                'name': first_text(cells[1], 'a/text()'),
                'address': first_text(cells[2], 'text()'),
                'phone': first_text(cells[3], 'text()'),
                'zone': first_text(cells[4], 'text()'),
            })
	# -*- coding: utf-8 -*-
	import scrapy
	from thaischool.items import ThaischoolItem

	class SchoolSpider(scrapy.Spider):
	    """Spider for the school directory pages of thaischool.in.th.

	    Each of the 235 sitemap pages is fetched and every table row is
	    turned into a ``ThaischoolItem`` with the keys ``no``, ``name``,
	    ``address``, ``phone`` and ``zone``.
	    """

	    name = "school"
	    allowed_domains = ["thaischool.in.th"]
	    base_url = 'http://www.thaischool.in.th/sitemap.php?page=%s&school_area=&province_id=&txtsearch='

	    # One start URL per listing page, pages 1..235 inclusive.
	    start_urls = []
	    for page_no in range(1, 236):
	        start_urls.append(base_url % page_no)

	    # (item key, XPath relative to the cell), in table-column order.
	    _columns = (
	        ('no', 'text()'),
	        ('name', 'a/text()'),
	        ('address', 'text()'),
	        ('phone', 'text()'),
	        ('zone', 'text()'),
	    )

	    def parse(self, response):
	        """Yield a ``ThaischoolItem`` for every data row in the response.

	        Rows after the first one of the second table in the form are
	        read. Rows with fewer cells than there are columns are logged and
	        skipped instead of raising ``IndexError``; a cell whose XPath
	        matches no text contributes ``'-'``.
	        """
	        for row in response.xpath('/html/body/center/form/table[2]/tr[position()>1]'):
	            cells = row.xpath('td')
	            if len(cells) < len(self._columns):
	                # A short row cannot be mapped onto the columns; skip it so
	                # the remaining rows of the page are still processed.
	                self.log('Skipping row with %d cell(s) on %s'
	                         % (len(cells), response.url))
	                continue
	            record = {}
	            # zip() stops after the last declared column, so any extra
	            # cells in the row are ignored.
	            for (key, path), cell in zip(self._columns, cells):
	                found = cell.xpath(path).extract()
	                record[key] = found[0] if found else '-'
	            yield ThaischoolItem(record)