Skip to content

Instantly share code, notes, and snippets.

@hieuhani
Created March 21, 2024 16:40
Show Gist options
  • Save hieuhani/296e559a2b75a5e8aeaedb5d3dd96825 to your computer and use it in GitHub Desktop.
Save hieuhani/296e559a2b75a5e8aeaedb5d3dd96825 to your computer and use it in GitHub Desktop.
import scrapy
class DienmayxanhSpider(scrapy.Spider):
    """Collect the province -> district -> ward listing from dienmayxanh.com.

    The crawl has three stages, each handled by one callback:

    1. ``parse`` reads the province anchors from the home page and posts one
       form request per province to the districts endpoint.
    2. ``parse_district`` reads the district anchors from that response and
       posts one form request per district to the wards endpoint.
    3. ``parse_ward`` reads the ward anchors and yields one dict per ward
       carrying the ids and names of the ward, its district and its province.

    Context gathered at an earlier stage travels to the next callback through
    ``Request.meta``.
    """

    name = "dienmayxanh"

    # Endpoints queried with POSTed form data. Their responses are parsed with
    # CSS selectors below, so they are presumably HTML fragments.
    DISTRICTS_URL = 'https://www.dienmayxanh.com/Store/GetAllDistrictsByProvinceV2'
    WARDS_URL = 'https://www.dienmayxanh.com/Store/GetAllWardsByProvinceV2'

    @staticmethod
    def _anchor_text(anchor):
        """Return the anchor's first text node stripped, or '' if it has none.

        ``.get()`` returns ``None`` when the selector matches nothing, so the
        result must be checked before calling ``.strip()`` on it.
        """
        text = anchor.css('::text').get()
        return text.strip() if text else ''

    def start_requests(self):
        """Yield the initial request(s); the home page holds the province list."""
        urls = [
            "https://www.dienmayxanh.com",
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one districts form request per province anchor on the page.

        Anchors without a ``data-value`` attribute are skipped, because the
        id is required as the ``provinceId`` form field.
        """
        for item in response.css('#lst-prov .listing-locale a'):
            province_id = item.css('::attr(data-value)').get()
            if province_id is None:
                self.logger.debug(
                    "Skipping province anchor without data-value on %s",
                    response.url,
                )
                continue
            yield scrapy.FormRequest(
                url=self.DISTRICTS_URL,
                formdata={
                    'provinceId': province_id,
                    'viewName': 'ListDistrict',
                },
                callback=self.parse_district,
                meta={
                    'province_id': province_id,
                    'province': self._anchor_text(item),
                },
            )

    def parse_district(self, response):
        """Yield one wards form request per district anchor in the response.

        Reads ``province_id`` and ``province`` from ``response.meta`` and
        forwards them together with the district id and name. Anchors
        without a ``data-dis`` attribute are skipped, because the id is
        required as the ``districtId`` form field.
        """
        province_id = response.meta['province_id']
        province = response.meta['province']
        for item in response.css('.listing-locale a'):
            district_id = item.css('::attr(data-dis)').get()
            if district_id is None:
                self.logger.debug(
                    "Skipping district anchor without data-dis (province %s)",
                    province_id,
                )
                continue
            yield scrapy.FormRequest(
                url=self.WARDS_URL,
                formdata={
                    'districtId': district_id,
                    'provinceId': province_id,
                    'viewName': 'ListWard',
                },
                callback=self.parse_ward,
                meta={
                    'province_id': province_id,
                    'province': province,
                    'district_id': district_id,
                    'district': self._anchor_text(item),
                },
            )

    def parse_ward(self, response):
        """Yield one dict per ward anchor in the response.

        Each dict has the keys ``province_id``, ``province``,
        ``district_id``, ``district``, ``ward_id`` and ``ward``. The first
        four come from ``response.meta``. ``ward_id`` is the anchor's
        ``data-value`` attribute (``None`` if absent) and ``ward`` is its
        stripped text ('' if absent).
        """
        province_id = response.meta['province_id']
        province = response.meta['province']
        district_id = response.meta['district_id']
        district = response.meta['district']
        for item in response.css('.listing-locale a'):
            yield {
                'province_id': province_id,
                'province': province,
                'district_id': district_id,
                'district': district,
                'ward_id': item.css('::attr(data-value)').get(),
                'ward': self._anchor_text(item),
            }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment