Skip to content

Instantly share code, notes, and snippets.

@ap-Codkelden
Created March 23, 2018 21:31
Show Gist options
  • Save ap-Codkelden/dfb23652d4fe7fa7ca694fad5febc4b1 to your computer and use it in GitHub Desktop.
Save ap-Codkelden/dfb23652d4fe7fa7ca694fad5febc4b1 to your computer and use it in GitHub Desktop.
Kryvyi Rih streets from the official city site
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import lxml.html
import requests
import sqlite3
import time
# Open (or create) the SQLite file that collects the scraped rows and keep a
# module-level connection and cursor for the rest of the script.
db = sqlite3.connect('kr_streets.sqlite')
c = db.cursor()
# One table, three text columns; created only on the first run.
# NOTE(review): "raj" presumably abbreviates the city district -- confirm.
c.execute(
    'CREATE TABLE IF NOT EXISTS street(type text, name text, raj text);'
)
def insert(*data):
    """Insert one row into the ``street`` table using the module cursor.

    The row may be given either as a single 3-item sequence,
    ``insert((type, name, raj))`` (the form this script uses), or as three
    separate positional values, ``insert(type, name, raj)``.

    The values are bound as SQL parameters. Nothing is committed here; the
    caller is responsible for committing on the connection.

    Raises whatever ``sqlite3`` raises when the number of supplied values
    does not match the three placeholders.
    """
    # A lone argument is the row itself; otherwise the arguments form the row.
    row = data[0] if len(data) == 1 else data
    c.execute('INSERT INTO street VALUES (?,?,?);', row)
# Headers sent with every page request (a desktop Firefox User-Agent).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 '
                  'Firefox/57.0',
}
SERVER = 'https://krmisto.gov.ua'
PATH = '/ua/streets/list/sort/name/page/'
# Listing pages are requested for numbers 1..LAST_PAGE inclusive.
LAST_PAGE = 16
# Seconds to wait on the server before a page request is abandoned.
REQUEST_TIMEOUT = 30


def _raj_values(row):
    """Return the list of third-column values for one table row.

    The third cell is read in two ways:

    * if it has non-blank direct text, the first such text is the only value;
    * otherwise the non-blank texts of its ``ul/li`` items are the values.

    When neither yields anything, ``[None]`` is returned so the row is still
    stored once, with a NULL third column.
    """
    direct = [t.strip() for t in row.xpath('./td[3]/text()') if t.strip()]
    if direct:
        return [direct[0]]
    listed = [
        t.strip() for t in row.xpath('./td[3]/ul/li/text()') if t.strip()
    ]
    return listed or [None]


for p in range(1, LAST_PAGE + 1):
    print(f'Page {p} in process')
    url = f'{SERVER}{PATH}{p}.html'
    try:
        # Without a timeout a stalled connection would block the run forever.
        res = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Treat transport failures like a bad status: report and move on.
        print(f'Oops @ {p}')
        continue
    if res.status_code != 200:
        print(f'Oops @ {p}')
        continue
    page = lxml.html.fromstring(res.text)
    rows = page.xpath('//div[@role="main"]/table[@class="full"]/tbody/tr')
    for r in rows:
        type_ = r.xpath('./td[1]/text()')
        name = r.xpath('./td[2]/a/text()')
        # One database row per third-column value, so a street whose cell
        # holds a list is stored once for every list item instead of only
        # once (or not at all).
        for raj in _raj_values(r):
            # insert() receives the row as a single tuple.
            insert((type_[0], name[0], raj))
    # Commit page by page so an interruption keeps what was already scraped.
    db.commit()
    # Pause between pages to avoid hammering the server.
    time.sleep(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment