Skip to content

Instantly share code, notes, and snippets.

@ap-Codkelden
Created January 28, 2019 17:48
Show Gist options
  • Save ap-Codkelden/4afd07ac38bc89e68d2fa8cc6732640d to your computer and use it in GitHub Desktop.
Save ap-Codkelden/4afd07ac38bc89e68d2fa8cc6732640d to your computer and use it in GitHub Desktop.
Grab State Voters Registry Precincts GeoJSON data
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import lxml.html
import os
import re
import requests
import time
# Numeric region codes walked by the crawl loop below; each one is sent as the
# ``pid100`` query parameter and zero-padded to two digits in output file
# names.  (Presumably the leading digits of KOATUU codes -- confirm.)
KOATUU = (
    1, 5, 7, 12, 14, 18, 21, 23, 26,
    32, 35, 44, 46, 48, 51, 53, 56,
    59, 61, 63, 65, 68, 71, 73, 74,
    80, 85,
)

# Output directories, relative to the current working directory.
DIR_PLY = 'poly'
DIR_MRK = 'marker'

# Browser-like User-Agent header intended for requests to the registry site.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/6.0)",
}

# Site root and the endpoint paths appended to it.
DRV_URL = "https://www.drv.gov.ua"
DRV_PATH = "/ords/portal/!cm_core.cm_index"
URL_POLY_PATH = "/ords/portal/gis$core.Gis_DistrPoly"
URL_MARKER_PATH = "/ords/portal/gis$core.Gis_DistrMarker"
def _download_json(url, params, out_path, timeout=30):
    """Fetch *url* with *params* and write the decoded JSON body to *out_path*.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    # Send the module-level browser-like headers and bound the wait so a
    # stalled server cannot hang the whole crawl.
    res = requests.get(url, params=params, headers=HEADERS, timeout=timeout)
    # Fail loudly on HTTP errors instead of saving an error payload as data.
    res.raise_for_status()
    # Decode before opening the file so a bad body does not leave an empty
    # or truncated file behind.
    payload = res.json()
    with open(out_path, 'w', encoding='utf-8') as jf:
        json.dump(payload, jf)


def get_geojson(ato_name, url_parameter, koatuu):
    """Download the polygon and marker JSON for one unit and save both.

    The polygon payload is written to
    ``DIR_PLY/<code>_ply_<name>.json`` and the marker payload to
    ``DIR_MRK/<code>_mrk_<name>.json``, where ``<code>`` is *koatuu*
    zero-padded to two digits and ``<name>`` is *ato_name* with spaces
    replaced by underscores.

    Args:
        ato_name: Display name of the unit; used only to build file names.
        url_parameter: Value sent as the ``p_f3001`` query parameter to both
            endpoints.
        koatuu: Region code used as the file-name prefix.

    Raises:
        requests.RequestException: if either request fails or returns a
            non-2xx status.
        ValueError: if either response body is not valid JSON.
        OSError: if an output file cannot be written.
    """
    state_code = str(koatuu).zfill(2)
    ato_name = ato_name.replace(' ', '_')
    params = {'p_f3001': url_parameter}

    # Polygons first.
    _download_json(
        DRV_URL + URL_POLY_PATH,
        params,
        os.path.join(DIR_PLY, f"{state_code}_ply_{ato_name}.json"),
    )

    # Short pause between the two requests to stay polite to the server.
    time.sleep(1.15)

    # Then markers.
    _download_json(
        DRV_URL + URL_MARKER_PATH,
        params,
        os.path.join(DIR_MRK, f"{state_code}_mrk_{ato_name}.json"),
    )
def get_urlparam(c):
    """Return the value of the ``pf3001`` query parameter of URL *c*.

    The query string is parsed with the standard library, so values that
    contain ``=``, parameters without a value, and a trailing ``#fragment``
    are handled.  The returned value is percent-decoded, which lets it be
    passed to ``requests`` as a parameter without being encoded twice.
    When the parameter is repeated, the last occurrence wins.

    Args:
        c: An absolute or relative URL string.

    Returns:
        The decoded ``pf3001`` value as a string.

    Raises:
        KeyError: if the URL has no ``pf3001`` query parameter.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import parse_qsl, urlsplit

    query = urlsplit(c).query
    # keep_blank_values so "pf3001=" yields "" rather than a missing key.
    k = dict(parse_qsl(query, keep_blank_values=True))
    return k['pf3001']
# Make sure both output directories exist before anything is downloaded.
# exist_ok=True removes the check-then-create race of exists() + mkdir().
for d in (DIR_PLY, DIR_MRK):
    os.makedirs(d, exist_ok=True)
# Crawl: for every region code fetch its index page, collect the links it
# lists in table "tab3", and download the JSON pair for each link.
url = DRV_URL + DRV_PATH  # same for every region, so built once
for code in KOATUU:
    params = {
        'option': 'ext_dvk',
        "pid100": code,
        "prejim": 2
    }
    # Send the browser-like headers and bound the wait so one stalled
    # response cannot hang the whole crawl.
    res = requests.get(url, params=params, headers=HEADERS, timeout=30)
    if res.status_code != 200:
        print(f"No data for koatuu {code}")
        continue
    content = lxml.html.fromstring(res.text)
    # Select the anchor elements themselves so each name stays paired with
    # its own href; two separate text()/@href queries drift out of step as
    # soon as one anchor lacks either part.
    anchors = content.xpath(
        '//table[@id="tab3"]/tr/td/a[@class="main" or @class="sub"]'
    )
    data_links = {}
    for anchor in anchors:
        name = ''.join(anchor.xpath('text()'))
        href = anchor.get('href')
        if not name or not href:
            # Nothing to name the output file after, or nothing to follow.
            continue
        data_links[name] = get_urlparam(href)
    for key, url_parameter in data_links.items():
        print(key, url_parameter)
        get_geojson(key, url_parameter, code)
        # Pause between units to keep the request rate low.
        time.sleep(2.43)
    # Extra pause before moving on to the next region.
    time.sleep(0.6)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment