Created
November 20, 2014 12:26
-
-
Save LEW21/69556dd0a7cac5125655 to your computer and use it in GitHub Desktop.
Skrypt pobierający wszystkie dane PKW ze strony http://klk.kbw.gov.pl/kalkulator/20141116/000000/SMD/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys
import time

import requests
def URL(teryt):
    """Build the calculator page URL for a TERYT code.

    Trailing "00" pairs are removed from *teryt* first. What remains decides
    how many path segments are appended to the base URL:

    * at least 1 character  -> first two characters padded with "0000"
    * more than 2 characters -> first four characters padded with "00"
    * more than 4 characters -> the stripped code itself

    An empty (or all-"00") code yields the base URL unchanged.
    """
    code = teryt
    # Drop the zero padding so the length of `code` reflects its depth.
    while code.endswith("00"):
        code = code[:-2]

    segments = ["http://klk.kbw.gov.pl/kalkulator/20141116/000000/SMD"]
    if code:
        segments.append(code[:2] + "0000")
    if len(code) > 2:
        segments.append(code[:4] + "00")
    if len(code) > 4:
        segments.append(code)
    return "/".join(segments)
def getOKW(teryt, okw, max_attempts=100, timeout=60):
    """Download the ZIP archive for one OKW and return its bytes.

    Each attempt first requests the calculator page ``URL(teryt)/okw`` and
    then the ZIP under ``/TMP/``. On failure the function sleeps one second
    and tries again.

    Args:
        teryt: TERYT code (string) used in both the page and the ZIP URL.
        okw: Identifier (string) appended to the page URL and used in the
            ZIP file name.
        max_attempts: Maximum number of download attempts (default 100,
            the value the script originally hard-coded).
        timeout: Timeout in seconds passed to every ``requests.get`` call,
            so a stalled connection is retried instead of hanging forever.

    Returns:
        The response body as ``bytes`` on success, or ``None`` when every
        attempt failed.
    """
    url = "http://klk.kbw.gov.pl/TMP/" + teryt + "/" + okw + "/" + teryt + "-" + okw + ".zip"
    print(url)
    sys.stdout.flush()

    page_url = URL(teryt) + "/" + okw
    for _ in range(max_attempts):
        try:
            # The page is fetched before every download attempt and its
            # response is discarded.
            # NOTE(review): presumably visiting the page makes the server
            # produce the ZIP - confirm before removing this request.
            requests.get(page_url, timeout=timeout)
            res = requests.get(url, timeout=timeout)
        except requests.RequestException as err:
            # Only network-level failures are retried; KeyboardInterrupt and
            # programming errors now propagate instead of being swallowed.
            print(err)
        else:
            if res.ok:
                print(res)
                sys.stdout.flush()
                return res.content
            # Show the failing status so retries are not silent.
            print(res)
        print("Error, sleeping 1s")
        sys.stdout.flush()
        time.sleep(1)

    # All attempts failed; callers must handle the missing payload.
    return None
def saveOKW(teryt, okw, resume_from="026401"):
    """Download one OKW archive and write it to ``<teryt>-<okw>.zip``.

    Args:
        teryt: TERYT code (string) of the unit the OKW belongs to.
        okw: OKW identifier (string).
        resume_from: Codes that compare lower than this string
            (lexicographic comparison) are skipped without any download.
            The default is the threshold the script originally hard-coded;
            pass ``""`` to skip nothing.
            NOTE(review): presumably this lets an interrupted run resume -
            confirm the intended starting point before reuse.

    Raises:
        RuntimeError: If ``getOKW`` returned ``None`` (download failed).
            The check happens before the output file is opened, so no empty
            ``.zip`` is left behind.
    """
    if teryt < resume_from:
        return

    data = getOKW(teryt, okw)
    filename = teryt + "-" + okw + ".zip"
    if data is None:
        raise RuntimeError("Download failed, nothing written for " + filename)

    with open(filename, "wb") as f:
        f.write(data)
def ls(teryt):
    """List the link targets found on the calculator page for *teryt*.

    The page at ``URL(teryt)`` is fetched and every ``href="..."`` value in
    the raw response body is collected. For each one, only the part after the
    last "/" is returned, decoded to ``str``.

    A non-OK response is printed but still parsed.
    """
    response = requests.get(URL(teryt))
    if not response.ok:
        print(response)
        sys.stdout.flush()

    hrefs = re.findall(b'href="([^"]*)"', response.content)
    names = []
    for href in hrefs:
        # Keep only the final path component of the link.
        names.append(href.decode().rsplit("/", 1)[-1])
    return names
def _iter_okw():
    """Yield ``(gmina, okw)`` pairs by walking four levels of ``ls`` listings.

    The walk starts at ``ls("")`` and descends through the levels the script
    calls woj, powiat and gmina. Each gmina entry is then listed once more to
    obtain its okw entries. Listings are fetched lazily, in depth-first order.
    """
    for woj in ls(""):
        for powiat in ls(woj):
            for gmina in ls(powiat):
                for okw in ls(gmina):
                    yield gmina, okw


# Script entry: runs when the module is executed and saves every archive found.
for gmina, okw in _iter_okw():
    saveOKW(gmina, okw)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment