Created
July 15, 2020 13:52
-
-
Save sirex/300c1bf94c48068042a6b6eeee41aa50 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import json | |
import urllib.parse | |
from operator import itemgetter | |
import click | |
import lxml.etree | |
import requests | |
import requests_cache | |
import tqdm | |
from lxml import etree | |
from lxml.etree import XMLSyntaxError | |
# Root click command group. The subcommands in this file register themselves
# on it through ``@main.command()``.
#
# Documented with comments rather than a docstring on purpose: click shows a
# command's docstring in its ``--help`` output, and that output is left as is.
@click.group()
def main():
    # Nothing to do at group level; the subcommands do all the work.
    return None
# Rebuild ``data.jsonl`` from the web service reached through ``lrs()``.
#
# Walks SeimoSesija -> SeimoPosėdis -> DarbotvarkėsKlausimas elements and
# writes one JSON object per DarbotvarkėsKlausimas, one object per line.
# Responses go through a ``requests_cache.CachedSession``.
#
# Documented with comments rather than a docstring so that the command's
# ``--help`` output stays exactly as it was.
@main.command()
def update():
    def stage_entry(stage):
        # Copy the three attributes kept for each KlausimoStadija element.
        attrs = stage.attrib
        return {
            'darbotvarkės_klausimo_id': attrs['darbotvarkės_klausimo_id'],
            'pavadinimas': attrs['pavadinimas'],
            'dokumento_nuoroda': attrs['dokumento_nuoroda'],
        }

    # The output file is opened (and truncated) before any request is made.
    with open('data.jsonl', 'w') as out:
        http = requests_cache.CachedSession()
        all_sessions = lrs(http, 'ad_seimo_sesijos', ar_visos='T').xpath('//SeimoSesija')
        for seimas_session in pbar(all_sessions, "Sesijos"):
            sittings = lrs(
                http,
                'ad_seimo_posedziai',
                sesijos_id=seimas_session.attrib['sesijos_id'],
            ).xpath('//SeimoPosėdis')
            for sitting in pbar(sittings, "Posėdžiai"):
                # Keep only the first whitespace-separated part of 'pradžia'
                # (presumably the date; search() strips '-' from it later).
                start_day = sitting.attrib['pradžia'].split()[0]
                questions = lrs(
                    http,
                    'ad_seimo_pos_darb',
                    posedzio_id=sitting.attrib['posėdžio_id'],
                ).xpath('//DarbotvarkėsKlausimas')
                for question in questions:
                    record = {
                        'pradzia': start_day,
                        'pavadinimas': question.attrib['pavadinimas'],
                        'klausimai': [
                            stage_entry(stage)
                            for stage in question.findall('KlausimoStadija')
                        ],
                    }
                    out.write(json.dumps(record) + '\n')
@main.command()
@click.argument('q')
def search(q):
    """Print stored agenda items whose title contains Q, grouped by day.

    For every day that has at least one match, prints a link to that day's
    agenda page followed by the matching titles and an empty line.
    """
    # Group with a dict instead of itertools.groupby: groupby only merges
    # *adjacent* rows, so a day whose rows are not contiguous in data.jsonl
    # would get several headers. The dict keeps days in first-seen order, so
    # the output is unchanged when the data is already grouped.
    rows_by_day = {}
    for row in itersearch(q):
        rows_by_day.setdefault(row['pradzia'], []).append(row)

    for day, rows in rows_by_day.items():
        # The portal URL takes the day without dashes.
        moment = day.replace('-', '')
        print(f"- https://www.lrs.lt/sip/portal.show?p_r=35727&p_k=1&p_a=sale_darbotvarke&p_moment={moment}")
        for row in rows:
            print(f" {row['pavadinimas']}")
        print()
def itersearch(q: str):
    """Yield rows from ``data.jsonl`` whose ``'pavadinimas'`` contains *q*.

    The file is read lazily, one JSON object per line, so nothing is opened
    until the generator is first advanced. Matching is a plain,
    case-sensitive substring test.

    Args:
        q: Substring to look for in each row's ``'pavadinimas'`` value.

    Yields:
        The decoded JSON object (a dict) for every matching line, in file
        order.

    Raises:
        FileNotFoundError: If ``data.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly rather than the platform default; plain ASCII
    # output from json.dumps is valid UTF-8 as well.
    with open('data.jsonl', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. from hand editing) are not records;
                # json.loads('') would raise on them.
                continue
            row = json.loads(line)
            if q in row['pavadinimas']:
                yield row
def lrs(s: requests.Session, method: str, **kwargs) -> etree._Element:
    """Call one ``p2b.<method>`` endpoint and return the parsed XML root.

    Args:
        s: Session used for the GET request.
        method: Endpoint suffix appended to ``http://apps.lrs.lt/sip/p2b.``.
        **kwargs: Query parameters; URL-encoded and appended when present.

    Returns:
        The root element of the response body. If the body is not
        well-formed XML, the error is printed and an empty ``<root>``
        element is returned, so callers' ``xpath`` queries find nothing
        instead of failing.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    url = f'http://apps.lrs.lt/sip/p2b.{method}'
    if kwargs:
        url += '?' + urllib.parse.urlencode(kwargs)
    # Without a timeout requests waits indefinitely, so a single stalled
    # response would hang the whole crawl.
    resp = s.get(url, timeout=60)
    resp.raise_for_status()
    try:
        return etree.fromstring(resp.content)
    except XMLSyntaxError as e:
        # Best effort: report the broken document and carry on with an
        # empty tree rather than aborting.
        print()
        print()
        print(f"Error while parsing {url}")
        print(e)
        return etree.XML('<?xml version="1.0"?><root></root>')
def pbar(*args):
    """Build a ``tqdm.tqdm`` progress bar with this script's fixed options.

    Positional arguments are forwarded to ``tqdm.tqdm`` unchanged; the
    callers in this file pass an iterable and a label. ``ascii=True`` and
    ``leave=False`` are always added.
    """
    fixed_options = {'ascii': True, 'leave': False}
    return tqdm.tqdm(*args, **fixed_options)
# Start the click command group only when the file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
click
lxml
requests
requests-cache
tqdm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment