Created
October 7, 2015 19:22
-
-
Save domenkozar/372638c76e5db2ed40bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| return toolz.compose( | |
| # drop the 'IME PREDPISA' cell text (presumably the column header) and empty strings | |
| curried.filter(lambda x: x not in ('IME PREDPISA', '')), | |
| # take the text of the second <td> (index 1) in each row | |
| curried.map(lambda tr: pq(tr).find('td').eq(1).text()), | |
| # flatten the rows of every table.MsoNormalTable on each page into one sequence | |
| curried.mapcat(lambda page: page('table.MsoNormalTable tr')), | |
| # get all subpages | |
| curried.map(do_request), | |
| # drop falsy hrefs (e.g. missing or empty) so they are never requested | |
| curried.filter(lambda a: a), | |
| # get menu links | |
| curried.map(lambda a: pq(a).attr('href')), | |
| # get menu elements | |
| lambda doc: doc('.moduletable_menu a'), | |
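| # fetch the main page; toolz.compose applies functions right-to-left, so the pipeline starts here and reads bottom-up | |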
| do_request, | |
| )(VIOLATION_URL + '/index.php') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment