Created
October 4, 2023 16:53
-
-
Save zufanka/6f386b388711a6f92aafb02d6ff004fa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scrape_contributions(soup, org_id):
    '''
    Scrape the table that follows the text "Contributions in the closed
    financial year" on a parsed organisation page.

    The heading is located with a case-sensitive regular expression; the
    table is taken from the first "div" sibling after the heading's parent.

    Parameters:
        soup: parsed page exposing ``find``, as used below.
        org_id: identifier stored under "org_id" in every returned row.

    Returns:
        A list of dicts, one per table row after the header, with the keys
        "org_id", "name" and "contribution" (cell text, whitespace-stripped).
        The list is empty when the heading or the table is not on the page.
    '''
    txt = ".*Contributions in the closed financial year.*"
    heading = soup.find(text=re.compile(txt))
    if heading is None:
        # The page has no such section, so there is nothing to scrape.
        return []

    container = heading.parent.find_next_sibling("div")
    table = container.table if container is not None else None
    if table is None:
        return []

    data = []
    for tr in table.select("tr")[1:]:  # skipping the header
        cells = tr.select("td")
        if len(cells) < 2:
            # Rows without both a name cell and an amount cell are skipped
            # rather than raising IndexError.
            continue
        data.append({
            "org_id": org_id,
            "name": cells[0].text.strip(),
            "contribution": cells[1].text.strip(),
        })
    return data
# Fetch the register page of every organisation in list_of_ids and collect
# the contribution rows scraped from each one.
contributions = []
for org_id in list_of_ids:
    # The loop variable is named org_id because that is the name the URL
    # below interpolates; it also avoids shadowing the builtin `id`.
    www = f"https://ec.europa.eu/transparencyregister/public/consultation/displaylobbyist.do?id={org_id}"
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(www, timeout=30)
    # NOTE(review): no parser is named here, so the choice is left to `bs`
    # (presumably BeautifulSoup) - consider pinning one explicitly.
    soup = bs(r.text)
    # `or []` tolerates a None return value from scrape_contributions.
    contributions.extend(scrape_contributions(soup, org_id) or [])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment