Skip to content

Instantly share code, notes, and snippets.

@zufanka
Created October 4, 2023 16:53
Show Gist options
  • Select an option

  • Save zufanka/6f386b388711a6f92aafb02d6ff004fa to your computer and use it in GitHub Desktop.

Select an option

Save zufanka/6f386b388711a6f92aafb02d6ff004fa to your computer and use it in GitHub Desktop.
def scrape_contributions(soup, org_id):
    '''
    Scrapes the table next to the text "Contributions in the closed financial year".

    soup: parsed page exposing the BeautifulSoup search API (find / select)
    org_id: identifier copied into the "org_id" key of every returned row

    Returns a list of dicts, one per table row after the header row, with the
    keys "org_id", "name" and "contribution" (cell texts with surrounding
    whitespace stripped).
    Returns an empty list when the heading, the following div or its table
    cannot be found on the page.
    '''
    data = []
    txt = ".*Contributions in the closed financial year.*"

    # find() yields None when the heading is absent; bail out instead of
    # failing on `.parent` of None.
    label = soup.find(text=re.compile(txt))
    if label is None:
        return data

    # The table lives in the first <div> sibling following the heading's parent.
    container = label.parent.find_next_sibling("div")
    table = container.table if container is not None else None
    if table is None:
        return data

    for tr in table.select("tr")[1:]:  # skipping the header
        cells = tr.select("td")
        if len(cells) < 2:
            # Not a name/contribution row (e.g. spacer or <th>-only row).
            continue
        data.append({
            "org_id": org_id,
            "name": cells[0].text.strip(),
            "contribution": cells[1].text.strip(),
        })
    return data
# Fetch every organisation's register page and gather its contribution rows.
contributions = []
for org_id in list_of_ids:
    # The URL must be built from the loop variable; naming it `org_id` (rather
    # than `id`) makes the f-string resolve and avoids shadowing the builtin.
    www = f"https://ec.europa.eu/transparencyregister/public/consultation/displaylobbyist.do?id={org_id}"
    r = requests.get(www)
    soup = bs(r.text)
    rows = scrape_contributions(soup, org_id)
    if rows:  # nothing to add for an empty or missing result
        contributions.extend(rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment