Skip to content

Instantly share code, notes, and snippets.

@bstancil
Created April 14, 2020 23:16
Show Gist options
  • Save bstancil/2b42199e2191b514bb65097b95f27ff4 to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
from bs4 import BeautifulSoup
def get_details(grant):
    """Pull one grant's amount/year/category/description from its <div> children.

    Assumes the first four <div>s under *grant* hold the fields in that
    fixed order (raises IndexError if fewer are present, as before).
    """
    divs = grant.findAll("div")
    keys = ("amt", "year", "category", "desc")
    return {key: divs[i].text.strip() for i, key in enumerate(keys)}
def get_grants(card):
    """Return a list of detail dicts, one per grant listed on *card*.

    Bug fix: the attrs filter was written as the set
    {"class", "list-item-grant"} (comma instead of colon).  find_all's
    attrs argument must be a dict mapping attribute name to value —
    the dict form already used for the "card" lookup elsewhere in
    this file.
    """
    grants = card.findAll("div", {"class": "list-item-grant"})
    return [get_details(g) for g in grants]
def get_firm(card):
    """Return the grantee (firm) name from the card's header <h2>, stripped.

    Bug fix: the attrs filter was written as the set
    {"class", "grantee-header"} (comma instead of colon); it must be a
    dict mapping the attribute name to its value.
    """
    header = card.find("div", {"class": "grantee-header"})
    return header.find("h2").text.strip()
def get_card(card):
    """Return this card's grant dicts, each annotated with the grantee name."""
    firm_name = get_firm(card)
    grant_rows = get_grants(card)
    for row in grant_rows:
        row["firm"] = firm_name
    return grant_rows
def get_cards(soup):
    """Flatten every grant row from every grantee card on the parsed page."""
    data = []
    for card in soup.findAll("article", {"class": "card"}):
        data.extend(get_card(card))
    return data
def get_page(n):
    """Fetch results page *n* of the McKnight grants search and return its rows.

    Fix: BeautifulSoup was constructed without a parser argument, which
    emits GuessedAtParserWarning and makes the parse depend on whichever
    parser happens to be installed; "html.parser" (stdlib) pins it.
    """
    print("Getting page %i..." % n)
    url = "https://www.mcknight.org/grants/search-our-grants/page/%i/?grant-keyword&date_start&date_end&grant_program#038;date_start&date_end&grant_program" % n
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")
    return get_cards(soup)
# Scrape all 15 result pages, then dump every grant row to CSV.
results = []
for page_number in range(1, 16):
    results.extend(get_page(page_number))

df = pd.DataFrame(results)
df.to_csv('donors.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment