Daily script to pull Boston incident reports
from bs4 import BeautifulSoup
import os
import time
import urllib.request

import requests

# Fetch the dataset landing page for Boston's crime incident reports.
f = urllib.request.urlopen('https://data.boston.gov/dataset/crime-incident-reports-august-2015-to-date-source-new-system')
soup = BeautifulSoup(f.read(), "html.parser")

# Find all the "primary" buttons on the page -- the blue ones -- under the
# element whose title matches the dataset name. If this script breaks, it's
# probably because this name changed.
maybe_buttons = soup.find(title="Crime Incident Reports (August 2015 - To Date) (Source - New System)").find_next("div").select('.btn-primary')

# Pick the button whose link ends in .csv.
csv_url = None
for btn in maybe_buttons:
    is_csv = btn.attrs["href"].split(".")[-1] == "csv"
    if is_csv:
        csv_url = btn.attrs["href"]

outdir = "crime-incident-reports-daily"
filename = time.strftime("%Y-%m-%d.csv")

# Download the CSV and save it under today's date.
if csv_url is not None:
    response = requests.get(csv_url)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    with open(os.path.join(outdir, filename), "wb") as fout:
        fout.write(response.content)
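
As a usage sketch (not part of the original gist), a saved snapshot can be read back with the standard library's csv module; this assumes at least one dated file already exists in crime-incident-reports-daily:

import csv
import os

outdir = "crime-incident-reports-daily"
# Dated filenames (YYYY-MM-DD.csv) sort lexicographically, so the last one is the newest.
latest = sorted(os.listdir(outdir))[-1]
with open(os.path.join(outdir, latest), newline="") as fh:
    rows = list(csv.DictReader(fh))
print(f"{len(rows)} incident records in {latest}")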