Skip to content

Instantly share code, notes, and snippets.

@higs4281
Created March 2, 2012 22:18
Show Gist options
  • Save higs4281/1961817 to your computer and use it in GitHub Desktop.
Save higs4281/1961817 to your computer and use it in GitHub Desktop.
bs4 crime scrape
from bs4 import BeautifulSoup
import requests
import csv
# Scrape a crimemapping.com "detailed report" page and dump its first HTML
# table to a CSV file. The query string below fixes the date range, the
# category codes and the bounding box of the report being requested.
url = "http://www.crimemapping.com/DetailedReport.aspx?db=1/12/2012+00:00:00&de=1/25/2012+23:59:00&ccs=AR,AS,BU,DP,DR,DU,FR,HO,VT,RO,SX,TH,VA,VB,WE&xmin=-8577539.984326074&ymin=4369220.056681086&xmax=-8402193.441439813&ymax=4444739.840626868&faid=0b80bce5-5d21-468b-ae81-3d6e2ecf532e"

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops us from silently scraping an HTTP error page.
r = requests.get(url, timeout=30)
r.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed on this machine.
soup = BeautifulSoup(r.text, 'html.parser')

tables = soup.find_all('table')
if not tables:
    # Same exception type as indexing an empty list, but with a useful message.
    raise IndexError("no <table> element found in the response from " + url)
table = tables[0]

# Skip the first row (presumably the table's own header row -- verify against
# the live page), ignore rows with no visible text, and drop the first cell
# of every remaining row.
crimelist = [
    [cell.text for cell in row.find_all('td')[1:]]
    for row in table.find_all('tr')[1:]
    if row.text.strip() != ''
]

# then you could output to csv, like this:
header = ['Incident', 'ID', 'Address', 'Agency', 'Datetime']
outfile = "[PATH TO YOUR CSV FILE]"
# The csv module wants a text-mode file opened with newline='' on Python 3;
# the old 'wb' mode makes csv.writer raise TypeError there.
with open(outfile, 'w', newline='', encoding='utf-8') as f:
    cWriter = csv.writer(f)
    cWriter.writerow(header)
    cWriter.writerows(crimelist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment