Skip to content

Instantly share code, notes, and snippets.

@cjwinchester
Created August 10, 2015 21:25
Show Gist options
  • Save cjwinchester/e4ec5f312294683627fb to your computer and use it in GitHub Desktop.
Save cjwinchester/e4ec5f312294683627fb to your computer and use it in GitHub Desktop.
Douglas County Sheriff warrant scraper
import requests
from bs4 import *
from time import *
import re
from string import ascii_lowercase
# Douglas County Sheriff warrant scraper.
#
# For each letter a-z: fetch the warrant search page for that letter, collect
# the warrant number from every result row, then fetch each warrant's detail
# page and write its cell values as one pipe-delimited line of
# dougwarrants.txt.
#
# NOTE(review): the pasted original had lost its indentation. The nesting
# below (detail loop inside the per-letter loop; one write and one pause per
# warrant) is reconstructed from the data flow -- confirm against the
# author's copy.

baseurl = "http://omahasheriff.com/services/warrants/criminal?searchterm="
lbase = "http://omahasheriff.com/services/warrants/criminal/warrant-details?wid="

# Seconds to wait on a single HTTP request. Without a timeout, requests.get
# can block indefinitely on a stalled connection.
request_timeout = 30

# Name the parser explicitly so the pages are parsed the same way on every
# machine instead of depending on which optional parsers are installed.
parser = "html.parser"

# Compiled once: collapses runs of spaces left inside the cell text.
multi_space = re.compile(' +')

# The with-block closes (and therefore flushes) the output file even when a
# request or parse step raises partway through the scrape.
with open('dougwarrants.txt', 'wb') as f:
    for letter in ascii_lowercase:
        r = requests.get(baseurl + letter, timeout=request_timeout)
        soup = BeautifulSoup(r.text, parser)
        table = soup.find('table', {"class": "warrants"})
        if table is None:
            # No results table on this search page: nothing to collect, and
            # one such page must not abort the remaining letters.
            continue

        links = []
        # [1:] skips the first row (presumably the header row -- confirm).
        for row in table.find_all('tr')[1:]:
            col = row.find_all('td')
            if len(col) < 2:
                continue
            # The warrant number is whatever precedes the first "-" in the
            # second cell. get_text() is used because .string is None when
            # the cell holds more than one child node.
            warrantnumber = col[1].get_text().split("-")[0].strip()
            if warrantnumber:
                links.append(warrantnumber)

        for link in links:
            x = requests.get(lbase + link, timeout=request_timeout)
            serp = BeautifulSoup(x.text, parser)
            div = serp.find('div', {'id': 'bodybottom'})
            tab = div.find('table') if div is not None else None
            if tab is None:
                print("no detail table for warrant " + link + "; skipped")
            else:
                rec = []
                # Only odd-indexed cells are kept (presumably the table
                # alternates label/value cells -- confirm against the page).
                for i, cell in enumerate(tab.find_all('td')):
                    if i % 2 != 0:
                        wut = multi_space.sub(
                            ' ', cell.text.strip().replace("\t", ""))
                        rec.append(wut)
                        print(wut)
                # The file is opened in binary mode, so encode explicitly;
                # relying on an implicit conversion fails on non-ASCII text.
                f.write(("|".join(rec) + "\n").encode("utf-8"))
            # Pause one second after every detail request.
            sleep(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment