Skip to content

Instantly share code, notes, and snippets.

View higs4281's full-sized avatar

william higgins higs4281

  • Consumer Financial Protection Bureau
View GitHub Profile
@higs4281
higs4281 / gist:1961817
Created March 2, 2012 22:18
bs4 crime scrape
from bs4 import BeautifulSoup
import requests
import csv
# Detailed-report page on crimemapping.com. The query string carries a date
# window (db=1/12/2012 00:00:00 through de=1/25/2012 23:59:00), a list of
# two-letter codes in `ccs` (presumably crime categories -- TODO confirm),
# an xmin/ymin/xmax/ymax extent and an `faid` identifier.
url = "http://www.crimemapping.com/DetailedReport.aspx?db=1/12/2012+00:00:00&de=1/25/2012+23:59:00&ccs=AR,AS,BU,DP,DR,DU,FR,HO,VT,RO,SX,TH,VA,VB,WE&xmin=-8577539.984326074&ymin=4369220.056681086&xmax=-8402193.441439813&ymax=4444739.840626868&faid=0b80bce5-5d21-468b-ae81-3d6e2ecf532e"
# Download the report page.
r = requests.get(url)
# Parse the response body as HTML. No parser is named here, so bs4 picks
# whichever parser it finds installed.
soup = BeautifulSoup(r.text)
# The first <table> in the document is the one the row loop below walks.
table = soup.findAll('table')[0]
# Starts empty; presumably filled by the row loop that follows.
crimelist = []
for row in table.findAll('tr')[1:]:
import re
def kml_fixer(DIRECTORY, FILENAME):
file_in=DIRECTORY+'/'+FILENAME
file_out=DIRECTORY+'/NEW_'+FILENAME
with open(file_in, "r+") as f:
old = f.read()
x = re.findall(r'<Point>\n.*\n.*<\/Point>', old)
for each in x:
old = old.replace(each, "")
@higs4281
higs4281 / gist:3378487
Created August 17, 2012 12:36
widget string
def _attr_safe(value):
    """Return *value* as text that is safe inside a double-quoted HTML attribute.

    The widget markup is emitted inside a single-quoted JavaScript string, so
    an unescaped apostrophe (as in "O'Brien") would end that string early and
    a double quote would end the title/alt attribute. Both are replaced with
    HTML entities, together with ``&``, ``<`` and ``>``.
    """
    text = "%s" % (value,)
    # "&" must be handled first so the entities added below are not
    # escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"),
                        ('"', "&quot;"), ("'", "&#39;")):
        text = text.replace(raw, entity)
    return text


# Opening of the JavaScript call: the widget header image and intro text,
# starting the single-quoted string that is closed when `widget` is built.
prestring = ['MugShotWidget2.serverResponse([\' <div class="ms_outer_container"><a href="http://www.tampabay.com/mugshots/" target="_blank"><img src="http://mugshots.s3.amazonaws.com/site-images/widget-header.gif" alt="Tampa Bay Mug Shots" /></a><div class="ms_toptext">Meet the latest eight people booked in Pinellas, Hillsborough, Manatee and Pasco counties.</div>']

# One linked thumbnail per entry in `mugs`; each entry supplies first_name,
# last_name and signed_photo. The full name is used as both title and alt.
poststring = []
for each in mugs:
    pair = _attr_safe("%s %s" % (each.first_name, each.last_name))
    poststring.append('<a href="http://www.tampabay.com/mugshots/" target="_blank"><img class="ms_mug" src="%s" title="%s" alt="%s" /></a>' % (each.signed_photo, pair, pair))

# Header first, then the thumbnails, then close the JS string, array and call.
prestring.extend(poststring)
widget = "".join(prestring) + "\'])"
<style> #elex {color: #fff; width:578px; height:125px; background-image:url('http://tampabay.com/tbprojects/elections/2012/results/img/bunting578.jpg'); background-repeat:no-repeat; margin:0; padding:0;} .elex_logo {text-indent:-2000px;background-image: url('http://tampabay.com/tbprojects/elections/2012/results/img/election-title.png'); background-repeat:no-repeat; height:60px; width:390px; margin-left:auto;margin-right:auto;position:relative;top:20px;margin-bottom:20px;} #elex_brick_ul {color: #fff; list-style:none; margin:10px 20px; padding:0; overflow:hidden; font-family:helvetica,sans; font-weight:bold; width:578px; } .elex_brick_li {color: #fff; float:left; padding:2px; margin-top:1px; list-style-type:none; display:block; text-transform:uppercase; font-weight:bold;} .elex_brick_li a:hover { text-decoration:underline; background-color:#173f8a; font-weight:bold; } .elex_brick {display: block; color: #fff; padding-top: 3px; padding-right: 8px; padding-bottom: 3px; padding-left: 8px; width: auto; text-dec
@higs4281
higs4281 / gist:6358296
Last active December 21, 2015 19:59
scraper
# scraper for joni james
# author: bill higgins, 8/27/2013
import csvkit
import requests
from bs4 import BeautifulSoup as bs
import datetime
# Timestamp taken when the script starts (naive local time). Presumably used
# later to report elapsed run time -- the rest of the script is not shown here.
starter = datetime.datetime.now()
import csv
import json
import requests
# Fusion Tables query for Florida beaches, requested as JSONP: the response
# is the JSON document wrapped in a call to the callback named in the URL.
url = 'https://www.googleapis.com/fusiontables/v1/query?key=AIzaSyAYkamn8YeEjHRpv892i26Mfv6i09eEdPM&sql=SELECT%20beach_id,%20name,%20county,%20mid_lat,%20mid_lon,%20samples,%20pct_samples_bav,%20loc_valid%20FROM%201uK8UlIIOG59txfGjH2WxrDIr_KIxR9lGixkqTple%20WHERE%20state%20=%20%27FL%27&typed=true&callback=jQuery17105043562010396272_1404232573870&_=1404232574998'
callback_open = 'jQuery17105043562010396272_1404232573870('
response_text = requests.get(url).text
# Keep what follows the "callback(" opener, then trim the ")" and ";"
# characters that wrap the call, leaving only the JSON.
after_opener = response_text.split(callback_open)[1]
jtext = after_opener.strip(');')
beaches_dict = json.loads(jtext)
with open('beach_data.csv', 'w') as f:
writer = csv.writer(f)
@higs4281
higs4281 / django_queries.py
Last active August 29, 2015 14:03
django queries
"""
python data structures:
https://docs.python.org/2/tutorial/datastructures.html
reference pages
https://docs.djangoproject.com/en/1.5/
query filters
https://docs.djangoproject.com/en/1.5/ref/models/querysets/#methods-that-return-new-querysets
@higs4281
higs4281 / arrest_geo_query.py
Last active August 29, 2015 14:04
arrest geo query
from arrestee.models import Arrestee
from geopy.distance import distance as geo_distance
from geopy import Point
# Arrests booked in Hillsborough with a Narcotics charge category and an
# arrest date from 2014-02-01 up to, but not including, 2014-04-01.
# All criteria go into ONE filter() call on purpose; .distinct() then
# collapses repeated rows. Author's note on the result: 1,210 arrests.
window_start = datetime.datetime(2014, 2, 1)
window_end = datetime.datetime(2014, 4, 1)
criteria = {
    'booking_county__name': 'Hillsborough',
    'arrest_date__gte': window_start,
    'arrest_date__lt': window_end,
    'charge__code__category': 'Narcotics',
}
hildrugs = Arrestee.objects.filter(**criteria).distinct()

# we'll use this list to collect arrests
hits = []
@higs4281
higs4281 / provider.py
Created July 23, 2014 14:11
child care service scraper
# scraper for joni james
# author: bill higgins, 8/27/2013
import csvkit
import requests
from bs4 import BeautifulSoup as bs
import datetime
# Timestamp taken when the script starts (naive local time). Presumably used
# later to report elapsed run time -- the rest of the script is not shown here.
starter = datetime.datetime.now()
@higs4281
higs4281 / enrollment.py
Created August 23, 2014 01:27
extracting csv
from bs4 import BeautifulSoup as bs
from csvkit import CSVKitWriter as ckw
# Read the saved enrollment page and parse it; the file is only needed
# for the read, so the handle is released as soon as the tree is built.
with open('enrollment.html', 'r') as f:
    soup = bs(f.read())

# Every <tr> of the first <table> in the document, first row included.
rows = soup.find('table').findAll('tr')

# Column titles are the text of each <td> cell in the first row.
header = []
for td in rows[0].findAll('td'):
    header.append(td.text)
with open('enrollment.csv', 'w') as f:
writer = ckw(f)