Skip to content

Instantly share code, notes, and snippets.

@nickjevershed
nickjevershed / getgenres.py
Created January 24, 2014 00:09
Classifies a CSV of songs with Last.fm tags, Echo Nest terms and genres, gets the number of YouTube views, and a few other things
#!/usr/bin/env python
#coding=utf-8
import pylast
import csv
import urllib
import requests
import simplejson
import time
# Gets Last.fm data: top 3 tags, track duration, and Last.fm listener count
@nickjevershed
nickjevershed / donations.py
Created February 3, 2014 05:02
Scraper for Australian political party annual returns
import csv
import mechanize
import lxml.html
import scraperwiki
annDonorsurl = "http://periodicdisclosures.aec.gov.au/AnalysisDonor.aspx"
annReportingPeriods={
"1998-1999":"1",
"1999-2000":"2",
@nickjevershed
nickjevershed / Labor-network-graph.html
Created March 31, 2014 22:37
Network graph for Labor funds via associated entities
<!DOCTYPE html>
<meta charset="utf-8">
<title>How money flows to the Labor party | The Guardian</title>
<style>
body {
margin:0;
}
.donorContainer {
@nickjevershed
nickjevershed / csv-read-write.py
Last active August 29, 2015 13:57
A template for reading a CSV, then writing to a new CSV with a new column
import csv
with open('input.csv','rU') as csvinput:
with open('output.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')
reader = csv.reader(csvinput, lineterminator='\n')
#Get the headers from the old csv, add to new csv and add our new column header
headers = reader.next()
@nickjevershed
nickjevershed / checkloop.py
Created April 1, 2014 05:45
Checks for source/target loops and prints each one it finds
def find_loops(dons):
    """Return every (sourceName, targetName) pair that has a reversed twin.

    A record is reported when some record in ``dons`` has the same two names
    with source and target swapped, i.e. the two records form a two-node
    loop. Each loop therefore shows up twice in the result (once per
    direction), and a record whose source equals its target matches itself.
    If several records reverse the same record, it is reported once per match.

    dons -- iterable of dicts with 'sourceName' and 'targetName' keys.
    Returns a list of (sourceName, targetName) tuples in iteration order.
    """
    dons = list(dons)  # scanned once per record, so materialise iterators
    loops = []
    for s in dons:
        for ss in dons:
            if s['sourceName'] == ss['targetName'] and s['targetName'] == ss['sourceName']:
                loops.append((s['sourceName'], s['targetName']))
    return loops


dons = []  # source-target dataset here

# Single-argument print with %-formatting emits the same "source target"
# line under both Python 2 and Python 3.
for source, target in find_loops(dons):
    print("%s %s" % (source, target))
@nickjevershed
nickjevershed / parse_captcha.py
Created June 20, 2014 01:09
OCR for captchas
import sys
import os
import re
import subprocess
import tempfile
from PIL import Image
def parse_captcha(filename):
"""Return the text for this image using Tesseract
# simplejson is used when installed; the standard-library json module
# provides the same load() call and serves as a fallback.
try:
    import simplejson as json
except ImportError:
    import json

# Parse the JSON document; the file is closed as soon as it has been read.
with open("blah.json") as f:
    blah = json.load(f)

# Print the headline of the first feature. The parenthesised single-argument
# form behaves the same under Python 2 and Python 3.
print(blah['features'][0]['properties']['headline'])
@nickjevershed
nickjevershed / expenses-pdf-scraper.py
Created July 23, 2014 09:30
A scraper for getting politicians' travel expenses from PDF
#!/usr/bin/env python
import scraperwiki
import urllib2
import lxml.etree
urls = ["http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ABBOTT_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALBANESE_Anthony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALEXANDER_John.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BALDWIN_Bob.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BIRD_Sharon.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BISHOP_Bronwyn.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BOWEN_Chris.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BRADBURY_David.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BURKE_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_CAMERON_Doug.pdf","http://www.finance.gov.au/sites/defa
@nickjevershed
nickjevershed / detention-centres.csv
Created August 19, 2014 06:02
List of Australian detention centres and locations
name centre latitude longitude
Brisbane ITA Brisbane_ITA -27.401747 153.104782
Curtin IDC Curtin_IDC -17.38101 123.677216
Maribyrnong IDC Maribyrnong_IDC -37.780035 144.880142
Northern IDC Northern_IDC -12.425709 130.900211
Perth IDC Perth_IDC -31.934562 115.958118
Scherger IDC Scherger_IDC -12.633869 141.888428
Villawood IDC Villawood_IDC -33.878279 150.987339
Christmas Island Christmas_Island -10.488044 105.611572
Melbourne ITA Melbourne_ITA -37.841807 144.952068
@nickjevershed
nickjevershed / immi-contracts.py
Created August 22, 2014 01:35
Classification of immigration department contracts
import csv
import re
fList = ['client','detention','detain','manus','nauru','cocos','keeling','christmas','refugee','unaccompanied','humanitarian','minor','staff accomodation','curtin','villawood','scherger','inverbrackie','derby','construction camp','ita','idc','apod','irh','darwin airport','berrimah','bladin','wickham','phosphate','aqua','lilac','maribyrnong','inverbrackie','serco','transfield','g4s','gsl','toll']
nList = ['pontville','CI','weipa','regional','processing','IMA','tamil','farsi','afghanistan','screening','woomera','yongah']
with open('immigration-contracts.csv','rU') as csvinput:
with open('output.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')