Additions wanted - please just fork and add.
- Parsing PDFs by Thomas Levine
- [Get Started With Scraping – Extracting Simple Tables from PDF Documents][scoda-simple-tables]
Additions wanted - please just fork and add.
| count   | sum             | DepartmentFamilyNameCanonical                  |
| ------- | --------------- | ---------------------------------------------- |
| 3159056 | 393285808916.97 | Department of Health                           |
| 238828  | 227002821119.32 | Department of Communities & Local Government   |
| 239218  | 179384601858.26 | Department for Education                       |
| 197897  | 117044628895.78 | Department for Business, Innovation and Skills |
| 199113  | 46335079639.94  | Department For Transport                       |
| 35539   | 24392624386.64  | Home Office                                    |
| 55776   | 16089108023.16  | Department for International Development       |
| 109110  | 15738053030.67  | Ministry of Defence                            |
| import json | |
| x = json.load(open('datapackage.json')) | |
| finfo = x['files'][0] | |
| for idx, f in enumerate(finfo['schema']['fields']): | |
| print idx, f['id'] |
| #!/usr/bin/env python | |
| # A simple Python script to convert csv files to sqlite (with type guessing) | |
| # | |
| # @author: Rufus Pollock | |
| # Placed in the Public Domain | |
| import csv | |
| import sqlite3 | |
| def convert(filepath_or_fileobj, dbpath, table='data'): | |
| if isinstance(filepath_or_fileobj, basestring): |
| // the location we want to GeoCode | |
| var location = 'London'; | |
| // we are using MapQuest's Nominatim service | |
| var geocode = 'http://open.mapquestapi.com/search?format=json&q=' + location; | |
| // use jQuery to call the API and get the JSON results | |
| $.getJSON(geocode, function(data) { | |
| // get lat + lon from first match | |
| var latlng = [data[0].lat, data[0].lon] |
| { | |
| "manifest_version": 1, | |
| "created": "2013-01-05T17:54:06.522Z", | |
| "scripts": [ | |
| { | |
| "created": "2013-01-05T17:54:06.523Z", | |
| "last_modified": "2013-01-05T17:54:06.523Z", | |
| "language": "javascript", | |
| "content": "// correct the field type to date so it renders correctly\ndataset.fields[0].type = 'date';\n// save the dataset\nsaveDataset(dataset);", | |
| "id": "main.js" |
| { | |
| "manifest_version": 1, | |
| "created": "2012-12-28T20:32:39.564Z", | |
| "scripts": [ | |
| { | |
| "created": "2012-12-30T19:41:43.446Z", | |
| "last_modified": "2012-12-30T19:41:43.446Z", | |
| "language": "javascript", | |
| "content": "print(\"hello world\");\nprint('Fields: ', dataset.fields);\n// let's compute inflation\ndataset.data = _.map(dataset.data.slice(1), function(record, idx) {\n record.inflation = 100 * (dataset.data[idx+1].CDKO - dataset.data[idx].CDKO)/dataset.data[idx].CDKO;\n return record;\n});\ndataset.fields.push({id: 'inflation'});\nprint(dataset.data[0]);\nsaveDataset(dataset);\nprint('here again 6');", | |
| "id": "main.js" |
| ''' Upload datawrangling handbook to wordpress site. | |
| Copy this file to same directory as your sphinx build directory and then do | |
| python upload.py -h | |
| NB: You need to enable XML-RPC access to the wordpress site (via Settings -> Writing) | |
| NB: this requires pywordpress (pip install pywordpress) and associated config | |
| file - see https://github.com/rgrp/pywordpress |
| var jsdom = require('jsdom'); | |
| var fs = require('fs'); | |
| // var jquery = fs.readFileSync("./jquery-1.7.1.min.js").toString(); | |
| var linklist = 'http://police.uk/data'; | |
| jsdom.env({ | |
| html: linklist, | |
| scripts: [ | |
| 'http://code.jquery.com/jquery.js' |
| { | |
| "datasets": { | |
| "adur_district_spending": { | |
| "author": "Lucy Chambers", | |
| "author_email": "", | |
| "extras": { | |
| "spatial-text": "Adur, West Sussex, South East England, England, United Kingdom", | |
| "spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [-0.3715, 50.8168],[-0.3715, 50.8747], [-0.2155, 50.8747], [-0.2155, 50.8168], [-0.3715, 50.8168] ] ] }" | |
| }, | |
| "license": "License Not Specified", |