Additions wanted - please just fork and add.
- Parsing PDFs by Thomas Levine
- [Get Started With Scraping – Extracting Simple Tables from PDF Documents][scoda-simple-tables]
  count  |       sum       |        DepartmentFamilyNameCanonical
---------+-----------------+------------------------------------------------
 3159056 | 393285808916.97 | Department of Health
  238828 | 227002821119.32 | Department of Communities & Local Government
  239218 | 179384601858.26 | Department for Education
  197897 | 117044628895.78 | Department for Business, Innovation and Skills
  199113 |  46335079639.94 | Department For Transport
   35539 |  24392624386.64 | Home Office
   55776 |  16089108023.16 | Department for International Development
  109110 |  15738053030.67 | Ministry of Defence
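A summary like this is produced by a plain GROUP BY aggregation. A minimal sketch in Python, assuming the transactions have already been loaded into an SQLite table named spending with an amount column (both names are assumptions):

import sqlite3

conn = sqlite3.connect('spending.db')  # database name is an assumption

# count and total the transactions per department, largest total first
query = '''
SELECT COUNT(*), SUM(amount), DepartmentFamilyNameCanonical
FROM spending
GROUP BY DepartmentFamilyNameCanonical
ORDER BY SUM(amount) DESC
'''
for count, total, department in conn.execute(query):
    print(count, total, department)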
import json

# load the Data Package descriptor
x = json.load(open('datapackage.json'))

# the first file listed in the descriptor
finfo = x['files'][0]

# print the position and id of every field in its schema
for idx, f in enumerate(finfo['schema']['fields']):
    print(idx, f['id'])
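Those indices are handy for picking columns out of the raw data file. A short sketch that builds a field-id-to-column-index map and uses it to read one column, assuming the file entry carries a path key pointing at a local CSV and the schema has a field with id amount (all assumptions):

import csv
import json

x = json.load(open('datapackage.json'))
finfo = x['files'][0]

# map each field id to its column position
index = {f['id']: i for i, f in enumerate(finfo['schema']['fields'])}

with open(finfo['path']) as fo:  # 'path' key is an assumption
    reader = csv.reader(fo)
    next(reader)  # skip the header row
    for row in reader:
        print(row[index['amount']])  # 'amount' field id is an assumption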
#!/usr/bin/env python
# A simple Python script to convert csv files to sqlite (with type guessing)
#
# @author: Rufus Pollock
# Placed in the Public Domain
import csv
import sqlite3

def convert(filepath_or_fileobj, dbpath, table='data'):
    # accept either a path or an already-open file object
    if isinstance(filepath_or_fileobj, str):
        fo = open(filepath_or_fileobj)
    else:
        fo = filepath_or_fileobj
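The script is only excerpted above. For reference, here is a complete, self-contained sketch of the same idea in Python 3 - the function name, the deliberately crude type guessing, and the file names in the usage line are all assumptions, not part of the original script:

import csv
import sqlite3

def _guess_type(values):
    # crude guessing: integer if every value parses as int, then real, else text
    for cast, sqltype in ((int, 'integer'), (float, 'real')):
        try:
            for v in values:
                if v != '':
                    cast(v)
            return sqltype
        except ValueError:
            continue
    return 'text'

def csv_to_sqlite(csvpath, dbpath, table='data'):
    with open(csvpath, newline='') as fo:
        rows = list(csv.reader(fo))
    headers, data = rows[0], rows[1:]
    # guess a type per column (assumes a rectangular CSV)
    types = [_guess_type([row[i] for row in data]) for i in range(len(headers))]
    coldefs = ', '.join('"%s" %s' % (h, t) for h, t in zip(headers, types))
    conn = sqlite3.connect(dbpath)
    conn.execute('CREATE TABLE IF NOT EXISTS "%s" (%s)' % (table, coldefs))
    conn.executemany('INSERT INTO "%s" VALUES (%s)'
                     % (table, ', '.join('?' for _ in headers)), data)
    conn.commit()
    conn.close()

csv_to_sqlite('spending.csv', 'spending.db', table='spending')  # names are assumptions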
// the location we want to geocode (named loc rather than location,
// which would clash with the browser's built-in window.location)
var loc = 'London';
// we are using MapQuest's Nominatim service
var geocode = 'http://open.mapquestapi.com/search?format=json&q=' + loc;
// use jQuery to call the API and get the JSON results
$.getJSON(geocode, function(data) {
  // get lat + lon from the first (best) match
  var latlng = [data[0].lat, data[0].lon];
  console.log(latlng);
});
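The same lookup works just as well outside the browser. A sketch using Python's requests library, assuming the endpoint above accepts the same format and q parameters:

import requests  # assumes the requests library is installed

resp = requests.get('http://open.mapquestapi.com/search',
                    params={'format': 'json', 'q': 'London'})
matches = resp.json()
# lat + lon of the first (best) match, as in the jQuery version
print([matches[0]['lat'], matches[0]['lon']])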
{
  "manifest_version": 1,
  "created": "2013-01-05T17:54:06.522Z",
  "scripts": [
    {
      "created": "2013-01-05T17:54:06.523Z",
      "last_modified": "2013-01-05T17:54:06.523Z",
      "language": "javascript",
      "content": "// correct the field type to date so it renders correctly\ndataset.fields[0].type = 'date';\n// save the dataset\nsaveDataset(dataset);",
      "id": "main.js"
    }
  ]
}
{
  "manifest_version": 1,
  "created": "2012-12-28T20:32:39.564Z",
  "scripts": [
    {
      "created": "2012-12-30T19:41:43.446Z",
      "last_modified": "2012-12-30T19:41:43.446Z",
      "language": "javascript",
      "content": "print(\"hello world\");\nprint('Fields: ', dataset.fields);\n// let's compute inflation\ndataset.data = _.map(dataset.data.slice(1), function(record, idx) {\n record.inflation = 100 * (dataset.data[idx+1].CDKO - dataset.data[idx].CDKO)/dataset.data[idx].CDKO;\n return record;\n});\ndataset.fields.push({id: 'inflation'});\nprint(dataset.data[0]);\nsaveDataset(dataset);\nprint('here again 6');",
      "id": "main.js"
    }
  ]
}
'''Upload the Data Wrangling Handbook to a WordPress site.

Copy this file into the same directory as your Sphinx build directory, then run:

    python upload.py -h

NB: you need to enable XML-RPC access to the WordPress site (via Settings -> Writing).
NB: this requires pywordpress (pip install pywordpress) and its associated config
file - see https://github.com/rgrp/pywordpress
'''
var jsdom = require('jsdom');
var fs = require('fs');
// var jquery = fs.readFileSync("./jquery-1.7.1.min.js").toString();

// the page whose links we want to harvest
var linklist = 'http://police.uk/data';

jsdom.env({
  url: linklist,
  scripts: [
    'http://code.jquery.com/jquery.js'
  ],
  done: function (errors, window) {
    // jQuery is now available on the fetched page as window.$
  }
});
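The same link harvesting can be done in Python. A sketch with the requests and lxml libraries (both assumed to be installed):

import requests
from lxml import html

# fetch the listing page and print every link on it
page = html.fromstring(requests.get('http://police.uk/data').content)
for href in page.xpath('//a/@href'):
    print(href)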
{
  "datasets": {
    "adur_district_spending": {
      "author": "Lucy Chambers",
      "author_email": "",
      "extras": {
        "spatial-text": "Adur, West Sussex, South East England, England, United Kingdom",
        "spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [-0.3715, 50.8168], [-0.3715, 50.8747], [-0.2155, 50.8747], [-0.2155, 50.8168], [-0.3715, 50.8168] ] ] }"
      },
      "license": "License Not Specified"
    }
  }
}