Additions wanted - please just fork and add.
- Parsing PDFs by Thomas Levine
- [Get Started With Scraping – Extracting Simple Tables from PDF Documents][scoda-simple-tables]
Additions wanted - please just fork and add.
| count   | sum             | DepartmentFamilyNameCanonical                  |
| ------- | --------------- | ---------------------------------------------- |
| 3159056 | 393285808916.97 | Department of Health                           |
| 238828  | 227002821119.32 | Department of Communities & Local Government   |
| 239218  | 179384601858.26 | Department for Education                       |
| 197897  | 117044628895.78 | Department for Business, Innovation and Skills |
| 199113  | 46335079639.94  | Department For Transport                       |
| 35539   | 24392624386.64  | Home Office                                    |
| 55776   | 16089108023.16  | Department for International Development       |
| 109110  | 15738053030.67  | Ministry of Defence                            |
| import json | |
| x = json.load(open('datapackage.json')) | |
| finfo = x['files'][0] | |
| for idx, f in enumerate(finfo['schema']['fields']): | |
| print idx, f['id'] |
| #!/usr/bin/env python | |
| # A simple Python script to convert csv files to sqlite (with type guessing) | |
| # | |
| # @author: Rufus Pollock | |
| # Placed in the Public Domain | |
| import csv | |
| import sqlite3 | |
| def convert(filepath_or_fileobj, dbpath, table='data'): | |
| if isinstance(filepath_or_fileobj, basestring): |
| // the location we want to GeoCode | |
| var location = 'London'; | |
| // we are using MapQuest's Nominatim service | |
| var geocode = 'http://open.mapquestapi.com/search?format=json&q=' + location; | |
| // use jQuery to call the API and get the JSON results | |
| $.getJSON(geocode, function(data) { | |
| // get lat + lon from first match | |
| var latlng = [data[0].lat, data[0].lon] |
| { | |
| "manifest_version": 1, | |
| "created": "2013-01-05T17:54:06.522Z", | |
| "scripts": [ | |
| { | |
| "created": "2013-01-05T17:54:06.523Z", | |
| "last_modified": "2013-01-05T17:54:06.523Z", | |
| "language": "javascript", | |
| "content": "// correct the field type to date so it renders correctly\ndataset.fields[0].type = 'date';\n// save the dataset\nsaveDataset(dataset);", | |
| "id": "main.js" |
| { | |
| "manifest_version": 1, | |
| "created": "2012-12-28T20:32:39.564Z", | |
| "scripts": [ | |
| { | |
| "created": "2012-12-30T19:41:43.446Z", | |
| "last_modified": "2012-12-30T19:41:43.446Z", | |
| "language": "javascript", | |
| "content": "print(\"hello world\");\nprint('Fields: ', dataset.fields);\n// let's compute inflation\ndataset.data = _.map(dataset.data.slice(1), function(record, idx) {\n record.inflation = 100 * (dataset.data[idx+1].CDKO - dataset.data[idx].CDKO)/dataset.data[idx].CDKO;\n return record;\n});\ndataset.fields.push({id: 'inflation'});\nprint(dataset.data[0]);\nsaveDataset(dataset);\nprint('here again 6');", | |
| "id": "main.js" |
| ''' Upload datawrangling handbook to wordpress site. | |
| Copy this file to same directory as your sphinx build directory and then do | |
| python upload.py -h | |
| NB: You need to enable XML-RPC access to the wordpress site (via Settings -> Writing) | |
| NB: this requires pywordpress (pip install pywordpress) and associated config | |
| file - see https://github.com/rgrp/pywordpress |
| var jsdom = require('jsdom'); | |
| var fs = require('fs'); | |
| // var jquery = fs.readFileSync("./jquery-1.7.1.min.js").toString(); | |
| var linklist = 'http://police.uk/data'; | |
| jsdom.env({ | |
| html: linklist, | |
| scripts: [ | |
| 'http://code.jquery.com/jquery.js' |
| { | |
| "datasets": { | |
| "adur_district_spending": { | |
| "author": "Lucy Chambers", | |
| "author_email": "", | |
| "extras": { | |
| "spatial-text": "Adur, West Sussex, South East England, England, United Kingdom", | |
| "spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [-0.3715, 50.8168],[-0.3715, 50.8747], [-0.2155, 50.8747], [-0.2155, 50.8168], [-0.3715, 50.8168] ] ] }" | |
| }, | |
| "license": "License Not Specified", |