Additions wanted - please just fork and add.
- Parsing PDFs by Thomas Levine
- [Get Started With Scraping – Extracting Simple Tables from PDF Documents][scoda-simple-tables]
Additions wanted - please just fork and add.
| 2010-11-P01.csv:4:Vendor,Expense Description,Amount,Doc No,,,^M | |
| 2010-11-P02.csv:6:Vendor,Expense Description,Amount,Doc No,,,^M | |
| 2010-11-P03.csv:6:Document No","Amount | |
| 2010-11-P04-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P05-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P06-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P07-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P08-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P09-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
| 2010-11-P10-500.csv:1:Vendor ID,Vendor Name,Cos |
| var nodeUtil = require("util"), | |
| PFParser = require("pdf2json") | |
| ; | |
| var pdfParser = new PFParser(); | |
| pdfParser.on("pdfParser_dataReady", function(data) { | |
| console.log('here'); | |
| console.log(data); | |
| console.log(data.data.Pages[0]); |
| // Geocoding using Mapquest Nominatim API | |
| // | |
| // Documentation for the API: http://wiki.openstreetmap.org/wiki/Nominatim | |
| // Here's an example query: http://open.mapquestapi.com/nominatim/v1/search?q=detroit&format=json | |
| // geocode function | |
| // | |
| // :param place: is a place name like "Detroit" or "London" | |
| // :param callback: function receiving arguments (error, {lon: ..., lat: ...}) | |
| function geocode(place, callback) { |
| // convert a title to a slug | |
| // | |
| // lowercase, replace ' ' by '-', collapse runs of dashes into one, then remove everything that is not alphanumeric, underscore or dash | |
| var slug = title | |
| .toLowerCase() | |
| .replace(/ /g, '-') | |
| .replace(/--+/g, '-') | |
| .replace(/[^\w-]+/g, '') | |
| ; |
| import urlparse | |
| import json | |
| import requests | |
| # set your api key for this work | |
| apikey = 'XXXXX' | |
| datapusher_url = 'http://datapusher-test.herokuapp.com' | |
| ckan_url = 'http://datahub.io' | |
| # gold prices | |
| res_id = 'b9aae52b-b082-4159-b46f-7bb9c158d013' |
README is empty
README is empty
| '''Run this script and it will export a list of all CKAN extensions on github | |
| (guessed by repo name containing ckanext) to json and csv files in this directory | |
| ''' | |
| import urllib | |
| import json | |
| import csv | |
| jsonfp = 'extensions-gh.json' | |
| csvfp = 'extensions-gh.csv' |