- download the extension (crx is a zip)
curl -L -o memento.zip "https://clients2.google.com/service/update2/crx?response=redirect&x=id%3Djgbfpjledahoajcppakbgilmojkaghgm%26uc"
- unpack the extension
unzip -d memento memento.zip
| #!/bin/bash | |
| . heritrix.conf | |
| if [ -z "$1" ] || [ -z "$2" ]; then | |
| echo usage: $0 jobname seedsfile | |
| exit | |
| fi | |
| JOB=$1 |
| source "http://rubygems.org" | |
| gem "oai", :git => "https://github.com/tjdett/ruby-oai.git", :branch => "seamless-resumption" | |
| gem "redis" | |
| gem "libxml-ruby" |
| [ | |
| { | |
| "op": "core/column-addition", | |
| "description": "Create column url at index 2 based on column Column 2 using expression jython:import httplib\nconn = httplib.HTTPConnection(\"dx.doi.org\")\ndoi = \"/\"+value\nconn.request(\"HEAD\", doi)\nres = conn.getresponse()\nreturn res.getheader('location')", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "newColumnName": "url", | |
| "columnInsertIndex": 2, |
| ~ virtualenv env | |
| ~ source env/bin/activate | |
| ~ pip install git+https://github.com/nlevitt/warctools@tweaks | |
| ~ pip install pyOpenSSL | |
| ~ git clone https://github.com/nlevitt/warcprox | |
| ~ cd warcprox | |
| ~ python warcprox.py --rollover-idle-time=7200 | |
| 2013-10-20 14:36:07,923 66818 MainThread INFO server_activate(warcprox.py:346) listening on 127.0.0.1:8080 | |
| 2013-10-20 14:36:07,924 66818 MainThread INFO _read_ca(warcprox.py:75) read CA key+cert from ./warcprox-ca.pem | |
| 2013-10-20 14:36:07,928 66818 WarcWriterThread INFO run(warcprox.py:510) WarcWriterThread starting, directory=/private/tmp/warcprox/warcs gzip=False rollover_size=1000000000 rollover_idle_time=7200 prefix=WARCPROX port=8080 |
| 15:02:16.722 [..r.operations.NERProcess] Extracting named entities in column Autore on row 4 of 4. (3ms) | |
| 15:02:17.029 [..r.operations.NERProcess] The extractor failed (307ms) | |
| java.lang.IllegalArgumentException: dataTXT request failed. | |
| at org.freeyourmetadata.ner.services.DataTXT.parseExtractionResponseEntity(DataTXT.java:69) | |
| at org.freeyourmetadata.ner.services.NERServiceBase.parseExtractionResponseEntity(NERServiceBase.java:196) | |
| at org.freeyourmetadata.ner.services.NERServiceBase.performExtractionRequest(NERServiceBase.java:128) | |
| at org.freeyourmetadata.ner.services.NERServiceBase.extractNamedEntities(NERServiceBase.java:100) | |
| at org.freeyourmetadata.ner.operations.NERProcess$Extractor.run(NERProcess.java:210) |
(requires jq and GNU Parallel)
~ make get
➜ ~ gem install wikidata
➜ ~ wikidata find -r "Antonio Gramsci"
Antonio Gramsci
Italian writer, politician, theorist, sociologist and linguist
Wikidata ID: Q83003
Claims: 22
+---------------------------+------+-----------------------------------+
| Property Label | id | value |
+---------------------------+------+-----------------------------------+
➜ ng ng-version
NailGun server version 0.9.2-SNAPSHOT
➜ ng ng-cp
file:/Users/raf/Downloads/fits-0.8.0/
file:/Users/raf/Downloads/fits-0.8.0/fits-ngserver.sh/lib/*.jar
file:/Users/raf/Downloads/fits-0.8.0/fits-ngserver.sh/lib/droid/*.jar
file:/Users/raf/Downloads/fits-0.8.0/fits-ngserver.sh/lib/jhove/*.jar
| $ otool -L scantailor | |
| scantailor: | |
| /usr/local/lib/QtGui.framework/Versions/4/QtGui (compatibility version 4.8.0, current version 4.8.5) | |
| /usr/local/lib/QtXml.framework/Versions/4/QtXml (compatibility version 4.8.0, current version 4.8.5) | |
| /usr/local/lib/QtCore.framework/Versions/4/QtCore (compatibility version 4.8.0, current version 4.8.5) | |
| /opt/X11/lib/libXrender.1.dylib (compatibility version 5.0.0, current version 5.0.0) | |
| /usr/local/lib/libtiff.5.dylib (compatibility version 8.0.0, current version 8.0.0) | |
| /usr/local/lib/libpng15.15.dylib (compatibility version 34.0.0, current version 34.0.0) | |
| /usr/lib/libz.1.dylib (compatibility version 1.0.0, current version 1.2.5) | |
| /usr/local/lib/libjpeg.8.dylib (compatibility version 13.0.0, current version 13.0.0) |