This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// $ node read.js nt.00.nin | |
var fs = require('fs') | |
var int53 = require('int53') | |
f = process.argv[2] | |
function read (fd, start, end, cb) { | |
var len = end - start + 1 | |
var buf = new Buffer(len) | |
fs.read(fd, buf, 0, len, start, function (err) { | |
cb(err, buf) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ENV NSPAWN_BOOTSTRAP_IMAGE_SIZE=10GB | |
FROM ubuntu:xenial | |
# set unlimited bash history | |
# nspawn needs resolv.conf to be set up for internet to work | |
# password gets changed so we can login later | |
RUN mkdir /usr/local/anacapa && \ | |
cd /usr/local/anacapa && \ | |
echo "export HISTFILESIZE=" >> .bashrc && \ | |
echo "export HISTSIZE=" >> .bashrc && \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var $ = require('cheerio') | |
var fs = require('fs') | |
var walker = require('folder-walker') | |
var transform = require('parallel-transform') | |
var ndjson = require('ndjson') | |
var walk = walker('./pageblobs') // generated by abstract-blob-store | |
var scraper = transform(10, scrape) | |
var out = ndjson.serialize() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var hyperdb = require('hyperdb') | |
var hyperdiscovery = require('hyperdiscovery') | |
var npmkey = '0f8a60595af5387d52b053af4a8a4aecd5d6d3799741c3993916798e71ea0730' | |
var db = hyperdb('./npm.db', npmkey, {sparse: true, valueEncoding: 'json'}) | |
db.on('ready', function () { | |
var swarm = hyperdiscovery(db, {live: true}) | |
db.once('remote-update', function () { | |
db.get('/modules/aws.js', function (err, data) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs') | |
var request = require('request') | |
var through = require('through2') | |
var ndjson = require('ndjson') | |
var once = require('once') | |
var pump = require('pump') | |
var concat = require('concat-stream') | |
var parallel = require('parallel-transform') | |
var hyperdb = require('hyperdb') | |
var db = hyperdb('./npm.db', {valueEncoding: 'json'}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// data from geojson lines file from https://mapzen.com/data/metro-extracts/ | |
cat portland_oregon_osm_line.geojson | jsonfilter features.* > lines.ndjson | |
cat lines.ndjson | jsonfilter --match="this.properties.bicycle && this.properties.bicycle !== 'no'" > sharedpaths.json | |
cat lines.ndjson | grep "cycleway" >> sharedpaths.json | |
cat sharedpaths.json | sort | uniq > dedupe.json | |
mv dedupe.json sharedpaths.json | |
cat sharedpaths.json | ndjson-reduce | ndjson-map '{type: "FeatureCollection", features: d}' > sharedpaths.geojson | |
mkdir shp | |
cd shp | |
ogr2ogr -f "ESRI Shapefile" sharedpaths.shp ../sharedpaths.geojson OGRGeoJSON |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
379490 referenceworks.brillonline.com | |
377682 doi.apa.org | |
244045 primarysources.brillonline.com | |
189587 f1000.com | |
106769 www.iucnredlist.org | |
78961 www.e-enlightenment.com | |
67194 doi.namesforlife.com | |
20335 www.degruyter.com | |
17940 www.icpsr.umich.edu | |
17044 www.scivee.tv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
672055 www.ccdc.cam.ac.uk | |
618996 figshare.com | |
493410 rgdoi.net | |
487454 plutof.ut.ee | |
378396 ba.e-pics.ethz.ch | |
376822 retro.seals.ch | |
373193 www.die-bonn.de | |
358476 doi.pangaea.de | |
313951 www.gbif.org | |
237629 www.hepdata.net |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
npm install gunzip-maybe xml-json jsonfilter nugget -g | |
curl "https://search.datacite.org/sitemaps/sitemap.xml.gz" | gunzip-maybe | xml-json sitemapindex | jsonfilter sitemap.*.loc | xargs nugget -d datacite | |
ls datacite | xargs -I {} sh -c "cat datacite/{} | gunzip-maybe | xml-json urlset | jsonfilter url.*.loc | grep works" >> urls.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var request = require('request') | |
var base = 'https://api.crossref.org/works?filter=type:dataset&rows=1000' | |
doNext() | |
function doNext (cursor) { | |
if (!cursor) cursor = '*' | |
var url = base + '&cursor=' + cursor | |
console.error('GET', url) |