Created
March 22, 2015 11:18
-
-
Save bobylito/e2d13e78ece07a97f0ba to your computer and use it in GitHub Desktop.
Import data to Algolia with Node.js streams.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Imports a semicolon-delimited CSV file into an Algolia index using Node.js streams.
// npm install algolia-search agentkeepalive batch-stream csv-parse stream-transform
var HttpsAgent = require('agentkeepalive').HttpsAgent;
var Algolia = require('algolia-search');
var stream = require('stream');
var parse = require('csv-parse');
var fs = require('fs');
var transform = require('stream-transform');
var Batch = require('batch-stream');

// Command-line arguments after the script name: APP_ID first, then API_KEY.
var args = process.argv.slice(2);

if (args.length < 2) {
  console.error("Usage : node import.js APP_ID API_KEY");
  // A top-level `return 1` does not set the exit status; process.exitCode does,
  // while still letting pending output flush before the process ends.
  process.exitCode = 1;
} else {
  // Pipeline: read file -> parse CSV rows -> map to records -> group into
  // batches of 10000 -> push each batch to Algolia.
  var fileStream = fs.createReadStream('datasets/datasets.csv', { autoClose: true });
  var parser = parse({ comment: '#', delimiter: ";" });

  // .pipe() does not forward 'error' events, and an 'error' event without a
  // listener is thrown, so each fallible source gets its own listener.
  fileStream.on('error', reportStreamError);
  parser.on('error', reportStreamError);

  fileStream.pipe(parser)
    .pipe(transform(toAlgoliaRecord))
    .pipe(new Batch({ size: 10000 }))
    .pipe(algoliaSaveStream(args));
}

/**
 * Logs a stream failure and marks the process as failed.
 *
 * @param {Error} error - The error emitted by the read stream or the CSV parser.
 */
function reportStreamError(error) {
  console.error("ERROR: %s", error && error.message ? error.message : error);
  process.exitCode = 1;
}
/**
 * Converts one parsed CSV row into the record object sent to Algolia.
 *
 * Columns are read by position: 0-9 are copied as strings, 10-11 are
 * booleans encoded as the literal text "True", 12-13 are date strings
 * converted with Date.parse, 14 is a comma-separated tag list and 15-21
 * are integer metrics.
 *
 * @param {Array<string>} data - The row's cells, in column order.
 * @param {function(?Error, Object)} cb - Called synchronously with
 *   (null, record).
 */
function toAlgoliaRecord( data, cb ) {
  // Always parse in base 10 so a cell such as "0x10" is not read as hex.
  function toInt( value ) {
    return parseInt( value, 10 );
  }

  // Only the exact text "True" counts as true; anything else is false.
  function isTrue( value ) {
    return value === "True";
  }

  // A missing or empty tags cell becomes an empty list instead of throwing
  // (missing) or producing a single empty-string tag (empty).
  var rawTags = data[14];
  var tags = rawTags ? rawTags.split( "," ) : [];

  var record = {
    "objectID" : data[0],
    "title" : data[1],
    "slug" : data[2],
    "url" : data[3],
    "organization" : data[4],
    "organization_id" : data[5],
    "supplier" : data[6],
    "description" : data[7],
    "frequency" : data[8],
    "license" : data[9],
    "private" : isTrue( data[10] ),
    "featured" : isTrue( data[11] ),
    "created_at" : Date.parse( data[12] ),
    "last_modified" : Date.parse( data[13] ),
    "tags" : tags,
    "metric.nb_uniq_visitors" : toInt( data[15] ),
    "metric.views" : toInt( data[16] ),
    "metric.followers" : toInt( data[17] ),
    "metric.reuses" : toInt( data[18] ),
    "metric.nb_visits" : toInt( data[19] ),
    "metric.nb_hits" : toInt( data[20] ),
    "metric.issues" : toInt( data[21] )
  };
  cb( null, record );
}
/**
 * Builds a writable object stream that saves each incoming batch of records
 * to the "opendatafrance" Algolia index.
 *
 * The next batch is only accepted once the previous save has called back,
 * so upstream stages are paused instead of queueing every batch in memory.
 * A failed save is logged and the import continues with the next batch.
 *
 * @param {Array<string>} parameters - [ APP_ID, API_KEY ].
 * @returns {stream.Writable} Object-mode stream; each chunk written is
 *   passed to index.saveObjects.
 */
function algoliaSaveStream( parameters ) {
  var appId = parameters[ 0 ];
  var apiKey = parameters[ 1 ];

  var keepaliveAgent = new HttpsAgent({
    maxSockets: 1,
    maxKeepAliveRequests: 0, // no limit on max requests per keepalive socket
    maxKeepAliveTime: 30000 // keepalive for 30 seconds
  });
  var client = new Algolia( appId, apiKey, keepaliveAgent );
  var index = client.initIndex( "opendatafrance" );

  // highWaterMark of 1 object: a chunk is a whole batch, so buffer at most one.
  var streamToAlgolia = new stream.Writable( { objectMode: true, highWaterMark: 1 } );

  streamToAlgolia._write = function ( batch, encoding, done ) {
    // NOTE(review): the (error, content) callback shape and content.message
    // are taken from the original code; confirm against the client version in use.
    index.saveObjects( batch, function ( error, content ) {
      if ( error ) {
        // Guard against a missing response body before reading its message.
        console.error( "ERROR: %s", content && content.message ? content.message : error );
      }
      // Deliberately not passing the error on: a failed batch is reported
      // but must not abort the rest of the import.
      done();
    } );
  };

  return streamToAlgolia;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Looks good, feel free to use http://www.realtime-search.com/ to share your engine!