Last active
October 24, 2015 10:25
-
-
Save unusualbob/51ef4e501ea5a9f0ded1 to your computer and use it in GitHub Desktop.
A quick script that reads a CSV file as a stream and imports each row into MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs');
var csv = require('csv-stream');
var MongoClient = require('mongodb').MongoClient;

// Put the absolute path to your csv file here, example '/home/windam/file.csv'
var fileToLoad = '';

// This assumes mongo is running on the current host on the default port, and
// that your db is called 'myDbName'. If not, change it :)
var mongodbConnectionString = 'mongodb://127.0.0.1:27017/myDbName';

// Parser that the csv file is piped into.
var csvStream = csv.createStream({
  // You should replace these with the actual field names
  columns: ['fieldName1', 'fieldName2', 'fieldName3', 'fieldName4', 'fieldName5']
});

// By default the parser will assume every value is a string. If you have other
// data types such as numbers, dates or booleans, put those field names in the
// matching list below and they will be converted. Leave a list empty if there
// are none, ex: fieldsWhichAreNumbers = [];
var fieldsWhichAreNumbers = ['fieldName2', 'fieldName5'];
var fieldsWhichAreDates = ['fieldName3'];
var fieldsWhichAreBooleans = ['fieldName4'];

// Running count of rows received from the parser; used for progress logging.
var i = 0;
// Connects to MongoDB, streams the csv file through the parser, converts the
// configured fields to their target types and inserts every row into the
// 'consumer' collection. Throws if the connection cannot be established.
MongoClient.connect(mongodbConnectionString, function(err, db) {
  if (err) throw err;

  // Inserts that have been issued but have not called back yet.
  var pendingInserts = 0;
  // Set once no more rows will arrive (parser ended or the file failed to read).
  var finishedReading = false;

  // Closes the connection once reading is over and every insert has completed,
  // so the process can exit instead of hanging on the open connection.
  function closeWhenDone() {
    if (finishedReading && pendingInserts === 0) {
      console.log("Import finished, lines read:", i);
      db.close();
    }
  }

  csvStream.on('error', function(err) {
    console.log("Error parsing csv", err);
  });

  csvStream.on('end', function() {
    finishedReading = true;
    closeWhenDone();
  });

  csvStream.on('data', function(data) {
    // Converts data to types you want
    Object.keys(data).forEach(function(fieldName) {
      if (fieldsWhichAreNumbers.indexOf(fieldName) !== -1) {
        data[fieldName] = castToNumber(data[fieldName]);
      }
      else if (fieldsWhichAreDates.indexOf(fieldName) !== -1) {
        data[fieldName] = castToTimestamp(data[fieldName]);
      }
      else if (fieldsWhichAreBooleans.indexOf(fieldName) !== -1) {
        data[fieldName] = castToBoolean(data[fieldName]);
      }
    });

    i++;
    if (i % 10 === 0) {
      console.log("Imported lines:", i);
    }

    pendingInserts++;
    db.collection('consumer').insertOne(data, function(insertErr) {
      pendingInserts--;
      if (insertErr) {
        // Report the failure instead of dropping it silently; keep importing.
        console.log("Error inserting row", insertErr);
      }
      closeWhenDone();
    });
  });

  // Start reading only after all parser handlers are attached.
  var fileStream = fs.createReadStream(fileToLoad);
  fileStream.on('error', function(readErr) {
    // Without this handler an unreadable file would crash with an unhandled
    // 'error' event and leave the connection open.
    console.log("Error reading file", readErr);
    finishedReading = true;
    closeWhenDone();
  });
  fileStream.pipe(csvStream);

  // Returns 0 for empty/missing values, otherwise Number(value)
  // (which is NaN when the text is not numeric).
  function castToNumber(value) {
    if (!value) return 0;
    return Number(value);
  }

  // Returns a Date built from the raw value (an Invalid Date if it cannot be parsed).
  function castToTimestamp(value) {
    return new Date(value);
  }

  // Returns true only when the value, trimmed and lower-cased, is exactly "true".
  // String() keeps a missing (undefined/null) value from throwing; it yields false.
  function castToBoolean(value) {
    return String(value).trim().toLowerCase() === "true";
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment