Skip to content

Instantly share code, notes, and snippets.

@omeroot
Last active January 22, 2016 20:43
Show Gist options
  • Select an option

  • Save omeroot/a4cb9b778e2a1fe2ec8a to your computer and use it in GitHub Desktop.

Select an option

Save omeroot/a4cb9b778e2a1fe2ec8a to your computer and use it in GitHub Desktop.
/*
*
* <<<< Exclude rows that share the same value in a chosen column across multiple CSV files >>>>
*
*
* _ _
* | | (_)
* _ __ __ _ ___ | | __ __ _ __ _ ___ _ ___ ___ _ __
* | '_ \ / _` | / __|| |/ // _` | / _` | / _ \ | |/ __| / _ \ | '_ \
* | |_) || (_| || (__ | <| (_| || (_| || __/ _ | |\__ \| (_) || | | |
* | .__/ \__,_| \___||_|\_\\__,_| \__, | \___|(_)| ||___/ \___/ |_| |_|
* | | __/ | _/ |
* |_| |___/ |__/
*
*
* {
* "name": "node-csv-uniq",
* "version": "1.0.0",
* "description": "",
* "main": "app.js",
* "scripts": {
* "test": "echo \"Error: no test specified\" && exit 1"
* },
* "author": "demircanomer <demircanomer91@gmail.com>",
* "license": "ISC",
* "devDependencies": {
* "fast-csv": "^1.0.0"
* }
* }
*
*
*/
/*
*
*
* _ _ _____ _____ ______
* | | | | / ____| /\ / ____|| ____|
* | | | || (___ / \ | | __ | |__
* | | | | \___ \ / /\ \ | | |_ || __|
* | |__| | ____) |/ ____ \| |__| || |____
* \____/ |_____//_/ \_\\_____||______|
*
*
* $> npm install
*
* var excluder = new CSVExcluder();
*
* excluder.setInputDirectory("archive/") //set input directory (all files in directory)
* .setModel({
* name: 0, //name on index zero
* surname: 1, //surname on index one
* phone: 2, //phone on index two
* email: 3, //...
* hasWonDiscount: 4, //...
* gameCount: 5, //...
* highScore: 6, //...
* last: 7 //...
* })
* .setOutput("output/aa.csv") //set the output file; pass only a directory ("output/") to get a timestamp-named .csv file inside it
* .setChanger(function(data){ //when a duplicate is found, the new row replaces the kept one if this returns true (here: hasWonDiscount is "yes")
* return data.hasWonDiscount == "yes";
* })
* .setSeperator(function(data){ //returns the value that is compared to decide whether two rows are duplicates
* return data.name;
* })
*
* excluder.start();
*/
var fs = require("fs");
var csv = require("fast-csv");
var path = require("path");
/**
 * Merges every file of an input directory into a single CSV file while
 * keeping only one row per duplicate key.
 *
 * Each parsed row is expected to carry one ";"-separated line in its first
 * cell. The line is mapped to a record through the model
 * ({ fieldName: columnIndex }). Two records are duplicates when the
 * seperator callback returns equal values for both. When a duplicate shows
 * up, the record already kept is replaced by the new one only if the
 * optional changer callback returns a truthy value for the new record.
 *
 * All setters return the instance so calls can be chained.
 *
 * @constructor
 */
function CSVExcluder(){
  var inputDirectory = __dirname + "/";
  var outputFile;          // absolute path of the CSV file to write
  var totalCounter = 0;    // rows read across all files
  var whos = [];           // records kept so far, one per distinct key
  var seperator;
  var changer;
  var keys;
  var model;
  var files;

  /**
   * Sets the directory whose files are read.
   * @param {string} dir - directory path, relative to this module's directory
   * @returns {CSVExcluder} this
   */
  this.setInputDirectory = function(dir){
    // The parameter is deliberately not called "path": that name would
    // shadow the path module inside this function.
    inputDirectory = path.join(__dirname, dir);
    return this;
  };

  /**
   * Sets the mapping from record field name to column index.
   * @param {Object} m - e.g. { name: 0, surname: 1 }
   * @returns {CSVExcluder} this
   */
  this.setModel = function(m){
    model = m;
    keys = Object.keys(model);
    return this;
  };

  /**
   * Sets the callback that yields the value used to detect duplicates.
   * @param {function(Object): *} sp - receives a record, returns its key
   * @returns {CSVExcluder} this
   */
  this.setSeperator = function(sp){
    seperator = sp;
    return this;
  };

  /**
   * Sets the callback that decides whether a duplicate replaces the record
   * that is already kept. Optional: without it the first record wins.
   * @param {function(Object): boolean} ch - receives the new record
   * @returns {CSVExcluder} this
   */
  this.setChanger = function(ch){
    changer = ch;
    return this;
  };

  /**
   * Sets where the result is written.
   * @param {string} output - path relative to this module's directory;
   *   either a ".csv" file, or a directory in which a timestamp-named
   *   ".csv" file is created
   * @returns {CSVExcluder} this
   * @throws {Error} if the target directory does not exist (error raised by
   *   fs.statSync) or is not a directory
   */
  this.setOutput = function(output){
    var target = path.join(__dirname, output);
    var isCsvFile = path.extname(output) === ".csv";
    // Validate the same location that is written later: everything is
    // resolved against __dirname, never against the process cwd.
    var directory = isCsvFile ? path.dirname(target) : target;
    if(!fs.statSync(directory).isDirectory()){
      throw new Error("output is not directory: " + directory);
    }
    outputFile = isCsvFile ? target : path.join(directory, String(Date.now()) + ".csv");
    console.log("OUTPUT", outputFile);
    return this;
  };

  // Maps one parsed row to a record according to the model.
  function toRecord(row){
    // NOTE(review): the parser runs with its default delimiter, so the whole
    // ";"-separated line is expected in the first cell; a "," inside a field
    // would split the line earlier - confirm against the input files.
    var line = row[0] == null ? "" : String(row[0]);
    var tokens = line.split(";");
    var record = {};
    for(var i = 0; i < keys.length; i++){
      record[ keys[i] ] = tokens[ parseInt(model[ keys[i] ], 10) ];
    }
    return record;
  }

  // Returns the index of the kept record that has the given key, or -1.
  function findDuplicate(key){
    for(var j = 0; j < whos.length; j++){
      // Loose equality is kept on purpose so matching stays as permissive
      // as it was before.
      if(seperator(whos[j]) == key){
        return j;
      }
    }
    return -1;
  }

  // Reads files[l] and then, one after another, every file before it.
  // Calls callback once all files have been consumed.
  function execute(l, callback){
    if(l < 0){
      callback();
      return;
    }
    var fileName = files[l];
    var stream = fs.createReadStream(path.join(inputDirectory, fileName));
    var rowCounter = 0;
    var detectCounter = 0;
    var changeCounter = 0;
    csv
      .fromStream(stream)
      .on("data", function(row){
        var record = toRecord(row);
        var index = findDuplicate(seperator(record));
        totalCounter++;
        rowCounter++;
        if(index === -1){
          whos.push(record);
          return;
        }
        detectCounter++;
        if(typeof changer === "function" && changer(record)){
          changeCounter++;
          // The replacement goes to the end of the list, like before.
          whos.splice(index, 1);
          whos.push(record);
        }
      })
      .on("end", function(){
        console.log(fileName + " done!", rowCounter, detectCounter, changeCounter);
        execute(l - 1, callback);
      });
  }

  // Writes the kept records to the configured output file.
  function exportCSV(){
    var writableStream = fs.createWriteStream(outputFile);
    writableStream.on("finish", function(){
      console.log("OUTPUT [DONE]!!", whos.length);
    });
    csv.write(whos, {
      quoteHeaders: true
    })
    .pipe(writableStream);
  }

  /**
   * Reads all regular files of the input directory (last directory entry
   * first) and writes the de-duplicated records to the output file.
   * @throws {Error} if the seperator, the model or the output is not set
   */
  this.start = function(){
    if(typeof seperator !== "function") throw new Error("must set seperator [function: setSeperator]");
    if(!keys) throw new Error("must set model [function: setModel]");
    if(!outputFile) throw new Error("must set output [function: setOutput]");
    // Start from a clean state so the instance can be started more than once.
    whos = [];
    totalCounter = 0;
    // Sub-directories cannot be opened as read streams, so skip them.
    files = fs.readdirSync(inputDirectory).filter(function(name){
      return fs.statSync(path.join(inputDirectory, name)).isFile();
    });
    execute(files.length - 1, function(){
      console.log("READ ALL [DONE]!", totalCounter);
      exportCSV();
    });
  };
}
// Export the constructor; callers create an instance with `new CSVExcluder()`
// and chain its setters, each of which returns the instance.
module.exports = CSVExcluder;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment