Skip to content

Instantly share code, notes, and snippets.

@a0viedo
Last active July 7, 2016 17:20
Show Gist options
  • Save a0viedo/cf7267503f8cd4bb3cd16ca6833010c3 to your computer and use it in GitHub Desktop.
Save a0viedo/cf7267503f8cd4bb3cd16ca6833010c3 to your computer and use it in GitHub Desktop.
var fs = require('fs');
var through2 = require('through2');
var csv2 = require('csv2');
// Reference minimum salary; the row filter compares salaries against half of it.
var SALARIO_MINIMO = 6000;

// Row counters.
var lines = 0;                      // rows that passed every filter
var cantRemoto = 0;                 // accepted rows whose dedication mentions 'Remoto'
var salarioPromedioMujerCant = 0;   // accepted rows with sex 'Mujer'
var salarioPromedioHombreCant = 0;  // accepted rows with sex 'Hombre'
var salariosXS = 0;                 // rows rejected as implausibly low
var salariosXL = 0;                 // rows rejected as implausibly high

// Running totals. Despite the "promedio" names these hold plain sums while
// rows are processed; they are divided by a counter only when reported.
var salarioPromedio = 0;
var salarioPromedioMujer = 0;
var salarioPromedioHombre = 0;
var promedioRemoto = 0;
var conformidadPromedio = 0;

// Largest finite salary seen among the rows rejected as too high.
var salarioMax = 0;

// Input column index of every field that is copied to the output file.
var fieldMap = {
  province: 3,
  salary: 16,
  experience: 4,
  conformity: 18,
  age: 2,
  date: 0,
  profesion: 6,
  sex: 1,
  dedication: 15
};
// Input column indexes that make up an output row, in ascending order.
// The list only depends on fieldMap, so it is built once rather than per row.
var outputColumns = Object.keys(fieldMap)
  .map(function (key) {
    return fieldMap[key];
  })
  .sort(function (a, b) {
    return a - b;
  });

// Number of accepted rows whose conformity value parsed as a number. Kept
// separate from `lines` so one unparsable cell cannot turn the whole
// conformity average into NaN.
var conformidadCant = 0;

/**
 * Mean of an accumulated total.
 *
 * @param {number} total - Sum of the accumulated values.
 * @param {number} count - How many values were accumulated.
 * @returns {number|string} total / count, or 'N/A' when count is zero
 *   (instead of printing NaN for an empty category).
 */
function average(total, count) {
  return count > 0 ? total / count : 'N/A';
}

// Reads data.csv, drops rows with unusable or outlying salaries, accumulates
// the summary statistics, and writes the normalised rows to salarios.csv.
fs.createReadStream('data.csv')
  .pipe(csv2())
  .pipe(through2.obj(function (chunk, enc, next) {
    var salary = +chunk[fieldMap.salary];
    var dedication = chunk[fieldMap.dedication];
    var sex = chunk[fieldMap.sex];
    var province = chunk[fieldMap.province];
    var conformity = +chunk[fieldMap.conformity];

    // Skip rows without a usable salary: non-numeric text (NaN), zero, or a
    // negative amount.
    if (!(salary > 0)) {
      return next();
    }

    // Values under 100 are scaled by 1000.
    // NOTE(review): presumably these were entered in thousands — confirm.
    if (salary < 100) {
      salary *= 1000;
    }

    var isRemote = dedication.indexOf('Remoto') !== -1;

    // Answers with a salary far below the minimum wage and full-time (or
    // remote) dedication: no reason was found why they would be
    // representative of the dataset, so they are only counted.
    if (salary < (SALARIO_MINIMO / 2) && (dedication === 'Full-Time' || isRemote)) {
      salariosXS++;
      return next();
    }

    // Very high salaries are counted and excluded; the largest finite one is
    // remembered for the summary.
    if (salary > 128000) {
      salariosXL++;
      if (salary > salarioMax && isFinite(salary)) {
        salarioMax = salary;
      }
      return next();
    }

    if (sex === 'Mujer') {
      salarioPromedioMujer += salary;
      salarioPromedioMujerCant++;
    } else if (sex === 'Hombre') {
      salarioPromedioHombre += salary;
      salarioPromedioHombreCant++;
    }

    salarioPromedio += salary;
    lines++;

    if (isRemote) {
      promedioRemoto += salary;
      cantRemoto++;
    }

    // Only numeric conformity values take part in the average.
    if (!isNaN(conformity)) {
      conformidadPromedio += conformity;
      conformidadCant++;
    }

    // Shorten the two Buenos Aires province names.
    if (province === 'Provincia de Buenos Aires') {
      province = 'GBA';
    } else if (province === 'Ciudad Autónoma de Buenos Aires') {
      province = 'CABA';
    }

    // Write the normalised values back into the row.
    chunk[fieldMap.province] = province;
    chunk[fieldMap.salary] = salary;
    chunk[fieldMap.conformity] = conformity;
    // The date column is replaced by its hour of day as computed by Date
    // (NaN when the text cannot be parsed).
    chunk[fieldMap.date] = new Date(chunk[fieldMap.date]).getHours();
    chunk[fieldMap.experience] = chunk[fieldMap.experience].replace(/ /g, '');
    // Commas are stripped because the output row is joined with commas
    // without any quoting.
    chunk[fieldMap.profesion] = chunk[fieldMap.profesion].replace(/,/g, '');
    chunk[fieldMap.age] = chunk[fieldMap.age].replace(/ /g, '');
    // NOTE(review): every value other than 'Hombre' (including blank or other
    // answers) is written as 'f', while the summary above only counts
    // 'Mujer' — confirm this is intended.
    chunk[fieldMap.sex] = sex === 'Hombre' ? 'm' : 'f';

    // The two "less than" answers are replaced by a number.
    if (chunk[fieldMap.age] === 'Menosde18años') {
      chunk[fieldMap.age] = 18;
    }
    if (chunk[fieldMap.experience] === 'Menosdeunaño') {
      chunk[fieldMap.experience] = 1;
    }

    var row = outputColumns.map(function (index) {
      return chunk[index];
    });
    this.push(row.join(',') + '\n');
    next();
  }))
  .on('end', function () {
    console.log('Terminó!');
    console.log('Líneas procesadas:', lines);
    console.log('Salario promedio mujer:', average(salarioPromedioMujer, salarioPromedioMujerCant));
    console.log('Salario promedio hombre:', average(salarioPromedioHombre, salarioPromedioHombreCant));
    console.log('Salario promedio:', average(salarioPromedio, lines));
    console.log('Salario promedio remoto:', average(promedioRemoto, cantRemoto));
    console.log('Salarios XS:', salariosXS);
    console.log('Salarios XL:', salariosXL);
    console.log('Salario max:', salarioMax);
    console.log('Conformidad promedio:', average(conformidadPromedio, conformidadCant));
  })
  .pipe(fs.createWriteStream('salarios.csv'));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment