Last active
July 7, 2016 17:20
-
-
Save a0viedo/cf7267503f8cd4bb3cd16ca6833010c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dependencies: Node's file-system module plus two stream helpers from npm.
// `through2` builds the object-mode transform stream and `csv2` parses the
// CSV input into one array of cells per row.
var fs = require('fs');
var through2 = require('through2');
var csv2 = require('csv2');
// Reference minimum salary. Rows reporting less than half of this amount while
// declaring full-time or remote dedication are discarded by the pipeline.
var SALARIO_MINIMO = 6000;

// Number of rows that passed every filter and were written to the output.
var lines = 0;

// NOTE: despite the "promedio" (average) names, the variables below are
// running SUMS. They are divided by their matching counter only when the
// final statistics are printed.
var conformidadPromedio = 0;      // sum of conformity scores
var promedioRemoto = 0;           // sum of salaries whose dedication mentions 'Remoto'
var cantRemoto = 0;               // number of such rows
var salarioPromedio = 0;          // sum of all accepted salaries
var salarioPromedioMujer = 0;     // sum of salaries where sex is 'Mujer'
var salarioPromedioMujerCant = 0; // number of such rows
var salarioPromedioHombre = 0;    // sum of salaries where sex is 'Hombre'
var salarioPromedioHombreCant = 0; // number of such rows

// Discarded-row bookkeeping.
var salariosXL = 0; // rows dropped for reporting a salary above 128000
var salariosXS = 0; // rows dropped for an implausibly low full-time/remote salary
var salarioMax = 0; // largest finite salary seen among the dropped XL rows

// Zero-based column index of each field inside a parsed input row. The output
// file contains exactly these columns, ordered by ascending index.
var fieldMap = {
  province: 3,
  salary: 16,
  experience: 4,
  conformity: 18,
  age: 2,
  date: 0,
  profesion: 6,
  sex: 1,
  dedication: 15
};
// Input-column indices to emit, in ascending order. The mapping never changes
// between rows, so it is computed once instead of once per row.
var outputColumns = Object.keys(fieldMap)
  .map(function (key) {
    return fieldMap[key];
  })
  .sort(function (a, b) {
    return a - b;
  });

// Number of accepted rows carrying a usable conformity score. Kept apart from
// `lines` so that blank or non-numeric answers neither poison the running sum
// with NaN nor drag the average down as zeroes.
var conformidadCant = 0;

/**
 * Serialises one cell for CSV output. Cells containing a comma, a double
 * quote or a line break are wrapped in double quotes (inner quotes doubled),
 * so such values cannot shift the columns of the output row.
 *
 * @param {*} value - Cell value; null/undefined become an empty cell.
 * @returns {string} The CSV-safe text of the cell.
 */
function toCsvField(value) {
  var text = value == null ? '' : String(value);
  if (/[",\r\n]/.test(text)) {
    return '"' + text.replace(/"/g, '""') + '"';
  }
  return text;
}

/**
 * Parses a conformity answer.
 *
 * @param {*} raw - Raw cell taken from the input row.
 * @returns {number} The numeric score, or NaN when the cell is missing, blank
 *   or not a number.
 */
function parseConformity(raw) {
  if (raw == null || String(raw).trim() === '') {
    return NaN;
  }
  return +raw;
}

// Pipeline: read data.csv, parse it, filter and normalise each row while
// accumulating statistics, then write the reduced rows to salarios.csv.
fs.createReadStream('data.csv')
  .pipe(csv2())
  .pipe(through2.obj(function (chunk, enc, next) {
    var salary = +chunk[fieldMap.salary];

    // Rows without a usable salary are dropped silently. This also covers the
    // header row, if the file has one, and blank cells, which coerce to 0.
    if (isNaN(salary) || salary === 0) {
      return next();
    }

    // Presumably values under 100 were entered in thousands; scale them up.
    if (salary < 100) {
      salary *= 1000;
    }

    var dedication = chunk[fieldMap.dedication];
    var isRemote = dedication.indexOf('Remoto') !== -1;

    // Responses reporting a salary far below the minimum wage despite
    // full-time (or remote) dedication: the original author found no reason
    // to consider them representative of the dataset.
    if (salary < (SALARIO_MINIMO / 2) && (dedication === 'Full-Time' || isRemote)) {
      salariosXS++;
      return next();
    }

    // Very high salaries are excluded as outliers. Only their count and the
    // largest finite value among them are kept for the final report.
    if (salary > 128000) {
      salariosXL++;
      if (salary > salarioMax && isFinite(salary)) {
        salarioMax = salary;
      }
      return next();
    }

    var sex = chunk[fieldMap.sex];
    if (sex === 'Mujer') {
      salarioPromedioMujer += salary;
      salarioPromedioMujerCant++;
    }
    if (sex === 'Hombre') {
      salarioPromedioHombre += salary;
      salarioPromedioHombreCant++;
    }

    salarioPromedio += salary;
    if (isRemote) {
      promedioRemoto += salary;
      cantRemoto++;
    }
    lines++;

    var conformity = parseConformity(chunk[fieldMap.conformity]);
    var hasConformity = !isNaN(conformity);
    if (hasConformity) {
      conformidadPromedio += conformity;
      conformidadCant++;
    }

    // Shorten the two Buenos Aires labels.
    var province = chunk[fieldMap.province];
    if (province === 'Provincia de Buenos Aires') {
      province = 'GBA';
    } else if (province === 'Ciudad Autónoma de Buenos Aires') {
      province = 'CABA';
    }

    // Spaces are stripped from the age and experience answers; the open-ended
    // "less than ..." answers are replaced by a single number.
    var age = chunk[fieldMap.age].replace(/ /g, '');
    if (age === 'Menosde18años') {
      age = 18;
    }
    var experience = chunk[fieldMap.experience].replace(/ /g, '');
    if (experience === 'Menosdeunaño') {
      experience = 1;
    }

    // Write the normalised values back into the row in place.
    chunk[fieldMap.province] = province;
    chunk[fieldMap.salary] = salary;
    chunk[fieldMap.conformity] = hasConformity ? conformity : '';
    // Only the hour of the timestamp is kept. getHours() uses the local time
    // zone and yields NaN when the timestamp cannot be parsed.
    chunk[fieldMap.date] = new Date(chunk[fieldMap.date]).getHours();
    chunk[fieldMap.experience] = experience;
    chunk[fieldMap.profesion] = chunk[fieldMap.profesion].replace(/,/g, '');
    chunk[fieldMap.age] = age;
    // NOTE(review): every value other than 'Hombre' (blank answers included)
    // is encoded as 'f' - confirm this is intended.
    chunk[fieldMap.sex] = sex === 'Hombre' ? 'm' : 'f';

    var row = outputColumns.map(function (index) {
      return toCsvField(chunk[index]);
    });
    this.push(row.join(',') + '\n');
    next();
  }))
  .on('end', function () {
    // Note: "Salario max" is the largest value among the EXCLUDED XL rows.
    // An average prints as NaN when its counter is still zero.
    console.log('Terminó!');
    console.log('Líneas procesadas:', lines);
    console.log('Salario promedio mujer:', salarioPromedioMujer / salarioPromedioMujerCant);
    console.log('Salario promedio hombre:', salarioPromedioHombre / salarioPromedioHombreCant);
    console.log('Salario promedio:', salarioPromedio / lines);
    console.log('Salario promedio remoto:', promedioRemoto / cantRemoto);
    console.log('Salarios XS:', salariosXS);
    console.log('Salarios XL:', salariosXL);
    console.log('Salario max:', salarioMax);
    console.log('Conformidad promedio:', conformidadPromedio / conformidadCant);
  })
  .pipe(fs.createWriteStream('salarios.csv'));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment