-
-
Save AlbinoDrought/9ef941d5e95428bbf29b57fa77c010f0 to your computer and use it in GitHub Desktop.
Non-TypeScript version of DataBitsNormalizer; the created class is the module's default export.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Tells whether the given value is a real array.
 *
 * @param {*} input - Any value.
 * @returns {boolean} `true` when `input` is an array, `false` otherwise.
 */
function isArray(input) {
    return Array.isArray(input);
}
// Example dataset: every row has the same six properties, covering the four
// value types the normalizer handles (number, boolean, string, array of strings).
var sampleData = [
    { soilhum: 500, airtemp: true, airhum: 18, water: true, name: "romain", cats: ["a", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 21, water: true, name: "romain", cats: ["c", "a"] },
    { soilhum: 300, airtemp: true, airhum: 90, water: false, name: "edwards", cats: ["a", "b"] },
    { soilhum: 950, airtemp: true, airhum: 26, water: true, name: "jane", cats: ["c", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 26, water: true, name: "romain", cats: ["a", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 26, water: true, name: "romain", cats: ["b", "c"] },
];
/**
 * Converts a dataset (an array of flat row objects) into numeric vectors.
 *
 * The set of properties, their order and their types are taken from the first
 * row only. Each property is encoded according to its type:
 *   - number  -> one value scaled to [0, 1] using the dataset-wide min/max
 *   - boolean -> one value, 0 or 1
 *   - string  -> one slot per distinct string in the dataset (one-hot)
 *   - array   -> one slot per distinct element in the dataset (multi-hot)
 * Properties of any other type contribute no values.
 *
 * Properties named through `setOutputProperties` are written to the output
 * vectors; every other property goes to the input vectors.
 *
 * `null` / `undefined` values are treated as missing: they are ignored while
 * collecting min/max and distinct values, and are encoded as zeros so that all
 * vectors keep the same length.
 */
var DataBitsNormalizer = /** @class */ (function () {
    /** Creates an array of `length` zeros. */
    function zeroBits(length) {
        var bits = new Array(length);
        bits.fill(0);
        return bits;
    }

    /** Tells whether a value counts as "missing" (null or undefined). */
    function isMissing(value) {
        return value === null || value === undefined;
    }

    /** Collects the per-property settings needed to encode `prop` (type, min/max or distinct values). */
    function describeProperty(normalizer, prop, type) {
        var meta = {
            type: type,
            min: null,
            max: null,
            distinctValues: null,
        };
        switch (type) {
            case 'number':
                // numbers are scaled between the smallest and largest value of the dataset
                var minMax = normalizer.getMinMax(prop, normalizer.dataset);
                meta.min = minMax[0];
                meta.max = minMax[1];
                break;
            case 'boolean':
                // booleans are a plain 0 or 1
                meta.min = 0;
                meta.max = 1;
                break;
            case 'string':
                // one slot per distinct string value of the whole dataset
                meta.distinctValues = normalizer.getDistinctVals(prop, normalizer.dataset);
                break;
            case 'array':
                // one slot per distinct array element of the whole dataset
                meta.distinctValues = normalizer.getDistinctArrayVals(prop, normalizer.dataset);
                break;
        }
        return meta;
    }

    /** Encodes one value as an array of numbers according to its property settings. */
    function encodeValue(normalizer, meta, value) {
        switch (meta.type) {
            case 'number':
                return [isMissing(value) ? 0 : normalizer.numToBit(meta.min, meta.max, value)];
            case 'boolean':
                return [isMissing(value) ? 0 : normalizer.boolToBit(value)];
            case 'string':
                return normalizer.strToBitsArr(meta.distinctValues, value);
            case 'array':
                return normalizer.arrToBitsArr(meta.distinctValues, value);
            default:
                // unsupported types (plain objects, functions, ...) contribute nothing
                return [];
        }
    }

    /**
     * @param {Object[]} data - Non-empty list of rows; the first row must have at least one property.
     * @throws {Error} When `data` is empty/missing or its first row has no properties.
     */
    function DataBitsNormalizer(data) {
        // prevent empty data input
        if (isMissing(data) || !(data.length > 0)) {
            throw new Error("Input data cant be empty");
        }
        // prevent data rows to contain no properties
        var firstRow = data[0];
        if (isMissing(firstRow) || Object.keys(firstRow).length <= 0) {
            throw new Error("Input data rows has to contain some properties (only 1st row is checked)");
        }
        this.dataset = data;
        this.binaryInput = [];
        this.binaryOutput = [];
        this.outputProperties = [];
    }

    /** @returns {number} Number of properties currently declared as outputs. */
    DataBitsNormalizer.prototype.getOutputLength = function () {
        return this.outputProperties.length;
    };

    /** @returns {string[]} The property names declared as outputs. */
    DataBitsNormalizer.prototype.getOutputProperties = function () {
        return this.outputProperties;
    };

    /**
     * @returns {number} Length of the first input vector, or 0 when no input
     *   vector exists yet (for example before `normalize()` was called).
     */
    DataBitsNormalizer.prototype.getInputLength = function () {
        return this.binaryInput.length > 0 ? this.binaryInput[0].length : 0;
    };

    /** @returns {number[][]} The input vectors produced by the last `normalize()` call. */
    DataBitsNormalizer.prototype.getBinaryInputDataset = function () {
        return this.binaryInput;
    };

    /** @returns {number[][]} The output vectors produced by the last `normalize()` call. */
    DataBitsNormalizer.prototype.getBinaryOutputDataset = function () {
        return this.binaryOutput;
    };

    /**
     * Encodes every row of the dataset and stores the results, replacing the
     * results of any previous call.
     *
     * Properties are always visited in the first row's order so that every
     * vector has the same layout; properties that do not exist on the first
     * row are ignored. A row only produces an input (or output) vector when at
     * least one value was encoded for it.
     */
    DataBitsNormalizer.prototype.normalize = function () {
        // the constructor guarantees a first row with at least one property
        var firstRow = this.dataset[0];
        var props = this.distinctProps(firstRow);
        var types = this.distinctTypes(firstRow);
        var p;
        // settings are kept in an array parallel to `props`
        var metas = [];
        for (p = 0; p < props.length; p++) {
            metas.push(describeProperty(this, props[p], types[props[p]]));
        }
        var inputRows = [];
        var outputRows = [];
        for (var r = 0; r < this.dataset.length; r++) {
            var row = this.dataset[r];
            var inputBits = [];
            var outputBits = [];
            for (p = 0; p < props.length; p++) {
                var bits = encodeValue(this, metas[p], row[props[p]]);
                if (this.outputProperties.indexOf(props[p]) > -1) {
                    outputBits = outputBits.concat(bits);
                }
                else {
                    inputBits = inputBits.concat(bits);
                }
            }
            if (inputBits.length > 0) {
                inputRows.push(inputBits);
            }
            if (outputBits.length > 0) {
                outputRows.push(outputBits);
            }
        }
        // assigned at the end so a failure half-way leaves the previous results untouched
        this.binaryInput = inputRows;
        this.binaryOutput = outputRows;
    };

    /**
     * Declares which properties go to the output vectors.
     *
     * @param {string[]} props - Property names.
     * @returns {DataBitsNormalizer} This instance, for chaining.
     */
    DataBitsNormalizer.prototype.setOutputProperties = function (props) {
        this.outputProperties = props;
        return this;
    };

    /**
     * Finds the smallest and largest value of `prop` across `data`,
     * ignoring null/undefined values.
     *
     * @param {string} prop - Property name.
     * @param {Object[]} data - Rows to scan.
     * @returns {Array} `[min, max]`; both are `null` when no value was found.
     */
    DataBitsNormalizer.prototype.getMinMax = function (prop, data) {
        var min = null;
        var max = null;
        for (var i = 0; i < data.length; i++) {
            var val = data[i][prop];
            if (isMissing(val)) {
                continue;
            }
            if (min === null || val < min) {
                min = val;
            }
            if (max === null || val > max) {
                max = val;
            }
        }
        return [min, max];
    };

    /**
     * Lists the distinct values of `property` across `data`, in order of first
     * appearance, ignoring null/undefined values.
     *
     * @param {string} property - Property name.
     * @param {Object[]} data - Rows to scan.
     * @returns {Array} Distinct values.
     */
    DataBitsNormalizer.prototype.getDistinctVals = function (property, data) {
        var distinctValues = [];
        for (var i = 0; i < data.length; i++) {
            var val = data[i][property];
            if (!isMissing(val) && distinctValues.indexOf(val) === -1) {
                distinctValues.push(val);
            }
        }
        return distinctValues;
    };

    /**
     * Lists the distinct elements found in the array-valued `property` across
     * `data`, in order of first appearance. Rows where the property is
     * null/undefined are skipped.
     *
     * @param {string} property - Property name.
     * @param {Object[]} data - Rows to scan.
     * @returns {Array} Distinct elements.
     */
    DataBitsNormalizer.prototype.getDistinctArrayVals = function (property, data) {
        var distinctValues = [];
        for (var i = 0; i < data.length; i++) {
            var arrVal = data[i][property];
            if (isMissing(arrVal)) {
                continue;
            }
            for (var j = 0; j < arrVal.length; j++) {
                if (distinctValues.indexOf(arrVal[j]) === -1) {
                    distinctValues.push(arrVal[j]);
                }
            }
        }
        return distinctValues;
    };

    /**
     * Scales `value` linearly so that `min` maps to 0 and `max` maps to 1,
     * rounded to 6 decimals.
     *
     * @param {number} min - Lower bound.
     * @param {number} max - Upper bound.
     * @param {number} value - Value to scale.
     * @returns {number} The scaled value; 0 when `min` equals `max`.
     */
    DataBitsNormalizer.prototype.numToBit = function (min, max, value) {
        if (max === min) {
            // a constant column has no range; avoid the 0 / 0 division
            return 0;
        }
        var num = (value - min) / (max - min);
        return Number((num).toFixed(6));
    };

    /**
     * @param {boolean} val - Value to convert.
     * @returns {number} 1 for `true`, 0 for `false`.
     */
    DataBitsNormalizer.prototype.boolToBit = function (val) {
        return +val;
    };

    /**
     * Encodes a single value as an array with one slot per distinct value.
     * The slot at the value's position in `distinctValues` is 1, all others
     * are 0. For example, with distinct values ["a", "b", "c"], the value "b"
     * becomes [0, 1, 0]. A value that is not listed yields only zeros.
     *
     * @param {Array} distinctValues - All known values.
     * @param {*} val - Value to encode.
     * @returns {number[]} Array of the same length as `distinctValues`.
     */
    DataBitsNormalizer.prototype.strToBitsArr = function (distinctValues, val) {
        var bitArr = zeroBits(distinctValues.length);
        for (var i = 0; i < distinctValues.length; i++) {
            if (val === distinctValues[i]) {
                bitArr[i] = 1;
            }
        }
        return bitArr;
    };

    /**
     * Encodes a list of values as an array with one slot per distinct value.
     * The slot of every listed value is 1, all others are 0. Values that are
     * not in `distinctValues` are ignored, and a null/undefined list yields
     * only zeros.
     *
     * @param {Array} distinctValues - All known values.
     * @param {Array} vals - Values to encode.
     * @returns {number[]} Array of the same length as `distinctValues`.
     */
    DataBitsNormalizer.prototype.arrToBitsArr = function (distinctValues, vals) {
        var bitArr = zeroBits(distinctValues.length);
        if (isMissing(vals)) {
            return bitArr;
        }
        for (var j = 0; j < vals.length; j++) {
            var idx = distinctValues.indexOf(vals[j]);
            if (idx !== -1) {
                bitArr[idx] = 1;
            }
        }
        return bitArr;
    };

    /**
     * @param {Object} row - A data row.
     * @returns {string[]} The row's own enumerable property names.
     */
    DataBitsNormalizer.prototype.distinctProps = function (row) {
        return Object.keys(row);
    };

    /**
     * Maps each enumerable property of `row` to a type name: 'array' for
     * arrays, 'object' for any other object (including null), otherwise the
     * `typeof` result.
     *
     * @param {Object} row - A data row.
     * @returns {Object} Property name -> type name.
     */
    DataBitsNormalizer.prototype.distinctTypes = function (row) {
        var distinctTypes = {};
        for (var prop in row) {
            var value = row[prop];
            // typeof reports arrays as 'object', so they are detected separately
            if (Array.isArray(value)) {
                distinctTypes[prop] = 'array';
            }
            else {
                distinctTypes[prop] = typeof (value);
            }
        }
        return distinctTypes;
    };

    /**
     * @param {Object} row - A data row.
     * @returns {*} The value of the row's first property.
     */
    DataBitsNormalizer.prototype.getRow1stValue = function (row) {
        return row[Object.keys(row)[0]];
    };

    return DataBitsNormalizer;
}());
// CommonJS export: the class itself is the module's export.
module.exports = DataBitsNormalizer;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment