Created
September 1, 2017 12:28
-
-
Save adadgio/ce54cba2d3f9b953924aa3be497259bb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convert any input data to bits representation.
 */

/**
 * One row of the raw dataset: a flat dictionary whose values are a string,
 * a number, a boolean, or a list of strings/numbers.
 */
export type RowInput = {
    [prop: string]: string | number | boolean | Array<string | number>;
};
/**
 * Tells whether `input` is a real array.
 *
 * Delegates to `Array.isArray`, which cannot be fooled by an object that
 * merely advertises an "Array" string tag.
 *
 * @param input - Any value.
 * @returns `true` when `input` is an array, `false` otherwise.
 */
export function isArray(input: any): boolean
{
    return Array.isArray(input);
}
/**
 * Small example dataset mixing the four value kinds a row may hold:
 * numbers (`soilhum`, `airhum`), booleans (`airtemp`, `water`),
 * strings (`name`) and lists of strings (`cats`).
 */
const sampleData: Array<RowInput> = [
    { soilhum: 500, airtemp: true, airhum: 18, water: true, name: "romain", cats: ["a", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 21, water: true, name: "romain", cats: ["c", "a"] },
    { soilhum: 300, airtemp: true, airhum: 90, water: false, name: "edwards", cats: ["a", "b"] },
    { soilhum: 950, airtemp: true, airhum: 26, water: true, name: "jane", cats: ["c", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 26, water: true, name: "romain", cats: ["a", "b"] },
    { soilhum: 1050, airtemp: false, airhum: 26, water: true, name: "romain", cats: ["b", "c"] },
];
/** Per-property encoding settings, computed once from the whole dataset. */
interface PropMetadata {
    /** Value kind reported by `distinctTypes()` for the first row. */
    type: string;
    /** Lower bound used to scale numeric values (0 for the other kinds). */
    min: number;
    /** Upper bound used to scale numeric values. */
    max: number;
    /** Distinct values seen in the dataset, for string and array properties. */
    distinctValues: Array<any>;
}

/**
 * Turns a dataset of plain rows into numeric vectors.
 *
 * The value kind of each property is detected from the FIRST row only:
 * - numbers are min/max scaled into a single value between 0 and 1,
 * - booleans become a single 0/1 bit,
 * - strings become a one-hot array over the distinct strings of the dataset,
 * - arrays become a multi-hot array over the distinct items of the dataset.
 *
 * Properties listed through `setOutputProperties()` are written to the output
 * vectors, every other property to the input vectors.
 */
export class DataBitsNormalizer
{
    private dataset: Array<RowInput> = [];
    private binaryInput: Array<Array<number>> = [];
    // one vector per row (the original declaration said Array<number>, but arrays were always stored)
    private binaryOutput: Array<Array<number>> = [];
    private outputProperties: Array<string> = [];

    /**
     * @param data - Rows to normalize.
     * @throws Error when `data` is empty (or not an array) or when its first row has no property.
     */
    constructor(data: Array<RowInput>)
    {
        // prevent empty data input
        if (!Array.isArray(data) || data.length <= 0) {
            throw new Error(`Input data cant be empty`);
        }

        // prevent data rows to contain no properties
        if (Object.keys(data[0]).length <= 0) {
            throw new Error(`Input data rows has to contain some properties (only 1st row is checked)`);
        }

        this.dataset = data;
    }

    /** @returns The number of properties declared as outputs. */
    getOutputLength(): number
    {
        return this.outputProperties.length;
    }

    /** @returns The names of the properties declared as outputs. */
    getOutputProperties(): Array<string>
    {
        return this.outputProperties;
    }

    /**
     * @returns The length of the first input vector, or 0 when no input
     * vector exists yet (e.g. `normalize()` has not been called).
     */
    getInputLength(): number
    {
        const firstVector = this.binaryInput[0];

        return firstVector === undefined ? 0 : firstVector.length;
    }

    /** @returns The input vectors built by the last `normalize()` call. */
    getBinaryInputDataset(): Array<Array<number>>
    {
        return this.binaryInput;
    }

    /** @returns The output vectors built by the last `normalize()` call. */
    getBinaryOutputDataset(): Array<Array<number>>
    {
        return this.binaryOutput;
    }

    /**
     * Builds the input and output vectors for every row of the dataset.
     *
     * Results from a previous call are discarded, so calling this method
     * several times never duplicates rows. Properties whose kind is not
     * number, boolean, string or array contribute no bits.
     *
     * @throws Error when a row holds a property the first row does not have
     * (its kind cannot be known).
     */
    normalize(): void
    {
        // the constructor guarantees a non-empty dataset whose first row has properties;
        // the first row alone decides the value kind of each property
        const firstRow = this.dataset[0];
        const kinds = this.distinctTypes(firstRow);

        const metadata = new Map<string, PropMetadata>();

        for (const prop of this.distinctProps(firstRow)) {
            const meta: PropMetadata = { type: kinds[prop], min: 0, max: 0, distinctValues: [] };

            switch (meta.type) {
                case 'number': {
                    // numbers are scaled between 0 and 1 using the dataset bounds
                    const bounds = this.getMinMax(prop, this.dataset);
                    meta.min = bounds[0];
                    meta.max = bounds[1];
                    break;
                }
                case 'boolean':
                    // a boolean is a simple 0 or 1 bit
                    meta.min = 0;
                    meta.max = 1;
                    break;
                case 'string':
                    // one bit per distinct string value of the whole dataset
                    meta.distinctValues = this.getDistinctVals(prop, this.dataset);
                    break;
                case 'array':
                    // one bit per distinct item found in the lists of the whole dataset
                    meta.distinctValues = this.getDistinctArrayVals(prop, this.dataset);
                    break;
                default:
                    break;
            }

            metadata.set(prop, meta);
        }

        // start from a clean slate so that repeated calls do not accumulate rows
        this.binaryInput = [];
        this.binaryOutput = [];

        for (const row of this.dataset) {
            const inputBits: Array<number> = [];
            const outputBits: Array<number> = [];

            for (const prop of Object.keys(row)) {
                const meta = metadata.get(prop);

                if (meta === undefined) {
                    throw new Error(`Property "${prop}" is missing from the first row, its type is unknown`);
                }

                const bits = this.encodeValue(meta, row[prop]);

                // output properties are kept apart from the input vector
                if (this.outputProperties.indexOf(prop) > -1) {
                    outputBits.push(...bits);
                } else {
                    inputBits.push(...bits);
                }
            }

            if (inputBits.length > 0) { this.binaryInput.push(inputBits); }
            if (outputBits.length > 0) { this.binaryOutput.push(outputBits); }
        }
    }

    /**
     * Declares which properties go to the output vectors.
     *
     * @param props - Property names to treat as outputs.
     * @returns This instance, to allow chaining.
     */
    setOutputProperties(props: Array<string>)
    {
        this.outputProperties = props;

        return this;
    }

    /**
     * Finds the smallest and largest numeric value of a property.
     *
     * Values that are not numbers (or are NaN) are ignored.
     *
     * @param prop - Property to inspect.
     * @param data - Rows to scan.
     * @returns `[min, max]`, or `[0, 0]` when no numeric value was found.
     */
    getMinMax(prop: string, data: Array<RowInput>): Array<number>
    {
        let min = Infinity;
        let max = -Infinity;
        let found = false;

        for (const row of data) {
            const val = row[prop];

            if (typeof val !== 'number' || Number.isNaN(val)) {
                continue;
            }

            found = true;
            if (val < min) { min = val; }
            if (val > max) { max = val; }
        }

        return found ? [min, max] : [0, 0];
    }

    /**
     * Lists the distinct values of a property, in order of first appearance.
     *
     * @param property - Property to inspect.
     * @param data - Rows to scan.
     */
    getDistinctVals(property: string, data: Array<RowInput>): Array<any>
    {
        // a Set keeps insertion order, so the result order matches first appearance
        return Array.from(new Set<any>(data.map((row) => row[property])));
    }

    /**
     * Lists the distinct items found in a list property across all rows,
     * in order of first appearance.
     *
     * A row holding a bare value instead of a list counts as a one-item list;
     * a row without the property is skipped.
     *
     * @param property - Property to inspect.
     * @param data - Rows to scan.
     */
    getDistinctArrayVals(property: string, data: Array<RowInput>): Array<any>
    {
        const seen = new Set<any>();

        for (const row of data) {
            for (const item of this.toList(row[property])) {
                seen.add(item);
            }
        }

        return Array.from(seen);
    }

    /**
     * Scales `value` linearly so that `min` maps to 0 and `max` maps to 1,
     * rounded to 6 decimals.
     *
     * @returns The scaled value, or 0 when `min` equals `max` (the property is
     * constant, so dividing by the range would yield NaN).
     */
    numToBit(min: number, max: number, value: number): number
    {
        const range = max - min;

        if (range === 0) {
            return 0;
        }

        return Number(((value - min) / range).toFixed(6));
    }

    /** Converts a boolean to 1 (`true`) or 0 (`false`). */
    boolToBit(val: boolean): number
    {
        return Number(val);
    }

    /**
     * Encodes a value as a one-hot array over the known distinct values.
     *
     * For example, with distinct values [ 500, 1050, 300, 950 ] a 4 length
     * array is needed to represent them all: the 1st value is [1,0,0,0],
     * the second is [0,1,0,0]... and so on. A value that is not part of
     * `distinctValues` yields an array of zeros.
     *
     * @param distinctValues - Every value the property can take.
     * @param val - The value to encode.
     */
    strToBitsArr(distinctValues: any, val: string): Array<number>
    {
        return Array.from(distinctValues, (candidate: any) => (candidate === val ? 1 : 0));
    }

    /**
     * Encodes a list of values as a multi-hot array over the known distinct
     * values: the bit of every distinct value present in `vals` is set to 1.
     *
     * Items of `vals` that are not part of `distinctValues` are ignored.
     *
     * @param distinctValues - Every item the property can hold.
     * @param vals - The items to encode.
     */
    arrToBitsArr(distinctValues: any, vals: any): Array<number>
    {
        const bits: Array<number> = new Array(distinctValues.length).fill(0);

        for (const item of this.toList(vals)) {
            const idx = distinctValues.indexOf(item);

            // indexOf yields -1 for unknown items; writing at -1 would add a stray property
            if (idx >= 0) {
                bits[idx] = 1;
            }
        }

        return bits;
    }

    /** @returns The property names of `row`. */
    distinctProps(row: RowInput): Array<string>
    {
        return Object.keys(row);
    }

    /**
     * Maps every property of `row` to the kind of its value: `'array'` for
     * arrays, otherwise the result of `typeof` (so any other object,
     * including `null`, is reported as `'object'`).
     */
    distinctTypes(row: RowInput): { [prop: string]: string }
    {
        const kinds: { [prop: string]: string } = {};

        for (const prop of Object.keys(row)) {
            const value = row[prop];

            // typeof cannot tell arrays from other objects, so check them first
            kinds[prop] = Array.isArray(value) ? 'array' : typeof value;
        }

        return kinds;
    }

    /** @returns The value of the first property of `row`. */
    getRow1stValue(row: RowInput)
    {
        return row[Object.keys(row)[0]];
    }

    /** Picks the encoder matching the property kind; unsupported kinds yield no bits. */
    private encodeValue(meta: PropMetadata, value: any): Array<number>
    {
        switch (meta.type) {
            case 'number':
                return [this.numToBit(meta.min, meta.max, value)]; // scalar to array of 1 length
            case 'boolean':
                return [this.boolToBit(value)]; // scalar to array of 1 length
            case 'string':
                return this.strToBitsArr(meta.distinctValues, value);
            case 'array':
                return this.arrToBitsArr(meta.distinctValues, value);
            default:
                return [];
        }
    }

    /** Wraps a bare value into a list; `null`/`undefined` become an empty list. */
    private toList(value: any): Array<any>
    {
        if (Array.isArray(value)) {
            return value;
        }

        return value === undefined || value === null ? [] : [value];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment