Last active
October 15, 2019 11:00
-
-
Save eenblam/8897a17fe5a917af53ccf38afd62302e to your computer and use it in GitHub Desktop.
Naive JS implementation of tidyr's gather function. Intended for use with JSON-styled tabular data... like you'd get from d3.dsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
let R = require('ramda'); | |
// lengthenRow :: String -> String -> Object -> [Object]
let lengthenRow = R.curry(function (keyLabel, valueLabel, row) {
  // Produce one two-property object per own enumerable key of `row`:
  // the key's name stored under `keyLabel`, its value under `valueLabel`.
  const longRows = [];
  for (const name of Object.keys(row)) {
    longRows.push({[keyLabel]: name, [valueLabel]: row[name]});
  }
  return longRows;
});
// gatherRow :: String -> String -> [String] -> Object -> [Object]
let gatherRow = R.curry(function (keyLabel, valueLabel, columns, row) {
  // Convert the wide JSON representation of one CSV row into an array of
  // long-format rows: each property named in `columns` becomes its own row
  // (via lengthenRow), and every other property of `row` is repeated on each.
  const kept = R.pick(R.difference(R.keys(row), columns), row);
  const wide = R.pick(columns, row);
  const lengthened = lengthenRow(keyLabel, valueLabel, wide);
  // Object.assign stands in for R.merge(kept, longRow): R.merge is deprecated
  // and absent from newer Ramda releases. Semantics are the same -- a fresh
  // object where the key/value pair wins over a kept field of the same name.
  return lengthened.map(longRow => Object.assign({}, kept, longRow));
});
module.exports.gatherRow = gatherRow; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
let R = require('ramda'); | |
// lengthenRow :: String -> String -> Object -> [Object]
let lengthenRow = R.curry(function (keyLabel, valueLabel, row) {
  // Note: lengthenRow is just the trivial gatherRow when no columns are kept!
  // Each key of `row` turns into {[keyLabel]: key, [valueLabel]: row[key]}.
  const toLongRow = name => ({[keyLabel]: name, [valueLabel]: row[name]});
  return R.map(toLongRow, R.keys(row));
});
// gatherRow :: String -> String -> [String] -> Object -> [Object]
let gatherRow = R.curry(function (keyLabel, valueLabel, columns, row) {
  // Convert wide JSON representation of a CSV row into long format.
  // Properties listed in `columns` are lengthened into one row each;
  // the remaining properties of `row` are copied onto every output row.
  const keptNames = R.difference(R.keys(row), columns);
  const kept = R.pick(keptNames, row);
  const lengthened = lengthenRow(keyLabel, valueLabel, R.pick(columns, row));
  // R.merge is deprecated and missing from newer Ramda releases, so the merge
  // is done with Object.assign. Result is identical to R.merge(kept, longRow):
  // a new object in which longRow's properties override kept's on collision.
  return lengthened.map(longRow => Object.assign({}, kept, longRow));
});
module.exports.gatherRow = gatherRow; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
/**
 * Build a new object containing only the listed own properties of `record`.
 *
 * Names in `fields` that `record` does not own are skipped, so a missing key
 * is not materialized as an explicit `undefined` property and inherited
 * members (e.g. 'toString') are never copied into the result.
 *
 * @param {Object} record - Source object; it is not modified.
 * @param {string[]} fields - Property names to keep.
 * @returns {Object} A fresh object holding the selected properties.
 */
function withFields(record, fields) {
  const picked = {};
  for (const key of fields) {
    if (Object.prototype.hasOwnProperty.call(record, key)) {
      picked[key] = record[key];
    }
  }
  return picked;
}
/**
 * Partition a record into two objects: the properties NOT named in `fields`
 * and the properties named in `fields`.
 *
 * @param {Object} record - Source object; it is not modified.
 * @param {...string} fields - Names of the properties to split off.
 * @returns {Object[]} A two-element array `[others, selected]`, where
 *   `selected` is built from `fields` and `others` from every remaining own
 *   enumerable key of `record`.
 */
function splitRecord(record, ...fields) {
  const withGivenFields = withFields(record, fields);
  // A Set gives constant-time membership checks, so the partition stays
  // linear in the number of keys even when many fields are named.
  const selected = new Set(fields);
  const otherFields = Object.keys(record).filter(key => !selected.has(key));
  return [withFields(record, otherFields), withGivenFields];
}
/**
 * Convert a wide JSON representation of CSV data into long format.
 *
 * For every record, each property named in `columns` becomes its own output
 * row of the form `{[keyLabel]: <column name>, [valueLabel]: <column value>}`,
 * and all remaining properties of the record are copied onto that row. If a
 * remaining property shares a name with `keyLabel` or `valueLabel`, the
 * remaining property's value is the one that ends up on the row.
 *
 * @param {Object[]} data - Records to lengthen; they are not modified.
 * @param {string} keyLabel - Property name that will hold the column name.
 * @param {string} valueLabel - Property name that will hold the column value.
 * @param {...string} columns - Names of the columns to gather.
 * @returns {Object[]} Flat array of long-format rows, in record order.
 */
function gather(data, keyLabel, valueLabel, ...columns) {
  // Rows are appended to a single array as they are produced; flattening by
  // repeated concat would re-copy the whole accumulator for every record.
  const longRows = [];
  for (const record of data) {
    const [keptFields, wideFields] = splitRecord(record, ...columns);
    for (const column of Object.keys(wideFields)) {
      // The key/value pair is written first so kept fields are applied on top.
      longRows.push(Object.assign(
        {[keyLabel]: column, [valueLabel]: wideFields[column]},
        keptFields
      ));
    }
  }
  return longRows;
}
module.exports.gather = gather; |
In gather-vanilla.js,
the variables are changed on lines 25 and 27:
return data.map(record => {
let [keptFields, wideFields] = splitRecord(record, ...columns);
let longFields = lengthen(keptFields); //here
let nestedArrays = longFields.map(
longField => Object.assign({}, longField, wideFields) // and here
);
return nestedArrays;
}).reduce((acc, arr) => acc.concat(arr), []);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
gather-vanilla
Usage:
...which should produce...
Note the spread operator (`...`) in the signature of `gather`. This means that the two lines below are equivalent:
This way, `gather` can be explicitly parameterized for a small number of fields, but we can also dump a larger number of values in. Suppose we have a very wide data set, with 28 columns, `['Factor1', 'Factor2', 'A', 'B', ..., 'Z']`. If we wish to "lengthen" all but the first two columns, 'Factor1' and 'Factor2', we could simply relabel them all under `Factor3` like so:
gather-ramda
Now, we could just reduce by concat...
...or, we can apply gatherRow to an arbitrary stream of rows with most.concatMap!