Skip to content

Instantly share code, notes, and snippets.

@u8sand
Last active January 7, 2019 21:55
Show Gist options
  • Save u8sand/f6c0bda65af2d4b66a3bc785714efd95 to your computer and use it in GitHub Desktop.
Save u8sand/f6c0bda65af2d4b66a3bc785714efd95 to your computer and use it in GitHub Desktop.
import { matrix_flatten, matrix_slice, slice } from './matrix'
import { range } from './range'
/**
 * Return the index of the first entry of `L` that is neither null nor
 * undefined, i.e. the number of leading missing entries.
 *
 * @param {Array<*>} L - list to scan from the start.
 * @returns {number} index of the first present entry.
 * @throws {Error} when every entry is missing (or the list is empty).
 */
export function count_first_na(L) {
  let index = 0
  while (index < L.length) {
    // Loose comparison on purpose: null and undefined both count as missing.
    if (!(L[index] == null)) {
      return index
    }
    index += 1
  }
  throw new Error('NaNs not identified')
}
/**
 * Pair up `header` and `data` positionally into a plain object.
 *
 * Only as many pairs as the shorter of the two lists are used; when a key
 * appears more than once, the value at the later position wins.
 *
 * @param {Array<*>} header - keys, used as property names.
 * @param {Array<*>} data - values, matched to `header` by index.
 * @returns {Object} object mapping header[i] to data[i].
 */
export function dictzip(header, data) {
  const pair_count = Math.min(header.length, data.length)
  const zipped = {}
  let idx = 0
  while (idx < pair_count) {
    const key = header[idx]
    zipped[key] = data[idx]
    idx += 1
  }
  return zipped
}
/**
 * Lazily turn an annotated matrix into one `{ meta, data }` record per data
 * cell.
 *
 * The borders are located with count_first_na: `col_border` is the index of
 * the first present entry in the first row, `row_border` the index of the
 * first present entry in the flattened `matrix_slice(matrix, 0, null)`
 * (presumably the first column — confirm against ./matrix).
 *
 * NOTE(review): matrix_slice, matrix_flatten, slice and range are imported
 * helpers whose exact semantics are not visible here; the comments below
 * describe apparent intent only.
 *
 * @param {Array<Array<*>>} matrix - rows of cells, missing cells as null/undefined.
 * @yields {{meta: Object, data: *}} `data` is matrix[y][x]; `meta` merges two
 *   dictzip results, the second taking precedence on duplicate keys.
 * @throws {Error} 'Invalid formatting' when either border is <= 0; errors
 *   from count_first_na propagate unchanged.
 */
export function *parse(matrix) {
  const col_border = count_first_na(matrix[0])
  const leading_column = matrix_flatten(matrix_slice(matrix, 0, null))
  const row_border = count_first_na(leading_column)

  // Both borders must be strictly positive for the layout to be usable.
  if (row_border <= 0) {
    throw new Error('Invalid formatting')
  }
  if (col_border <= 0) {
    throw new Error('Invalid formatting')
  }

  // Label lists used as keys for the per-cell metadata.
  const column_labels = matrix_flatten(
    matrix_slice(matrix, col_border, slice(null, row_border + 1))
  )
  const row_labels = matrix_flatten(
    matrix_slice(matrix, slice(null, col_border + 1), row_border)
  )

  for (const row of range(row_border + 1, matrix.length)) {
    for (const col of range(col_border + 1, matrix[0].length)) {
      // Values taken from the current column, keyed by column_labels.
      const column_meta = dictzip(
        column_labels,
        matrix_flatten(matrix_slice(matrix, col, slice(null, row_border + 1)))
      )
      // Values taken from the current row, keyed by row_labels.
      const row_meta = dictzip(
        row_labels,
        matrix_flatten(matrix_slice(matrix, slice(null, col_border + 1), row))
      )
      // row_meta is spread last, so it wins on keys shared with column_meta.
      const meta = { ...column_meta, ...row_meta }
      yield { 'meta': meta, 'data': matrix[row][col] }
    }
  }
}
/**
 * Parse CSV text into a matrix: an array of rows, each an array of cells.
 *
 * Every cell is passed through JSON.parse on a best-effort basis, so numeric
 * cells become numbers, double-quoted cells become strings without their
 * quotes, and other JSON literals are converted likewise. Cells that are not
 * valid JSON keep their raw text. Empty cells (including `""`) and the
 * literal `null` become null.
 *
 * Lines may end in \n, \r or \r\n. A single terminator at the very end of
 * the input does not produce an extra row; other blank lines yield an empty
 * row. A double-quoted cell may contain commas in any position, including
 * the last cell of a line.
 *
 * @param {string} data - CSV text.
 * @returns {Array<Array<*>>} one array of cell values per line.
 */
export function parse_csv(data) {
  const line_break_re = /\r\n|\r|\n/
  const final_break_re = /(?:\r\n|\r|\n)$/
  // One cell: a complete double-quoted run, or any run of non-comma
  // characters, followed by its delimiter (a comma or the end of the line).
  // Sticky so each match starts exactly where the previous cell ended.
  const cell_re = /("[^"]*"|[^,]*)(,|$)/y

  // Convert the raw text of one cell into its value (null when empty).
  function parse_cell(raw) {
    let value = raw
    try {
      value = JSON.parse(raw)
    } catch (e) {
      // Not JSON (e.g. bare text): deliberately keep the raw cell text.
    }
    // `== null` also covers a literal `null` cell, which has no `.length`.
    if (value == null || value.length === 0) {
      return null
    }
    return value
  }

  // Yield the cell values of a single line; an empty line has no cells.
  function *parse_line(line) {
    if (line.length === 0) {
      return
    }
    let position = 0
    while (true) {
      cell_re.lastIndex = position
      const match = cell_re.exec(line)
      if (match === null) {
        return
      }
      yield parse_cell(match[1])
      // An empty delimiter means the cell ran to the end of the line.
      if (match[2] === '') {
        return
      }
      position = cell_re.lastIndex
    }
  }

  // Drop one trailing terminator so it does not create a phantom last row.
  const lines = data.replace(final_break_re, '').split(line_break_re)
  return lines.map((line) => [...parse_line(line)])
}
/**
 * Parse CSV text and feed the resulting matrix to `parse`.
 *
 * The CSV is parsed immediately; the returned generator produces its records
 * lazily as it is iterated.
 *
 * @param {string} data - CSV text, as accepted by parse_csv.
 * @returns {Generator} the generator returned by parse.
 */
export function parse_file(data) {
  const matrix = parse_csv(data)
  return parse(matrix)
}
import assert from 'assert'
import { count_first_na, dictzip, parse, parse_csv } from './parse'
// Unit tests for the parse utilities. `describe` and `it` are globals
// supplied by the test runner; they are not imported in this file.
// Strict assertions are used so that type coercion cannot hide a mismatch.
describe('util.parse', () => {
  it('count_first_na', () => {
    assert.strictEqual(
      count_first_na([null, null, 1]),
      2
    )
    // A list with no present entry is reported as an error.
    assert.throws(
      () => count_first_na([null, null]),
      /NaNs not identified/
    )
  })
  it('dictzip', () => {
    assert.deepStrictEqual(
      dictzip(['a', 'b', 'c'], [1, 2, 3]),
      {
        'a': 1,
        'b': 2,
        'c': 3,
      }
    )
  })
  it('parse csv', () => {
    assert.deepStrictEqual(
      parse_csv(',Cell Type,MCF10\n,Drug,mydrug\nGene,Cell Line,\nSTAT3,mycellline,0.4'),
      [
        [ null, "Cell Type", "MCF10" ],
        [ null, "Drug", "mydrug" ],
        [ "Gene", "Cell Line", null ],
        [ "STAT3", "mycellline", 0.4 ],
      ]
    )
  })
  it('parse', () => {
    assert.deepStrictEqual(
      [...parse([
        [ null, 'Cell Type', 'MCF10', 'mycelltype',],
        [ null, 'Drug', 'mydrug','myotherdrug',],
        [ 'Gene', 'Cell Line', null, null,],
        [ 'STAT3', 'Blah', 0.1, 0.2,],
      ])],
      [
        {
          'data': 0.1,
          'meta': {
            'Cell Line': 'Blah',
            'Cell Type': 'MCF10',
            'Drug': 'mydrug',
            'Gene': 'STAT3'
          }
        },
        {
          'data': 0.2,
          'meta': {
            'Cell Line': 'Blah',
            'Cell Type': 'mycelltype',
            'Drug': 'myotherdrug',
            'Gene': 'STAT3'
          }
        }
      ]
    )
  })
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment