npm i
./python_to_arrow.py
./use-arrow.js
| { | |
| "presets": [ | |
| "@babel/preset-env", | |
| ] | |
| } |
| /node_modules/ | |
| package-lock.json |
| idx | color | name | large_number | |
|---|---|---|---|---|
| 1 | green | alice | 17592186044416 | |
| 2 | blue | bob | 17592186044417 | |
| 3 | red | carl | 17592186044418 |
| { | |
| "dependencies": { | |
| "@babel/core": "^7.0.0-beta.55", | |
| "@babel/node": "^7.0.0-beta.55", | |
| "@babel/preset-env": "^7.0.0-beta.55", | |
| "apache-arrow": "^0.3.1" | |
| } | |
| } |
| #!/usr/bin/env python3 | |
| import pandas as pd | |
| import pyarrow as pa | |
| import numpy as np | |
if __name__ == "__main__":
    # Read the comma-separated sample data into a DataFrame.
    frame = pd.read_csv("./data.csv", sep=',')
    # Cast the idx column to 32-bit integers before the Arrow conversion.
    frame['idx'] = frame['idx'].astype(np.int32)
    print(frame.columns)
    print(frame.shape)

    # Convert the whole frame into a single Arrow RecordBatch.
    batch = pa.RecordBatch.from_pandas(frame)

    # Write that batch through a RecordBatchFileWriter. The writer is closed
    # while the underlying file handle is still open.
    with open("./arrow-out.ipc", 'bw') as sink:
        ipc_writer = pa.ipc.RecordBatchFileWriter(sink, batch.schema)
        ipc_writer.write_batch(batch)
        ipc_writer.close()
| numpy==1.15.0 | |
| pandas==0.23.3 | |
| pyarrow==0.9.0.post1 |
| #!/usr/bin/env ./node_modules/@babel/node/bin/babel-node.js | |
| import { readFileSync } from 'fs'; | |
| import { Table, predicate, util } from 'apache-arrow'; | |
/**
 * Reads ./arrow-out.ipc into an Arrow Table, prints the table, then logs how
 * many rows match an equality filter on the name, idx and large_number
 * columns. The large_number lookup runs twice: once with a plain JS number
 * and once with a util.Int64 parsed from the same digits.
 */
const main = () => {
  const table = Table.from([readFileSync('./arrow-out.ipc')]);
  console.log(table.toString());

  // Equality-filtered view of `table` for a single column/value pair.
  const whereEquals = (column, value) => table.filter(predicate.col(column).eq(value));

  const name = 'alice';
  const byName = whereEquals('name', name);
  console.log(`rows with name[${name}] in them ${byName.count()}`);

  const idx = 2;
  const byIdx = whereEquals('idx', idx);
  console.log(`rows with idx[${idx}] ${byIdx.count()}`);

  const largeNumber = 17592186044417;
  const byNumber = whereEquals('large_number', largeNumber);
  console.log(`rows with large_number[${largeNumber}] ${byNumber.count()}`);

  // Same lookup, but comparing against an Int64 built from the decimal string.
  const byInt64 = whereEquals('large_number', util.Int64.fromString(`${largeNumber}`));
  console.log(`rows with large_number[${largeNumber}] Int64 ${byInt64.count()}`);
};
| main() |