|
const duckdb = require('@duckdb/duckdb-wasm'); |
|
const path = require('path'); |
|
const fs = require("fs") |
|
const Worker = require('web-worker'); |
|
const DUCKDB_DIST = path.dirname(require.resolve('@duckdb/duckdb-wasm')); |
|
|
|
// node duckdb.cjs filename |
|
// expects csv file in the same directory |
|
// CLI argument: the CSV file to convert. Accept either "data" or "data.csv";
// the table/parquet names are derived from the extension-less base name.
let filename = process.argv[2];
if (filename === undefined) {
  // Fail early with a usage hint instead of a TypeError on .replace below.
  console.error("usage: node duckdb.cjs <file.csv>");
  process.exit(1);
}
// Strip only a *trailing* ".csv" — the unanchored /\.csv/ would also eat a
// ".csv" appearing in the middle of the name (e.g. "my.csv.backup.csv").
filename = filename.replace(/\.csv$/, "");
console.log("filename", filename);
|
|
|
// Main driver: load the CSV into an in-memory DuckDB-wasm instance, create a
// table named after the file, then export it as a GZIP-compressed Parquet
// file written next to the source CSV.
(async () => {
  let worker;
  let db;
  let conn;
  try {
    // Pick the wasm bundle matching the current runtime's capabilities.
    const DUCKDB_CONFIG = await duckdb.selectBundle({
      mvp: {
        mainModule: path.resolve(DUCKDB_DIST, './duckdb-mvp.wasm'),
        mainWorker: path.resolve(DUCKDB_DIST, './duckdb-node-mvp.worker.cjs'),
      },
      next: {
        mainModule: path.resolve(DUCKDB_DIST, './duckdb-next.wasm'),
        mainWorker: path.resolve(DUCKDB_DIST, './duckdb-node-next.worker.cjs'),
      },
    });

    const logger = new duckdb.ConsoleLogger();
    worker = new Worker(DUCKDB_CONFIG.mainWorker);
    db = new duckdb.AsyncDuckDB(logger, worker);
    await db.instantiate(DUCKDB_CONFIG.mainModule, DUCKDB_CONFIG.pthreadWorker);

    conn = await db.connect();

    console.log("reading");
    // DuckDB-wasm cannot see the host filesystem directly: read the CSV into
    // memory and register it with the wasm virtual filesystem, then ingest it
    // into table `filename` in the main schema.
    const txt = fs.readFileSync(`${filename}.csv`).toString();
    await db.registerFileText(`${filename}.csv`, txt);
    await conn.insertCSVFromPath(`${filename}.csv`, {
      schema: 'main',
      name: filename,
    });

    console.log("inserted, querying");
    // Sanity check: show a single row of the freshly created table.
    const res = await conn.query(`SELECT * from ${filename} LIMIT 1`);
    console.log(res.toArray());

    // Export the table as Parquet inside the wasm filesystem, copy the bytes
    // out, and persist them on disk.
    // NOTE(review): newer DuckDB releases spell the option COMPRESSION rather
    // than CODEC — confirm against the bundled duckdb-wasm version.
    await conn.query(`COPY (SELECT * FROM ${filename}) TO '${filename}.parquet' (FORMAT 'parquet', CODEC 'GZIP')`);
    const buffer = await db.copyFileToBuffer(`${filename}.parquet`);
    console.log("buffer len", buffer.length);
    // copyFileToBuffer yields a Uint8Array; Buffer.from copies it directly —
    // the extra `new Uint8Array(buffer)` wrap was redundant.
    fs.writeFileSync(`${filename}.parquet`, Buffer.from(buffer));
    console.log("parquet file written");
    console.log("all done");
  } catch (e) {
    console.error(e);
    // Surface the failure to the shell instead of exiting 0.
    process.exitCode = 1;
  } finally {
    // Always tear down — a live worker would otherwise keep the Node process
    // alive after an error.
    if (conn) await conn.close();
    if (db) await db.terminate();
    if (worker) await worker.terminate();
  }
})();