/**
 * Converts a parquet file on disk to an arrow file in memory
 * Adapted from the README example in https://github.com/kylebarron/parquet-wasm?tab=readme-ov-file
 *
 * @param {string} filepath
 * @returns {buffer}
 */
const arrow = require("apache-arrow");
const { parseRecordBatch } = require("arrow-js-ffi");
const { readFileSync } = require("fs");
const { readParquet, wasmMemory } = require("parquet-wasm");

module.exports = async function parquetToArrow(filepath) {
	// A reference to the WebAssembly memory object.
	const WASM_MEMORY = wasmMemory();

	const buf = readFileSync(filepath);
	const parquetUint8Array = new Uint8Array(Buffer.from(buf));

	const wasmArrowTable = readParquet(parquetUint8Array).intoFFI();

	const recordBatches = [];
	for (let i = 0; i < wasmArrowTable.numBatches(); i++) {
		// Note: Unless you know what you're doing, setting `true` below is recommended to _copy_
		// table data from WebAssembly into JavaScript memory. This may become the default in the
		// future.
		const recordBatch = parseRecordBatch(
			WASM_MEMORY.buffer,
			wasmArrowTable.arrayAddr(i),
			wasmArrowTable.schemaAddr(),
			true
		);
		recordBatches.push(recordBatch);
	}

	const table = new arrow.Table(recordBatches);
  
  // Skip this step converting it to bytes if you just want the table
	const ipcStream = arrow.tableToIPC(table, 'stream');
	const bytes = Buffer.from(ipcStream, 'utf-8');

	// VERY IMPORTANT! You must call `drop` on the Wasm table object when you're done using it
	// to release the Wasm memory.
	// Note that any access to the pointers in this table is undefined behavior after this call.
	// Calling any `wasmArrowTable` method will error.
	wasmArrowTable.drop();

	return bytes;
}