Last active
August 21, 2024 10:41
-
-
Save tobilg/4d1a49a37ee2da795ea71c9e4dd81d9b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Field, Schema, Table, Type, Utf8, Vector, vectorFromArray } from 'apache-arrow';
/**
 * Cast all columns with complex data types in an Apache Arrow Table to strings.
 *
 * A column counts as "complex" when its type is Struct, List, Map, Dictionary,
 * FixedSizeList or FixedSizeBinary. Each value of such a column is converted as
 * follows:
 *   - null / undefined stays null, so missing values are not turned into the text "null";
 *   - a value that is already a string (e.g. from a dictionary-encoded string
 *     column) is kept verbatim instead of being wrapped in JSON quotes;
 *   - everything else is serialized with JSON.stringify, with bigint values
 *     written as decimal strings because JSON.stringify throws on bigint.
 *
 * Columns of any other type are reused as-is. Field metadata, nullability and
 * schema metadata are carried over to the resulting schema.
 *
 * @param {Table} table - The Apache Arrow Table
 * @returns {Table} - A Table with all complex data type columns cast to strings;
 *   the input table itself is returned when it has no complex columns
 * @throws {Error} If a column cannot be read from the table, or if the column
 *   names are not unique / cannot be kept in schema order
 */
function castComplexColumnsToString(table: Table): Table {
  const complexTypeIds = new Set<Type>([
    Type.Struct,
    Type.List,
    Type.Map,
    Type.Dictionary,
    Type.FixedSizeList,
    Type.FixedSizeBinary,
  ]);

  // Helper to determine whether a data type is one of the complex types above.
  const isComplexType = (type: { typeId: Type }): boolean => complexTypeIds.has(type.typeId);

  const fields = table.schema.fields;

  // Nothing to convert: hand back the input instead of rebuilding an identical table.
  if (!fields.some((field) => isComplexType(field.type))) {
    return table;
  }

  // JSON.stringify raises a TypeError on bigint, so nested 64-bit values are
  // emitted as decimal strings.
  const bigintAsString = (_key: string, value: unknown): unknown =>
    typeof value === 'bigint' ? value.toString() : value;

  // Convert one cell of a complex column to its string form (or null).
  const toText = (value: unknown): string | null => {
    if (value == null) {
      return null;
    }
    if (typeof value === 'string') {
      return value;
    }
    // JSON.stringify can yield undefined at runtime (e.g. for functions) even
    // though its declared return type is string.
    const serialized: string | undefined = JSON.stringify(value, bigintAsString);
    return serialized ?? null;
  };

  // New schema: complex fields become Utf8, everything else is kept untouched.
  const newSchema = new Schema(
    fields.map((field) =>
      isComplexType(field.type)
        ? new Field(field.name, new Utf8(), field.nullable, field.metadata)
        : field,
    ),
    table.schema.metadata,
  );

  // Build the column set keyed by field name, converting complex columns.
  const columns: Record<string, Vector> = {};
  fields.forEach((field, index) => {
    const column = table.getChildAt(index);
    if (column === null) {
      throw new Error(`Column ${index} ("${field.name}") could not be read from the table`);
    }
    columns[field.name] = isComplexType(field.type)
      ? vectorFromArray(Array.from(column, (value) => toText(value)), new Utf8())
      : column;
  });

  // The columns are passed to Table as a name-keyed object. Duplicate names
  // would overwrite each other, and integer-like names are reordered by the
  // JS engine, so refuse to build a table whose key order differs from the schema.
  // NOTE(review): assumes Table pairs keyed vectors with schema fields by key order — confirm.
  const keys = Object.keys(columns);
  if (keys.length !== fields.length || keys.some((key, i) => key !== fields[i]?.name)) {
    throw new Error('Column names must be unique and must not be integer-like to rebuild the table');
  }

  // Create and return the new table (schema first, then the named vectors).
  return new Table(newSchema, columns);
}
// Example usage: | |
// Assume you have an Apache Arrow Table 'table' | |
// const newTable = castComplexColumnsToString(table); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Claude's suggestion (also wrong):