// Last active
// August 29, 2025 19:24
// -
// -
// Save lardratboy/ea3212147c58a446fbbfd5130e008892 to your computer and use it in GitHub Desktop.
// This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
// Data type configuration constants with float support | |
const DATA_TYPES = { | |
int8: { size: 1, min: -128, max: 127, method: 'getInt8', isFloat: false }, | |
uint8: { size: 1, min: 0, max: 255, method: 'getUint8', isFloat: false }, | |
int16: { size: 2, min: -32768, max: 32767, method: 'getInt16', isFloat: false }, | |
uint16: { size: 2, min: 0, max: 65535, method: 'getUint16', isFloat: false }, | |
int32: { size: 4, min: -2147483648, max: 2147483647, method: 'getInt32', isFloat: false }, | |
uint32: { size: 4, min: 0, max: 4294967295, method: 'getUint32', isFloat: false }, | |
fp8_e4m3: { size: 1, method: 'getFloat8E4M3', isFloat: true }, | |
fp8_e5m2: { size: 1, method: 'getFloat8E5M2', isFloat: true }, | |
fp16: { size: 2, method: 'getFloat16', isFloat: true }, | |
bf16: { size: 2, method: 'getBFloat16', isFloat: true }, | |
fp32: { size: 4, method: 'getFloat32', isFloat: true }, | |
tf32: { size: 4, method: 'getTensorFloat32', isFloat: true }, | |
fp64: { size: 8, method: 'getFloat64', isFloat: true } | |
}; | |
// Pre-calculate normalization multipliers for integer types only | |
const NORMALIZERS = Object.fromEntries( | |
Object.entries(DATA_TYPES) | |
.filter(([type, config]) => !config.isFloat) | |
.map(([type, config]) => [ | |
type, | |
{ | |
multiplier: 2 / (config.max - config.min), | |
offset: config.min | |
} | |
]) | |
); | |
/** | |
* Convert IEEE 754 half-precision (fp16) to single precision (fp32) | |
* @param {number} uint16Value - 16-bit unsigned integer representing fp16 | |
* @returns {number} - JavaScript number (fp32/fp64) | |
*/ | |
function fp16ToFloat32(uint16Value) { | |
const sign = (uint16Value & 0x8000) >> 15; | |
const exponent = (uint16Value & 0x7C00) >> 10; | |
const mantissa = uint16Value & 0x03FF; | |
if (exponent === 0) { | |
if (mantissa === 0) { | |
// Zero | |
return sign === 0 ? 0.0 : -0.0; | |
} else { | |
// Subnormal | |
return (sign === 0 ? 1 : -1) * Math.pow(2, -14) * (mantissa / 1024); | |
} | |
} else if (exponent === 31) { | |
if (mantissa === 0) { | |
// Infinity | |
return sign === 0 ? Infinity : -Infinity; | |
} else { | |
// NaN | |
return NaN; | |
} | |
} else { | |
// Normal | |
return (sign === 0 ? 1 : -1) * Math.pow(2, exponent - 15) * (1 + mantissa / 1024); | |
} | |
} | |
/** | |
* Convert Google's bfloat16 (bf16) to single precision (fp32) | |
* @param {number} uint16Value - 16-bit unsigned integer representing bf16 | |
* @returns {number} - JavaScript number (fp32/fp64) | |
*/ | |
function bf16ToFloat32(uint16Value) { | |
const sign = (uint16Value & 0x8000) >> 15; | |
const exponent = (uint16Value & 0x7F80) >> 7; // bits 14-7 (8 bits) | |
const mantissa = uint16Value & 0x007F; // bits 6-0 (7 bits) | |
if (exponent === 0) { | |
if (mantissa === 0) { | |
// Zero | |
return sign === 0 ? 0.0 : -0.0; | |
} else { | |
// Subnormal | |
return (sign === 0 ? 1 : -1) * Math.pow(2, -126) * (mantissa / 128); | |
} | |
} else if (exponent === 255) { | |
if (mantissa === 0) { | |
// Infinity | |
return sign === 0 ? Infinity : -Infinity; | |
} else { | |
// NaN | |
return NaN; | |
} | |
} else { | |
// Normal | |
return (sign === 0 ? 1 : -1) * Math.pow(2, exponent - 127) * (1 + mantissa / 128); | |
} | |
} | |
/** | |
* Convert 8-bit float E4M3 format to float32 | |
* @param {number} uint8Value - 8-bit unsigned integer representing fp8 E4M3 | |
* @returns {number} - JavaScript number (fp32/fp64) | |
*/ | |
function fp8e4m3ToFloat32(uint8Value) { | |
const sign = (uint8Value & 0x80) >> 7; | |
const exponent = (uint8Value & 0x78) >> 3; // 4 bits | |
const mantissa = uint8Value & 0x07; // 3 bits | |
if (exponent === 0) { | |
// Subnormal or zero | |
if (mantissa === 0) return sign ? -0.0 : 0.0; | |
return (sign ? -1 : 1) * Math.pow(2, -6) * (mantissa / 8); | |
} | |
if (exponent === 0xF) { | |
// Inf or NaN | |
return mantissa === 0 ? (sign ? -Infinity : Infinity) : NaN; | |
} | |
return (sign ? -1 : 1) * Math.pow(2, exponent - 7) * (1 + mantissa / 8); | |
} | |
/** | |
* Convert 8-bit float E5M2 format to float32 | |
* @param {number} uint8Value - 8-bit unsigned integer representing fp8 E5M2 | |
* @returns {number} - JavaScript number (fp32/fp64) | |
*/ | |
function fp8e5m2ToFloat32(uint8Value) { | |
const sign = (uint8Value & 0x80) >> 7; | |
const exponent = (uint8Value & 0x7C) >> 2; // 5 bits | |
const mantissa = uint8Value & 0x03; // 2 bits | |
if (exponent === 0) { | |
if (mantissa === 0) return sign ? -0.0 : 0.0; | |
return (sign ? -1 : 1) * Math.pow(2, -14) * (mantissa / 4); | |
} | |
if (exponent === 0x1F) { | |
return mantissa === 0 ? (sign ? -Infinity : Infinity) : NaN; | |
} | |
return (sign ? -1 : 1) * Math.pow(2, exponent - 15) * (1 + mantissa / 4); | |
} | |
/** | |
* Convert NVIDIA TensorFloat-32 format to float32 | |
* TF32: 1 sign + 8 exponent + 10 mantissa bits (19 bits total, typically stored in 32-bit word) | |
* @param {number} uint32Value - 32-bit unsigned integer representing TF32 | |
* @returns {number} - JavaScript number (fp32/fp64) | |
*/ | |
function tf32ToFloat32(uint32Value) { | |
const sign = (uint32Value & 0x80000000) >>> 31; // Bit 31 | |
const exponent = (uint32Value & 0x7F800000) >>> 23; // Bits 30-23 (8 bits) | |
const mantissa = (uint32Value & 0x007FE000) >>> 13; // Bits 22-13 (10 bits) | |
if (exponent === 0) { | |
if (mantissa === 0) { | |
// Zero | |
return sign === 0 ? 0.0 : -0.0; | |
} else { | |
// Subnormal numbers | |
return (sign === 0 ? 1 : -1) * Math.pow(2, -126) * (mantissa / 1024); | |
} | |
} else if (exponent === 255) { | |
if (mantissa === 0) { | |
// Infinity | |
return sign === 0 ? Infinity : -Infinity; | |
} else { | |
// NaN | |
return NaN; | |
} | |
} else { | |
// Normal numbers | |
return (sign === 0 ? 1 : -1) * Math.pow(2, exponent - 127) * (1 + mantissa / 1024); | |
} | |
} | |
/** | |
* Extended DataView with fp16, bf16, fp8, and TF32 support | |
* @param {ArrayBuffer} buffer - The buffer to create DataView from | |
* @returns {DataView} - Enhanced DataView with additional float methods | |
*/ | |
function createExtendedDataView(buffer) { | |
const view = new DataView(buffer); | |
// Add fp16 support | |
view.getFloat16 = function(byteOffset, littleEndian = false) { | |
const uint16Value = this.getUint16(byteOffset, littleEndian); | |
return fp16ToFloat32(uint16Value); | |
}; | |
// Add bf16 support | |
view.getBFloat16 = function(byteOffset, littleEndian = false) { | |
const uint16Value = this.getUint16(byteOffset, littleEndian); | |
return bf16ToFloat32(uint16Value); | |
}; | |
// Add fp8 E4M3 support | |
view.getFloat8E4M3 = function(byteOffset) { | |
const uint8Value = this.getUint8(byteOffset); | |
return fp8e4m3ToFloat32(uint8Value); | |
}; | |
// Add fp8 E5M2 support | |
view.getFloat8E5M2 = function(byteOffset) { | |
const uint8Value = this.getUint8(byteOffset); | |
return fp8e5m2ToFloat32(uint8Value); | |
}; | |
// Add TensorFloat-32 support | |
view.getTensorFloat32 = function(byteOffset, littleEndian = false) { | |
const uint32Value = this.getUint32(byteOffset, littleEndian); | |
return tf32ToFloat32(uint32Value); | |
}; | |
// Note: fp64 support is already provided by DataView.getFloat64() | |
return view; | |
} | |
/** | |
* Process binary data into normalized 3D points with colors using configurable quantization | |
* Now supports both integer and floating-point data types | |
* @param {ArrayBuffer} buffer - The input binary data | |
* @param {string} dataType - The data type to interpret the buffer as | |
* @param {boolean} isLittleEndian - Whether to read as little endian | |
* @param {number} quantizationBits - Number of bits for quantization (2-10) | |
* @returns {{ points: Float32Array, colors: Float32Array, numPoints: number }} | |
*/ | |
function quantizeProcessDataAs(buffer, dataType, isLittleEndian, quantizationBits = 10) { | |
// Input validation | |
if (!buffer || !(buffer instanceof ArrayBuffer)) { | |
throw new Error('Invalid buffer provided - must be an ArrayBuffer'); | |
} | |
const config = DATA_TYPES[dataType]; | |
if (!config) { | |
throw new Error(`Unsupported data type: ${dataType}. Supported types: ${Object.keys(DATA_TYPES).join(', ')}`); | |
} | |
// Validate quantization bits | |
if (!Number.isInteger(quantizationBits) || quantizationBits < 2 || quantizationBits > 10) { | |
throw new Error('quantizationBits must be an integer between 2 and 10'); | |
} | |
// Check if total bits would exceed safe integer limits | |
const totalBits = quantizationBits * 3; | |
if (totalBits > 30) { | |
throw new Error(`Total quantization bits (${totalBits}) would exceed safe integer limits. Max 30 bits (10 bits per dimension)`); | |
} | |
const typeSize = config.size; | |
const tupleSize = typeSize * 3; | |
if (buffer.byteLength < tupleSize) { | |
throw new Error(`Buffer too small for data type ${dataType}. Need at least ${tupleSize} bytes, got ${buffer.byteLength}`); | |
} | |
// Use enhanced DataView for float support | |
const view = createExtendedDataView(buffer); | |
const maxOffset = buffer.byteLength - tupleSize; | |
const maxTuples = Math.floor(buffer.byteLength / tupleSize); | |
// Pre-allocate typed arrays for better performance | |
const points = new Float32Array(maxTuples * 3); | |
const colors = new Float32Array(maxTuples * 3); | |
// Setup normalization function based on data type | |
const readMethod = view[config.method].bind(view); | |
let normalize; | |
if (config.isFloat) { | |
// Use tanh for floating point normalization | |
normalize = value => { | |
// Handle special values | |
if (!isFinite(value)) { | |
return isNaN(value) ? 0 : (value > 0 ? 1 : -1); | |
} | |
// Apply tanh for smooth [-1, 1] mapping | |
return Math.tanh(value); | |
}; | |
} else { | |
// Use linear normalization for integers | |
const { multiplier, offset } = NORMALIZERS[dataType]; | |
normalize = value => ((value - offset) * multiplier) - 1; | |
} | |
let pointIndex = 0; | |
let baseOffset = 0; | |
// Calculate quantization parameters based on bit count | |
const qRange = 1 << quantizationBits; // 2^quantizationBits | |
const qHalfRange = qRange / 2; | |
const qMaxIndex = qRange - 1; | |
// Bit array sized for qRange^3 possible quantized positions | |
const totalQuantizedPositions = qRange * qRange * qRange; | |
const bitArraySizeInUint32 = Math.ceil(totalQuantizedPositions / 32); | |
const tupleBitArray = new Uint32Array(bitArraySizeInUint32); | |
// Calculate bit shift amounts for index creation | |
const yShift = quantizationBits; | |
const zShift = quantizationBits * 2; | |
console.log(`Using ${quantizationBits}-bit quantization with ${dataType} (${config.isFloat ? 'float' : 'integer'}): ${qRange}³ = ${totalQuantizedPositions.toLocaleString()} possible positions`); | |
try { | |
while (baseOffset <= maxOffset) { | |
// Read and normalize all three coordinates | |
const x = normalize(readMethod(baseOffset, isLittleEndian)); | |
const y = normalize(readMethod(baseOffset + typeSize, isLittleEndian)); | |
const z = normalize(readMethod(baseOffset + typeSize * 2, isLittleEndian)); | |
// Quantize coordinates: map [-1,1] to [0, qMaxIndex] with bounds checking | |
const qx = Math.max(0, Math.min(qMaxIndex, Math.floor((x + 1) * qHalfRange))); | |
const qy = Math.max(0, Math.min(qMaxIndex, Math.floor((y + 1) * qHalfRange))); | |
const qz = Math.max(0, Math.min(qMaxIndex, Math.floor((z + 1) * qHalfRange))); | |
// Create unique index for this quantized position using variable bit shifts | |
const qIndex = (qz << zShift) | (qy << yShift) | qx; | |
// Check if we've seen this quantized position before | |
const elementIndex = qIndex >>> 5; // Use unsigned right shift for bit array indexing | |
const bitPosition = qIndex & 0x1F; | |
const mask = 1 << bitPosition; | |
if ((tupleBitArray[elementIndex] & mask) === 0) { | |
// Mark this position as seen | |
tupleBitArray[elementIndex] |= mask; | |
// Store points (original normalized coordinates, not quantized) | |
points[pointIndex] = x; | |
points[pointIndex + 1] = y; | |
points[pointIndex + 2] = z; | |
// Store colors (mapped from [-1,1] to [0,1] for Three.js) | |
colors[pointIndex] = (x + 1) / 2; | |
colors[pointIndex + 1] = (y + 1) / 2; | |
colors[pointIndex + 2] = (z + 1) / 2; | |
pointIndex += 3; | |
} | |
baseOffset += tupleSize; | |
} | |
} catch (e) { | |
console.error(`Error processing data at offset: ${baseOffset}`, e); | |
// Return what we've processed so far rather than failing completely | |
} | |
// Trim arrays to actual size used | |
const actualPoints = new Float32Array(points.buffer, 0, pointIndex); | |
const actualColors = new Float32Array(colors.buffer, 0, pointIndex); | |
const normalizationMethod = config.isFloat ? 'tanh' : 'linear'; | |
console.log(`Quantization completed: ${quantizationBits} bits, ${qRange}^3 possible positions, ${pointIndex / 3} unique points found using ${normalizationMethod} normalization`); | |
return { | |
points: actualPoints, | |
colors: actualColors, | |
numPoints: pointIndex / 3, | |
quantizationBits: quantizationBits, | |
quantizationRange: qRange, | |
dataType: dataType, | |
normalizationMethod: normalizationMethod | |
}; | |
} | |
export { quantizeProcessDataAs, DATA_TYPES, createExtendedDataView }; |
// Sign up for free
// to join this conversation on GitHub.
// Already have an account?
// Sign in to comment