Created
March 22, 2023 14:31
-
-
Save raineorshine/d30886c0cb66c135197a4f89b23f45d4 to your computer and use it in GitHub Desktop.
Stream a file by chunk size and return whole lines. Based on: https://stackoverflow.com/a/39505307/480608
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Stream a file by chunk size and return whole lines.
 *
 * The file is read sequentially in slices of `chunkSizeBytes` and decoded with a
 * default (UTF-8) `TextDecoder`. After each slice, every complete line seen so far is
 * passed to `data` as one string in which each line is terminated by `'\n'`. A
 * trailing partial line is held back and prepended to the next slice. On the last
 * slice the remaining text is emitted as a final line (with `'\n'` appended) only if
 * it is non-empty, so a file that already ends in a newline does not produce an extra
 * blank line and an empty file produces no `data` call at all.
 *
 * @param file - The Blob (or File) to read.
 * @param options.chunkSizeBytes - Bytes to read per slice. Missing, zero, negative or
 *   NaN values fall back to 100000; fractional values are rounded down (minimum 1).
 * @param options.data - Called with each batch of whole lines.
 * @param options.complete - Called once with `null` after the last slice has been
 *   processed, or with the reader's error if a read fails.
 * @see https://stackoverflow.com/a/39505307/480608
 */
function readLines(
  file: Blob,
  {
    chunkSizeBytes,
    data,
    complete,
  }: { chunkSizeBytes?: number; data?: (lines: string) => void; complete?: (err: DOMException | null) => void },
) {
  // A non-positive or NaN size would keep the offset from advancing and the read loop
  // from ever terminating, so treat it like a missing value.
  const chunkSize =
    chunkSizeBytes !== undefined && chunkSizeBytes > 0 ? Math.max(1, Math.floor(chunkSizeBytes)) : 100000
  const decoder = new TextDecoder()
  const fileReader = new FileReader()
  let offset = 0
  // Text of the trailing partial line carried over from the previous chunk.
  let pending = ''

  /** Reads the next chunk. */
  const seek = () => {
    fileReader.readAsArrayBuffer(file.slice(offset, offset + chunkSize))
  }

  fileReader.onload = () => {
    offset += chunkSize
    const isLastChunk = offset >= file.size

    // Use stream:true while more chunks follow, in case we cut the file in the middle
    // of a multi-byte character. The last call omits it so the decoder flushes any
    // dangling bytes instead of silently dropping them.
    const text = pending + decoder.decode(fileReader.result as BufferSource, { stream: !isLastChunk })

    // split always yields at least one element; the last one is the text after the
    // final newline (empty when the text ends with a newline).
    const parts = text.split('\n')
    const tail = parts[parts.length - 1] ?? ''
    const whole = parts.slice(0, -1)

    // Do not return partial lines: carry them into the next chunk. At end of file the
    // remainder is a real (unterminated) line, unless it is empty.
    const lines = isLastChunk && tail !== '' ? [...whole, tail] : whole
    pending = isLastChunk ? '' : tail

    // yield all whole lines from this chunk
    if (lines.length > 0) {
      data?.(lines.join('\n') + '\n')
    }

    if (isLastChunk) {
      complete?.(null)
    } else {
      seek()
    }
  }

  fileReader.onerror = () => {
    complete?.(fileReader.error)
  }

  seek()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment