Last active
December 8, 2024 09:49
-
-
Save park-brian/3130979a25f8dbdc3bcdf1088ed813ad to your computer and use it in GitHub Desktop.
getBytesFromRanges (eg: sliceMany for remote files)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Retrieves several byte ranges from a URL with a single HTTP Range request.
 *
 * Three response shapes are handled:
 *  - 200 with the full body: each range is sliced at its absolute offsets.
 *  - 206 with a single part: the body starts at the offset announced by the
 *    Content-Range header, so ranges are sliced relative to that offset.
 *  - 206 multipart/byteranges: each part is located by byte offset and its
 *    Content-Range header decides which requested ranges it satisfies.
 *
 * The body is decoded with a single-byte decoder only to locate part headers;
 * string indices therefore equal byte offsets and the returned data is always
 * sliced from the original ArrayBuffer.
 *
 * @param {Object} options
 * @param {string} options.url - Source URL
 * @param {(number[]|null)[]} options.ranges - Array of [start, end] pairs (end is exclusive); null entries are skipped
 * @param {RequestInit} [options.requestInit] - Fetch API configuration (headers may be a plain object, a Headers instance or an array of pairs)
 * @param {boolean} [options.trustContentType=false] - Whether to take the multipart boundary from the content-type header instead of sniffing it from the body
 * @returns {Promise<ArrayBuffer[]>} One buffer per entry of `ranges`, in the same order; entries that were null, empty, or not covered by the response are empty buffers
 * @throws {Error} If the response status is not in the 2xx range
 */
async function getBytesFromRanges({
  url,
  ranges,
  requestInit = { cache: "no-store" },
  trustContentType = false,
}) {
  const byteRanges = ranges.map(() => new ArrayBuffer(0));

  // Empty ranges are already satisfied by the placeholder buffers, and a spec
  // such as "5-4" would make the whole Range header invalid.
  const requested = ranges.filter((range) => range && range[1] > range[0]);
  if (requested.length === 0) return byteRanges;

  // `new Headers(...)` keeps caller headers whatever form they were given in;
  // object spread would silently drop a Headers instance.
  const headers = new Headers(requestInit.headers);
  const rangeHeader = requested
    .map(([start, end]) => `${start}-${end - 1}`)
    .join(",");
  headers.set("range", `bytes=${rangeHeader}`);

  const response = await fetch(url, { ...requestInit, headers });
  if (!response.ok) {
    // Slicing an error page would hand back garbage as if it were file data.
    throw new Error(`Range request for ${url} failed with status ${response.status}`);
  }
  const responseBuffer = await response.arrayBuffer();

  // A top-level Content-Range header means a single-part response, so there is
  // no boundary to look for.
  const singlePart = parseContentRange(response.headers.get("content-range"));
  let responseText = null;
  let boundary = null;
  if (!singlePart) {
    if (trustContentType) {
      boundary = boundaryFromContentType(response.headers.get("content-type") || "");
    } else {
      responseText = decodeBytesAsText(responseBuffer);
      boundary = sniffBoundary(responseText);
    }
  }

  if (!boundary) {
    let bodyStart = 0;
    if (singlePart) {
      bodyStart = singlePart.start;
    } else if (response.status === 206) {
      // Content-Range may not be readable (e.g. not exposed to the caller);
      // assume the partial body begins at the lowest requested offset.
      bodyStart = requested.reduce((min, [start]) => Math.min(min, start), Infinity);
    }
    ranges.forEach((range, i) => {
      if (!range || range[0] < bodyStart) return;
      byteRanges[i] = responseBuffer.slice(range[0] - bodyStart, range[1] - bodyStart);
    });
    return byteRanges;
  }

  if (responseText === null) responseText = decodeBytesAsText(responseBuffer);
  for (const part of parseByteRangeParts(responseText, boundary)) {
    ranges.forEach((range, i) => {
      if (!range) return;
      // A part may cover more than one requested range (servers can coalesce).
      if (part.start <= range[0] && range[1] <= part.end) {
        const offset = part.bodyOffset + (range[0] - part.start);
        byteRanges[i] = responseBuffer.slice(offset, offset + (range[1] - range[0]));
      }
    });
  }
  return byteRanges;
}

/** Decodes bytes one-to-one into characters so string indices equal byte offsets. */
function decodeBytesAsText(buffer) {
  return new TextDecoder("ascii", { fatal: true }).decode(buffer);
}

/** Parses "bytes S-E/T" into { start, end } with an exclusive end, or null if absent/unparseable. */
function parseContentRange(value) {
  const match = /^\s*bytes\s+(\d+)-(\d+)/i.exec(value || "");
  return match ? { start: Number(match[1]), end: Number(match[2]) + 1 } : null;
}

/** Extracts the boundary parameter (quoted or bare) from a content-type value, or null. */
function boundaryFromContentType(contentType) {
  const match = /boundary=(?:"([^"]+)"|([^;\s]+))/i.exec(contentType);
  return match ? match[1] || match[2] : null;
}

/** Detects a multipart boundary from a body that opens with "--X" on its own line and closes with "--X--". */
function sniffBoundary(text) {
  const trimmed = text.trim();
  const match = /^--(\S+)(?=[\r\n]|$)/.exec(trimmed);
  return match && trimmed.endsWith(`--${match[1]}--`) ? match[1] : null;
}

/**
 * Walks a multipart/byteranges body and returns, for every part that carries a
 * Content-Range header, its range (exclusive end) and the offset of its payload.
 */
function parseByteRangeParts(text, boundary) {
  const delimiter = `--${boundary}`;
  const parts = [];
  let cursor = text.indexOf(delimiter);
  while (cursor !== -1) {
    const headersStart = cursor + delimiter.length;
    if (text.startsWith("--", headersStart)) break; // closing delimiter
    const headersEnd = text.indexOf("\r\n\r\n", headersStart);
    if (headersEnd === -1) break;
    const bodyOffset = headersEnd + 4;
    const match = text
      .slice(headersStart, headersEnd)
      .match(/content-range:.*bytes (\d+)-(\d+)/i);
    let searchFrom = bodyOffset;
    if (match) {
      const start = Number(match[1]);
      const end = Number(match[2]) + 1;
      parts.push({ start, end, bodyOffset });
      // Skip the payload by its declared length so payload bytes that happen to
      // look like a blank line or a delimiter are never misread as structure.
      searchFrom = bodyOffset + (end - start);
    }
    cursor = text.indexOf(delimiter, searchFrom);
  }
  return parts;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment