Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active April 10, 2022 10:16
Show Gist options
  • Select an option

  • Save bellbind/b7803cd78249c95bc95f1084adb36eeb to your computer and use it in GitHub Desktop.

Select an option

Save bellbind/b7803cd78249c95bc95f1084adb36eeb to your computer and use it in GitHub Desktop.
[browser] Display MNIST images with Web API DecompressionStream
// BYOB emulation as TransformStream for `u8readable.pipeThrough(new BYOBTransform())`
// Rendezvous queue between one producer side (push/close) and one consumer side (next).
// A push()/close() only resolves once a consumer has asked for the item via next(),
// and next() only resolves once a producer has supplied an item.
const newQueue = () => {
  // resolve callbacks of next() calls that are still waiting for an item
  const waitingConsumers = [];
  // resolve callbacks of poll() calls that are still waiting for a consumer
  const waitingProducers = [];

  // Consumer side: resolves with {value, done: false} or {done: true}.
  const next = () => new Promise((deliver) => {
    if (waitingProducers.length > 0) {
      // a producer is already waiting: hand it our resolve callback
      waitingProducers.shift()(deliver);
    } else {
      waitingConsumers.push(deliver);
    }
  });

  // Producer side helper: resolves with the resolve callback of the oldest waiting consumer.
  const poll = () => new Promise((gotConsumer) => {
    if (waitingConsumers.length > 0) {
      gotConsumer(waitingConsumers.shift());
    } else {
      waitingProducers.push(gotConsumer);
    }
  });

  const push = async (value) => {
    const deliver = await poll();
    return deliver({value, done: false});
  };
  const close = async () => {
    const deliver = await poll();
    return deliver({done: true});
  };

  // The queue is its own async iterator (usable with `for await`).
  return {next, push, close, [Symbol.asyncIterator]() {return this}};
};
// {readable, writable} pair usable with pipeThrough(): chunks written to `writable`
// are served from `readable`, a "bytes" stream, so it can be read with a BYOB reader.
// Each pull fills the requested view completely, except at the end of the input.
export const BYOBTransform = class {
  /**
   * @param {{autoAllocateChunkSize?: number}} [transform] only `autoAllocateChunkSize`
   *   is read; it is forwarded to the byte stream's underlying source.
   */
  constructor(transform = {}) {
    // hand-off channel: the writable side pushes chunks, pull() takes them
    const handoff = newQueue();
    // unconsumed tail of the last chunk taken from the queue; kept across pull() calls
    let leftover = null;

    const byteSource = {
      type: "bytes",
      autoAllocateChunkSize: transform.autoAllocateChunkSize,
      async pull(controller) {
        // `target` always covers the still-unfilled tail of the request's view
        let target = controller.byobRequest.view;
        while (!leftover || leftover.byteLength < target.byteLength) {
          if (leftover) {
            // leftover is shorter than what is still needed: copy all of it, keep filling
            target.set(leftover);
            target = target.subarray(leftover.byteLength);
          }
          const item = await handoff.next();
          leftover = item.value;
          if (item.done) {
            // input ended: report the bytes copied so far (possibly none)
            const written = target.byteOffset - controller.byobRequest.view.byteOffset;
            if (written === 0) {
              // nothing copied: close first, then answer the request with 0 bytes
              controller.close();
              controller.byobRequest.respond(written);
            } else {
              // partial data: answer the request first, then close
              controller.byobRequest.respond(written);
              controller.close();
            }
            return;
          }
        }
        // leftover can satisfy the rest of the request; keep its tail for the next pull
        target.set(leftover.subarray(0, target.byteLength));
        leftover = leftover.subarray(target.byteLength);
        const total = target.byteOffset + target.byteLength - controller.byobRequest.view.byteOffset;
        controller.byobRequest.respond(total);
      },
    };
    this.readable = new ReadableStream(byteSource);

    const chunkSink = {
      // resolves only after pull() has taken the chunk from the queue
      async write(piece) {
        await handoff.push(piece);
      },
      async close() {
        await handoff.close();
      },
    };
    this.writable = new WritableStream(chunkSink);
  }
};
<!doctype html>
<html>
<head>
<!-- Empty data: URL as the icon (presumably to avoid a separate favicon request). -->
<link rel="icon" href="data:," />
<meta charset="utf-8" />
<!-- Loads ./main.js as an ES module. -->
<script type="module" src="./main.js"></script>
</head>
<!-- The body is empty in the markup. -->
<body></body>
</html>
import {MimicBYOBReader} from "./mimic-byob-reader.js";
import {BYOBTransform} from "./byob-transform.js";
// MNIST data from: http://yann.lecun.com/exdb/mnist/
// Relative URLs of the .gz MNIST files, keyed by data set name ("train" / "t10k").
// Each entry has the {images, labels} shape that loadMnist(urls) reads.
const mnistUrl = {
train: {
images: "./train-images-idx3-ubyte.gz",
labels: "./train-labels-idx1-ubyte.gz",
},
t10k: {
images: "./t10k-images-idx3-ubyte.gz",
labels: "./t10k-labels-idx1-ubyte.gz",
},
};
//[MNIST gzip decompressed file format]
// images:
// 0-3: magic = 2051 (Big Endian)
// 4-7: image count = train 60000 | t10k 10000 (Big Endian)
// 8-11: image width = 28 (Big Endian)
// 12-15: image height = 28 (Big Endian)
// 16-799: 28x28 pixel bytes(white 0-255 black) of image[0]
// 800-1583: 28x28 pixel bytes(white 0-255 black) of image[1]
// ...
// labels:
// 0-3: magic = 2049 (Big Endian)
// 4-7: label count = train 60000 | t10k 10000 (Big Endian), same as the image count
// 8: label (digit 0-9) of image[0]
// 9: label (digit 0-9) of image[1]
// ...
//
/**
 * Load MNIST images and labels from a pair of gzip-compressed files, streaming them.
 *
 * @param {{images: string, labels: string}} urls URLs of the images file and the labels file.
 * @yields {{image: Uint8Array, label: Uint8Array}} one sample at a time: `image` holds
 *   width*height pixel bytes, `label` is a 1-byte array holding the label value.
 * @returns {AsyncGenerator} whose final return value is `{images, labels}` with every sample read.
 * @throws {Error} when a file cannot be fetched (non-2xx response).
 * @throws {TypeError} on a bad magic number, mismatched counts, an unexpected image size,
 *   or when a file ends before all announced data was read.
 */
const loadMnist = async function* (urls) {
  // Fetch one file and return its gunzipped byte stream.
  const openGzip = async (url) => {
    const response = await fetch(url);
    // without this check an error page would be fed to the gzip decoder and fail obscurely
    if (!response.ok) throw new Error(`failed to fetch ${url}: ${response.status} ${response.statusText}`);
    return response.body.pipeThrough(new DecompressionStream("gzip"));
  };
  // Read exactly `view.byteLength` bytes. Both reader adapters used below fill the view
  // completely unless the stream ends, so a short or `done` result means truncated data.
  const readFully = async (reader, view, what) => {
    // capture first: a real BYOB read detaches the buffer of the view that was passed in
    const expected = view.byteLength;
    const {done, value} = await reader.read(view);
    if (done || !value || value.byteLength < expected) {
      throw new TypeError(`unexpected end of data while reading ${what}`);
    }
    return value;
  };
  // Read one big-endian uint32 header field.
  const readUint32 = async (reader, what) => {
    const field = await readFully(reader, new DataView(new ArrayBuffer(4)), what);
    return field.getUint32(0, false);
  };

  // the two downloads are independent, so start them together
  const [imageReadable, labelReadable] = await Promise.all([openGzip(urls.images), openGzip(urls.labels)]);
  // One file goes through each BYOB adapter (BYOBTransform for images, MimicBYOBReader for
  // labels); the original kept the swapped variants as commented-out alternatives.
  const imageReader = imageReadable.pipeThrough(new BYOBTransform()).getReader({mode: "byob"});
  const labelReader = new MimicBYOBReader(labelReadable.getReader());
  try {
    const imageMagic = await readUint32(imageReader, "images magic");
    if (imageMagic !== 2051) throw new TypeError("invalid magic of images file");
    const labelMagic = await readUint32(labelReader, "labels magic");
    if (labelMagic !== 2049) throw new TypeError("invalid magic of labels file");
    const count = await readUint32(imageReader, "image count");
    const labelCount = await readUint32(labelReader, "label count");
    if (count !== labelCount) throw new TypeError(`mismatched counts: images, labels: ${count}, ${labelCount}`);
    const width = await readUint32(imageReader, "image width");
    const height = await readUint32(imageReader, "image height");
    if (width !== 28 || height !== 28) throw new TypeError(`invalid image size(28x28): ${width}x${height}`);
    const bytes = width * height;
    const images = [];
    const labels = [];
    for (let i = 0; i < count; i++) {
      const image = await readFully(imageReader, new Uint8Array(bytes), `image[${i}]`);
      const label = await readFully(labelReader, new Uint8Array(1), `label[${i}]`);
      yield {image, label};
      images.push(image);
      labels.push(label);
    }
    return {images, labels};
  } finally {
    // Cancel before releasing so the streams are torn down when the consumer stops early or
    // an error occurred. A cancel() rejection is ignored on purpose: it only repeats a stream
    // error that read() has already reported.
    await Promise.allSettled([imageReader.cancel(), labelReader.cancel()]);
    imageReader.releaseLock();
    labelReader.releaseLock();
  }
};
// Convert one 28x28 MNIST image (one gray byte per pixel) into opaque RGBA ImageData.
// Each gray value v is stored as 255 - v in the R, G and B channels.
const toImageData = image => {
  const side = 28;
  const result = new ImageData(side, side);
  const rgba = result.data;
  for (let pixel = 0; pixel < side * side; pixel++) {
    const base = pixel * 4;
    const shade = 255 - image[pixel];
    rgba[base] = shade;
    rgba[base + 1] = shade;
    rgba[base + 2] = shade;
    rgba[base + 3] = 255; // fully opaque
  }
  return result;
};
// Create a 28x28 <canvas> with a solid border and draw the given MNIST image onto it.
const toCanvas = image => {
  const element = document.createElement("canvas");
  element.height = 28;
  element.width = 28;
  element.style.borderStyle = "solid";
  const context = element.getContext("2d");
  context.putImageData(toImageData(image), 0, 0);
  return element;
};
// [example] Append one canvas per image of the t10k set to the page; the canvas title is its label
for await (const sample of loadMnist(mnistUrl.t10k)) {
  const digit = toCanvas(sample.image);
  digit.title = sample.label;
  document.body.append(digit);
  // disabled: wait for one animation frame after each image
  //await new Promise(f => requestAnimationFrame(f));
}
// Emulate a BYOB reader on top of a default reader that yields Uint8Array chunks
// (e.g. the reader of a DecompressionStream readable).
// Unlike a real BYOB reader, read(view) fills the whole view unless the stream ends,
// and the passed view's buffer is not detached.
export const MimicBYOBReader = class {
  #reader;
  // unconsumed tail of the last chunk read from the wrapped reader
  #chunk = null;
  // set once the wrapped reader has reported `done`
  #closed = false;

  /** @param u8reader a reader whose read() resolves {done, value} with Uint8Array values */
  constructor(u8reader) {
    this.#reader = u8reader;
  }

  get closed() {return this.#reader.closed;}
  /** Cancel the wrapped reader, forwarding the optional reason. */
  cancel(reason) {return this.#reader.cancel(reason);}
  releaseLock() {return this.#reader.releaseLock();}

  /**
   * Fill `view` with the next bytes of the stream.
   * @param view a TypedArray or DataView with byteLength > 0.
   * @returns {Promise<{done: boolean, value: *}>} `value` is a new view of the same type over
   *   the same buffer and offset. It covers the whole requested range, or, when the stream
   *   ended first, only the whole elements that could be filled. `done` is true only when
   *   no byte at all could be read.
   * @throws {TypeError} when `view` is empty or the wrapped reader yields a non-Uint8Array.
   */
  async read(view) {
    if (view.byteLength === 0) throw new TypeError("it must be view.byteLength > 0");
    // DataView has no BYTES_PER_ELEMENT; it is addressed in bytes
    const bytesPerElement = view.BYTES_PER_ELEMENT ?? 1;
    // The third constructor argument is an ELEMENT count for typed arrays and a byte count
    // for DataView, so always convert from bytes (dropping a trailing partial element).
    const viewOf = byteLength =>
      new view.constructor(view.buffer, view.byteOffset, Math.trunc(byteLength / bytesPerElement));
    if (this.#closed) return {done: true, value: viewOf(0)};

    // `rest` always covers the still-unfilled tail of the requested range
    let rest = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
    while (!this.#chunk || this.#chunk.byteLength < rest.byteLength) {//[chunk shorter than view]
      if (this.#chunk) {
        rest.set(this.#chunk);
        rest = rest.subarray(this.#chunk.byteLength);
      }
      const {done, value} = await this.#reader.read();
      if (done) {//[just after the last chunk]
        this.#closed = true;
        this.#chunk = null;
        const filled = rest.byteOffset - view.byteOffset;
        return {done: filled === 0, value: viewOf(filled)};
      }
      if (!(value instanceof Uint8Array)) throw new TypeError(`Must be Uint8Array reader but: ${value}`);
      this.#chunk = value;
    }
    // the current chunk can satisfy the rest; keep its tail for the next read
    rest.set(this.#chunk.subarray(0, rest.byteLength));
    this.#chunk = this.#chunk.subarray(rest.byteLength);
    return {done: false, value: viewOf(view.byteLength)};
  }
};
This file has been truncated, but you can view the full file.
View raw

(Sorry about that, but we can’t show files that are this big right now.)

@bellbind

Copy link
Copy Markdown
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment