Created
January 2, 2018 23:54
-
-
Save kirbysayshi/a815c0a706aec68a0a14ef6b8819cc70 to your computer and use it in GitHub Desktop.
Not the best JS (whoa global scope!) but I think this proves out the core ideas. Probably some bugs still.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// JS Implementation of https://yave.handmade.network/blogs/p/2723-how_media_molecule_does_serialization
// We have to make several changes due to JS's dynamic nature and lack of type
// information, outlined below.
// The rules:
// 1) Every class/valuetype needs a `serialize_TYPENAME(version, serializer, datum)`
// function. This gets around JS not having function overloading as well as the
// implicit type annotation that function overloading (in the original C example)
// provides. Since these are just called from user-defined `serialize_?`
// functions, we don't need any more type information! It's tempting to define
// these functions as methods, or to put them into a polymorphic function with
// a big switch statement or something. Don't do that! It's better to not
// require instances and to not rely on more runtime structures than necessary.
// 2) `serialize_TYPENAME` must handle what to do if datum is `null`. These are
// standalone functions (not class members), because otherwise you'd need really
// complex rules/semantics for how to instantiate a class just to hydrate it
// and cover all cases. Too complex! Instead assume the input can be null, which
// means: ALL VALUES MUST HAVE DEFAULTS. For example, the below code assumes that
// if a Boolean is null, it's read/written as `false`. C doesn't have this
// requirement because its spec provides default values. JS needs more
// guarding, e.g. check to make sure an actual number is coming in. And since
// JS doesn't have different number types, we need to translate those to C
// types anyway for the sake of interop.
// 3) If a field can be null, which implicitly means there could be absolutely
// no value written to the file (because null means the absence of a value...),
// then unfortunately a separate field (boolean, int32 for enum) must be used
// so the serializer knows whether it's allowed to read the next bytes or not.
// This is unintuitive, but it's a chicken/egg problem. All bytes in the "file"
// are just data, no field names, no markers. So you can't simply "get" the
// next value to check if it's null or not. You need to know what you're
// looking for. And how do you look for a struct? You can't, because aside from
// running out of data, there aren't any checks to know if things are misaligned.
// 4) `serialize_TYPENAME` must return a value, and the parent `serialize_?`
// call must handle that value, usually by assigning. This is duplication, but
// since JS doesn't have pointers, we have no way of assigning a value within
// the `serialize_?` function. Requiring all values to have mutable
// interiors would require wrappers for all JS types! No go. Returning the
// value also allows the REM helper (defined below) to work without references.
/**
 * Serialization versions. Each entry names one revision of the serialized
 * layout; the numbers are what `ADD`/`REM` compare against the version stored
 * at the start of the data.
 */
const SV = {
  INITIAL: 1,
  CHILD_ARR: 2,
  NO_ASTR: 3,
  // Add new iterations above this comment.
};

/**
 * Runtime check of a version table, since JS doesn't have an Enum type.
 *
 * Every value must be a real integer >= 1 and must be unique. Strict type
 * checks are used on purpose: the coercing global `isNaN(Number(v))` would
 * accept `'2'`, `null`, `true` or `1.5` as a version.
 *
 * @param {Object<string, number>} versions - name -> version number table.
 * @throws {Error} when any value is not a number, not an integer, negative,
 *   duplicated, or zero.
 */
function assertValidVersions(versions) {
  const seen = new Set();
  for (const value of Object.values(versions)) {
    if (typeof value !== 'number' || Number.isNaN(value)) throw new Error('Found a NaN!');
    if (!Number.isInteger(value)) throw new Error('Values must be integers!');
    if (value < 0) throw new Error('Values cannot be negative!');
    if (seen.has(value)) throw new Error('Values must be unique!');
    seen.add(value);
  }
  if (seen.has(0)) throw new Error('Values must start at 1!');
}

assertValidVersions(SV);

// Don't mess with this! It is derived after validation so that LATEST itself
// is never treated as a separate (duplicate) version entry.
SV.LATEST = Math.max(...Object.values(SV));
/**
 * Mutable state handed to every `serialize_*` function: the direction
 * (reading or writing), the version of the data being processed, and the
 * backing "file".
 */
class LBPSerializer {
  /**
   * @param {string} filename - stored on the instance as-is; this class does
   *   no file I/O with it.
   */
  constructor(filename) {
    Object.assign(this, {
      filename,
      // false = reading, true = writing.
      isWriting: false,
      dataVersion: 0,
      // Fake file "buffer": a list of value descriptors standing in for bytes.
      // It approximates writing and reading something produced by C for the
      // sake of interop. Note that real interop would require JS to agree with
      // C on value defaults and optionals.
      file: [],
    });
  }
}
// The version number is stored like any other serialized value: we serialize
// the serializer itself! The signature differs from the other `serialize_*`
// functions because the serializer has ALWAYS BEEN AND ALWAYS WILL BE, so
// there is no separate datum to pass.
//
// When writing, the data version is first stamped with SV.LATEST. The version
// then goes through serialize_Int32 (written out, or replaced by the stored
// value when reading).
//
// Returns false when the resulting version is newer than SV.LATEST (i.e. a
// file newer than this code supports), true otherwise. The original C example
// returns bools because C has no exceptions; throwing might be better in other
// languages, especially since the `serialize` functions here already break the
// original's `void` convention by returning values (no pointers in JS).
function serialize_LBPSerializer(serializer) {
  if (serializer.isWriting) serializer.dataVersion = SV.LATEST;

  const version = serialize_Int32(serializer, serializer.dataVersion);
  serializer.dataVersion = version;

  return !(version > SV.LATEST);
}
/**
 * Serializes a field that exists from `versionAdded` onwards.
 *
 * @param {Function} serializerFn - called as `serializerFn(serializer, datum)`.
 * @param {number} versionAdded - first data version that contains the field.
 * @param {object} serializer - must expose `dataVersion`.
 * @param {*} datum - current value of the field.
 * @returns {*} the result of `serializerFn` when the data version has the
 *   field, otherwise `datum` untouched.
 */
function ADD(serializerFn, versionAdded, serializer, datum) {
  const fieldExists = serializer.dataVersion >= versionAdded;
  return fieldExists ? serializerFn(serializer, datum) : datum;
}
/**
 * Serializes a field that only exists in data versions within
 * `[versionAdded, versionRemoved)`.
 *
 * @param {Function} serializerFn - called as `serializerFn(serializer, datum)`.
 * @param {number} versionAdded - first data version that contains the field.
 * @param {number} versionRemoved - first data version that no longer has it.
 * @param {object} serializer - must expose `dataVersion`.
 * @param {*} datum - value to use for the field.
 * @returns {*} the result of `serializerFn` when the data version still has
 *   the field, otherwise `datum` untouched.
 */
function REM(serializerFn, versionAdded, versionRemoved, serializer, datum) {
  const { dataVersion } = serializer;
  const isPresent = dataVersion >= versionAdded && dataVersion < versionRemoved;
  if (!isPresent) {
    return datum;
  }
  return serializerFn(serializer, datum);
}
/**
 * Writes or reads one boolean, described as a 1-byte entry in
 * `serializer.file`.
 *
 * Writing: a falsy datum (including null/undefined) is treated as `false` and
 * stored as 0; anything else is stored as 1. The (defaulted) datum is returned.
 * Reading: the next descriptor is removed from the front of the file; 0 reads
 * as `false`, any other value as `true`. The incoming datum is ignored.
 *
 * @param {object} serializer - exposes `isWriting` and the `file` array.
 * @param {*} datum - value to write.
 * @returns {*} the written datum, or the boolean that was read.
 */
function serialize_Bool(serializer, datum) {
  if (!serializer.isWriting) {
    const { value } = serializer.file.shift();
    return value !== 0;
  }

  const flag = datum || false;
  serializer.file.push({
    bytes: Int8Array.BYTES_PER_ELEMENT,
    value: flag === false ? 0 : 1,
  });
  return flag;
}
/**
 * Writes or reads one 32-bit signed integer, described as a 4-byte entry in
 * `serializer.file`.
 *
 * Writing: the datum is converted to a real int32 before it is stored, because
 * JS has a single number type and the entry claims to be 4 bytes wide.
 * null/undefined/NaN and non-numeric values become 0, fractions are truncated
 * toward zero, and out-of-range values wrap the same way an `Int32Array`
 * element store does. The converted value is returned so the in-memory value
 * matches what a later read will produce.
 * Reading: the next descriptor is removed from the front of the file and its
 * value returned; the incoming datum is ignored.
 *
 * @param {object} serializer - exposes `isWriting` and the `file` array.
 * @param {*} datum - value to write.
 * @returns {number} the int32 that was written, or the value that was read.
 */
function serialize_Int32(serializer, datum) {
  if (serializer.isWriting) {
    // `| 0` applies the ToInt32 conversion.
    const int32 = datum | 0;
    serializer.file.push({
      bytes: Int32Array.BYTES_PER_ELEMENT,
      value: int32,
    });
    return int32;
  }

  const desc = serializer.file.shift();
  return desc.value;
}
/**
 * Writes or reads a length-prefixed list of int32 values.
 *
 * Layout: one Int32 holding the element count, followed by that many Int32
 * elements, all going through `serialize_Int32`.
 *
 * Writing: a null/undefined datum is replaced by an empty `Int32Array`, so
 * the function returns the type's default just like the other `serialize_*`
 * functions do (and like a later read of the same data will).
 * Reading: a new `Int32Array` of the stored length is allocated and filled;
 * the incoming datum is ignored because an Int32Array cannot be resized.
 *
 * @param {object} serializer - exposes `isWriting` and the `file` array.
 * @param {?(Int32Array|number[])} datum - values to write.
 * @returns {Int32Array|number[]} the written datum (or an empty Int32Array if
 *   it was null), or the freshly read Int32Array.
 */
function serialize_Int32Array(serializer, datum) {
  if (serializer.isWriting) {
    const source = datum || new Int32Array(0);
    const length = serialize_Int32(serializer, source.length);
    for (let i = 0; i < length; i++) {
      serialize_Int32(serializer, source[i]);
    }
    return source;
  }

  // The datum passed to serialize_Int32 is irrelevant while reading.
  const length = serialize_Int32(serializer, 0);
  const result = new Int32Array(length);
  for (let i = 0; i < length; i++) {
    result[i] = serialize_Int32(serializer, 0);
  }
  return result;
}
/**
 * Writes or reads a string as a length-prefixed list of Unicode code points,
 * each stored through `serialize_Int32`.
 *
 * The length prefix is the number of CODE POINTS, not `datum.length`: a JS
 * string's `length` counts UTF-16 code units, so any character outside the
 * Basic Multilingual Plane would otherwise announce more elements than are
 * actually written and the string would not round-trip.
 *
 * serialize_Int32Array is deliberately not reused here: the elements are
 * produced/consumed one by one from the code point list.
 *
 * Writing: a null/undefined/empty datum is written as length 0 and `''` is
 * returned.
 * Reading: the incoming datum is ignored and the decoded string is returned.
 *
 * @param {object} serializer - exposes `isWriting` and the `file` array.
 * @param {?string} datum - string to write.
 * @returns {string} the written string, or the string that was read.
 */
function serialize_CharArray(serializer, datum) {
  if (serializer.isWriting) {
    const text = datum || ''; // handle if the datum is `null`
    // Iterating a string yields whole code points, so surrogate pairs are
    // never split. https://stackoverflow.com/a/41085758/169491
    const points = [...text].map((ch) => ch.codePointAt(0));
    const count = serialize_Int32(serializer, points.length);
    for (let i = 0; i < count; i++) {
      serialize_Int32(serializer, points[i]);
    }
    return text;
  }

  // The datum passed to serialize_Int32 is irrelevant while reading.
  const length = serialize_Int32(serializer, 0);
  const i32Points = new Int32Array(length);
  for (let i = 0; i < length; i++) {
    i32Points[i] = serialize_Int32(serializer, 0);
  }
  // Spread into a plain array first: mapping a typed array directly would
  // try to store the resulting strings back into an Int32Array.
  return [...i32Points].map((p) => String.fromCodePoint(p)).join('');
}
const assert = require('assert'); | |
/**
 * Example top-level record used to exercise the serializer. Every field
 * starts out with a default value.
 */
class TestObj {
  constructor() {
    Object.assign(this, {
      someNum: 0,

      // A null array is tolerated when writing, but what gets written is the
      // default for the type: for an empty array that is just a length of 0.
      // It's a chicken/egg problem: you cannot know an array is null without
      // reading, and you cannot tell the reader to expect a possible null
      // without knowing beforehand. There is no way to ask for the next
      // "value" because its type is unknown -- ask for the next byte when the
      // value was meant to be a double and you'd have to recombine it by
      // hand... gross.
      someArr: new Int32Array(),

      // The string field `aStr` (default '') used to live here; it was
      // removed with SV.NO_ASTR.

      // A field that cannot have a type-system-defined default needs another
      // mechanism to signal that it is set; `hasChild` plays that role for
      // `child`. https://twitter.com/OswaldHurlem/status/948281710986194944
      hasChild: false,
      child: null,

      childArr: [],
    });
  }
}
/**
 * Writes or reads every field of a TestObj, in a fixed order, mutating and
 * returning `datum`.
 *
 * Field order (must never change): someNum, someArr, the removed `aStr`,
 * hasChild, child (only when hasChild is set), childArr length, then each
 * childArr element.
 *
 * When reading into an object that already holds data, state that the data
 * does not contain is cleared: `child` is reset to null when `hasChild` reads
 * as false, and `childArr` is truncated to the stored length.
 *
 * @param {object} serializer - exposes `isWriting`, `dataVersion` and `file`.
 * @param {TestObj} datum - object to write from / hydrate into.
 * @returns {TestObj} the same `datum` instance.
 */
function serialize_TestObj(serializer, datum) {
  datum.someNum = ADD(serialize_Int32, SV.INITIAL, serializer, datum.someNum);
  datum.someArr = ADD(serialize_Int32Array, SV.INITIAL, serializer, datum.someArr);

  // `aStr` has been removed. The call must stay so that data older than
  // SV.NO_ASTR still has the field consumed; the result is discarded on
  // purpose because there is nothing to do with it.
  REM(serialize_CharArray, SV.INITIAL, SV.NO_ASTR, serializer, '');

  datum.hasChild = ADD(serialize_Bool, SV.INITIAL, serializer, datum.hasChild);
  // https://twitter.com/OswaldHurlem/status/948281710986194944
  // hasChild could instead be an enum controlling more advanced behavior,
  // e.g. 0: none, 1: struct instance (equivalent to `new Obj()` in JS,
  // assuming Obj's serialize function provides proper defaults). Probably
  // 0 or 1 is all that is ever needed.
  if (datum.hasChild) {
    datum.child = ADD(serialize_ChildTestObj, SV.INITIAL, serializer, datum.child);
  } else if (!serializer.isWriting) {
    // The data says "no child": drop whatever the target held before.
    datum.child = null;
  }

  // For lists, each parent object controls the list serialization, since JS
  // doesn't have templates or generics (again because of the lack of type
  // information). Subclassing Array and implementing serialize for the
  // subclass is another option, but that gets pretty annoying in JS.
  const childArrLength = ADD(serialize_Int32, SV.CHILD_ARR, serializer, datum.childArr.length);
  for (let i = 0; i < childArrLength; i++) {
    datum.childArr[i] = ADD(serialize_ChildTestObj, SV.CHILD_ARR, serializer, datum.childArr[i]);
  }
  // The array is never grown through `.length` (that could create holes), but
  // shrinking is safe and removes stale entries left in a reused target.
  if (childArrLength >= 0 && datum.childArr.length > childArrLength) {
    datum.childArr.length = childArrLength;
  }

  return datum;
}
/**
 * Example nested record: a single `id` field that starts out as null.
 */
class ChildTestObj {
  constructor() {
    Object.assign(this, { id: null });
  }
}
/**
 * Writes or reads a ChildTestObj. A falsy datum is replaced by a fresh
 * ChildTestObj, so callers may pass null and still get an instance back.
 *
 * @param {object} serializer - passed through to ADD / serialize_CharArray.
 * @param {?ChildTestObj} datum - object to write from / hydrate into.
 * @returns {ChildTestObj} `datum`, or the newly created instance.
 */
function serialize_ChildTestObj(serializer, datum) {
  const child = datum || new ChildTestObj();
  child.id = ADD(serialize_CharArray, SV.INITIAL, serializer, child.id);
  return child;
}
// Self-test script. Each scenario serializes a TestObj into a fresh
// LBPSerializer and compares the fake file against the expected descriptor
// stream; two scenarios also read the data back. Strict assertions are used
// so that types and prototypes are compared, not just loosely-equal values.
let s;
let obj;
let out;
let expected;
let serializedOk;

// Scenario 1: a default TestObj.
obj = new TestObj();
s = new LBPSerializer('the-file-001.file');
s.isWriting = true;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, obj);
console.log('written', s);
assert.deepStrictEqual(
  s.file,
  [
    { bytes: 4, value: 3 }, // data version
    { bytes: 4, value: 0 }, // someNum
    { bytes: 4, value: 0 }, // someArr length
    { bytes: 1, value: 0 }, // hasChild
    { bytes: 4, value: 0 }, // childArr length
  ]
);

// Scenario 2: a populated Int32Array.
// Assign it after construction to simulate typical programming...
obj.someArr = new Int32Array([0, 1, 2, 3]);
s = new LBPSerializer('the-file-001.file');
s.isWriting = true;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, obj);
console.log('written', s);
assert.deepStrictEqual(
  s.file,
  [
    { bytes: 4, value: 3 }, // data version
    { bytes: 4, value: 0 }, // someNum
    { bytes: 4, value: 4 }, // someArr length
    { bytes: 4, value: 0 },
    { bytes: 4, value: 1 },
    { bytes: 4, value: 2 },
    { bytes: 4, value: 3 },
    { bytes: 1, value: 0 }, // hasChild
    { bytes: 4, value: 0 }, // childArr length
  ]
);

// Scenario 3: an optional child whose id is a single space.
obj = new TestObj();
obj.child = new ChildTestObj();
obj.child.id = ' ';
obj.hasChild = true;
s = new LBPSerializer('the-file-001.file');
s.isWriting = true;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, obj);
console.log('written', s);
assert.deepStrictEqual(
  s.file,
  [
    { bytes: 4, value: 3 }, // data version
    { bytes: 4, value: 0 }, // someNum
    { bytes: 4, value: 0 }, // someArr length
    { bytes: 1, value: 1 }, // hasChild
    { bytes: 4, value: 1 }, // child.id length
    { bytes: 4, value: 32 }, // ' '
    { bytes: 4, value: 0 }, // childArr length
  ]
);

// Make sure the deserialized child has an id of ' '!!
// Reading consumes the descriptors that were just written.
out = new TestObj();
s.isWriting = false;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, out);
expected = new TestObj();
expected.hasChild = true;
expected.child = new ChildTestObj();
expected.child.id = ' ';
assert.deepStrictEqual(out, expected);

// Scenario 4: the list. The second id is TWO spaces so that it matches the
// expected length-2 entry below.
obj = new TestObj();
obj.childArr.push(new ChildTestObj());
obj.childArr.push(new ChildTestObj());
obj.childArr[0].id = ' ';
obj.childArr[1].id = '  ';
s = new LBPSerializer('the-file-001.file');
s.isWriting = true;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, obj);
console.log('written', s);
assert.deepStrictEqual(
  s.file,
  [
    { bytes: 4, value: 3 }, // data version
    { bytes: 4, value: 0 }, // someNum
    { bytes: 4, value: 0 }, // someArr length
    { bytes: 1, value: 0 }, // hasChild
    { bytes: 4, value: 2 }, // childArr length
    { bytes: 4, value: 1 }, // childArr[0].id length
    { bytes: 4, value: 32 },
    { bytes: 4, value: 2 }, // childArr[1].id length
    { bytes: 4, value: 32 },
    { bytes: 4, value: 32 },
  ]
);

// Now read it back
out = new TestObj();
s.isWriting = false;
serializedOk = serialize_LBPSerializer(s);
assert.strictEqual(serializedOk, true);
serialize_TestObj(s, out);
assert.deepStrictEqual(out, obj);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment