Created
June 9, 2023 02:21
-
-
Save thoughtpolice/317c9306ca79a2fe400a84f0c463cd82 to your computer and use it in GitHub Desktop.
Standalone implementation of KSUIDs for TypeScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// SPDX-License-Identifier: MIT | |
// SPDX-FileCopyrightText: 2017-2021 Mark Wubben <[email protected]> | |
// SPDX-FileCopyrightText: 2017-2023 Segment.io | |
// SPDX-FileCopyrightText: 2023 Austin Seipp <[email protected]> | |
// Implementation of KSUIDs, a K-Sortable Globally Unique IDentifier, as defined | |
// by https://github.com/segmentio/ksuid -- this is a port of the upstream | |
// project to typescript/deno, with some modifications. | |
// | |
// External API based on the one from https://github.com/novemberborn/ksuid, | |
// with some code copied; but with some modifications to be simpler and ported | |
// to typescript | |
// | |
// Base62 algorithms are basically direct translaterations of upstream golang to | |
// javascript: https://github.com/segmentio/ksuid/blob/master/base62.go -- this | |
// is less efficient than e.g. base-x it seems, but actually seems to work | |
// better in practice with fewer edge cases; I trust it as more correct wrt. the | |
// upstream implementation, while almost every other base62 encoder I found | |
// couldn't handle original ksuid strings correctly (e.g. they would fail to | |
// decode them), or would decode incorrectly (e.g. 21 byte buffers returned), | |
// and so on. | |
// | |
// However, this at least makes the implementation completely standalone and | |
// isolated from any other dependencies, which is nice. it should work on deno, | |
// node, and the browser. | |
// | |
// Includes tests and benchmarks for deno, and a command line program that can | |
// be used to generate ksuids and inspect them, like upstream tools: | |
// | |
// $ deno run util/ksuid.ts | |
// $ deno run util/ksuid.ts -f inspect 0ujtsYcgvSTl8PAuAdqWYSMnLOv | |
// $ deno run util/ksuid.ts -f inspect $(deno run util/ksuid.ts -n 4) | |
import * as flags from "$std/flags/mod.ts"; | |
// --------------------------------------------------------------------------------------------------------------------- | |
// KSUID's epoch starts more recently so that the 32-bit number space gives a | |
// significantly higher useful lifetime of around 136 years from March 2014. | |
// This number (14e11) was picked to be easy to remember. | |
const EPOCH_IN_MS = 14e11; | |
const MAX_TIME_IN_MS = 1e3 * (2 ** 32 - 1) + EPOCH_IN_MS; | |
const TIMESTAMP_BYTE_LENGTH = 4; // uint32 timestamp | |
const PAYLOAD_BYTE_LENGTH = 16; // 16-byte payload, so... | |
const BYTE_LENGTH = 4 + 16; // 20 bytes total! | |
const STRING_ENCODED_LENGTH = 27; // 20 bytes (160 bits) = 27 base62 characters, exactly | |
const TIME_IN_MS_ASSERTION = | |
`Valid KSUID timestamps must be in milliseconds since ${ | |
new Date(0).toISOString() | |
}, | |
no earlier than ${new Date(EPOCH_IN_MS).toISOString()} and no later than ${ | |
new Date(MAX_TIME_IN_MS).toISOString() | |
} | |
`.trim().replace(/(\n|\s)+/g, " ").replace(/\.000Z/g, "Z"); | |
const VALID_ENCODING_ASSERTION = | |
`Valid encoded KSUIDs are ${STRING_ENCODED_LENGTH} characters`; | |
const VALID_BUFFER_ASSERTION = `Valid KSUID buffers are ${BYTE_LENGTH} bytes`; | |
const VALID_PAYLOAD_ASSERTION = | |
`Valid KSUID payloads are ${PAYLOAD_BYTE_LENGTH} bytes`; | |
function concatArrayBuffers(bufs: ArrayBuffer[]): ArrayBuffer { | |
let total = 0; | |
for (const buf of bufs) { | |
total += buf.byteLength; | |
} | |
const result = new Uint8Array(total); | |
let offset = 0; | |
for (const buf of bufs) { | |
result.set(new Uint8Array(buf), offset); | |
offset += buf.byteLength; | |
} | |
return result.buffer; | |
} | |
function validateParts(ms: number, payload: ArrayBuffer): number { | |
if (!Number.isInteger(ms) || ms < EPOCH_IN_MS || ms > MAX_TIME_IN_MS) { | |
return 1; | |
} | |
if (payload.byteLength !== PAYLOAD_BYTE_LENGTH) { | |
return 2; | |
} | |
return 0; | |
} | |
// --------------------------------------------------------------------------------------------------------------------- | |
const BASE62_ALPHABET = | |
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; | |
function base62Value(digit: number): number { | |
const offsetUppercase = 10; | |
const offsetLowercase = 36; | |
if (digit >= 48 && digit <= 57) { | |
return digit - 48; // '0'-'9' | |
} else if (digit >= 65 && digit <= 90) { | |
return offsetUppercase + (digit - 65); // 'A'-'Z' | |
} else { | |
return offsetLowercase + (digit - 97); // 'a'-'z' | |
} | |
} | |
export function fastEncodeBase62(dst: Uint8Array, src: Uint8Array) { | |
if (src.length !== 20) throw new Error("Invalid base62 string"); | |
if (dst.length !== 27) throw new Error("Invalid buffer length"); | |
const srcBase = 4294967296; | |
const dstBase = 62; | |
// Split src into 5 4-byte words, this is where most of the efficiency comes | |
// from because this is a O(N^2) algorithm, and we make N = N / 4 by working | |
// on 32 bits at a time. | |
const parts = new Uint32Array([ | |
(new DataView(src.buffer)).getUint32(0), | |
(new DataView(src.buffer)).getUint32(4), | |
(new DataView(src.buffer)).getUint32(8), | |
(new DataView(src.buffer)).getUint32(12), | |
(new DataView(src.buffer)).getUint32(16), | |
]); | |
let n = 27; | |
let bp = parts.slice(); | |
const bq = new Uint32Array(5); | |
while (n !== 0) { | |
let off = 0; | |
const quotient = bq.slice(0); | |
let remainder = 0; | |
for (const c of bp) { | |
const value = c + remainder * srcBase; | |
const digit = Math.floor(value / dstBase); | |
remainder = value % dstBase; | |
if (quotient.length !== 0 || digit !== 0) { | |
quotient[off++] = digit; | |
} | |
} | |
// Writes at the end of the destination buffer because we computed the | |
// lowest bits first. | |
dst[--n] = BASE62_ALPHABET.charCodeAt(remainder); | |
bp = quotient; | |
} | |
return dst; | |
} | |
export function fastDecodeBase62(dst: Uint8Array, src: Uint8Array) { | |
if (src.length !== 27) throw new Error("Invalid base62 string"); | |
if (dst.length !== 20) throw new Error("Invalid buffer length"); | |
const srcBase = 62; | |
const dstBase = 4294967296; | |
const parts = new Uint8Array([ | |
base62Value(src[0]), | |
base62Value(src[1]), | |
base62Value(src[2]), | |
base62Value(src[3]), | |
base62Value(src[4]), | |
base62Value(src[5]), | |
base62Value(src[6]), | |
base62Value(src[7]), | |
base62Value(src[8]), | |
base62Value(src[9]), | |
base62Value(src[10]), | |
base62Value(src[11]), | |
base62Value(src[12]), | |
base62Value(src[13]), | |
base62Value(src[14]), | |
base62Value(src[15]), | |
base62Value(src[16]), | |
base62Value(src[17]), | |
base62Value(src[18]), | |
base62Value(src[19]), | |
base62Value(src[20]), | |
base62Value(src[21]), | |
base62Value(src[22]), | |
base62Value(src[23]), | |
base62Value(src[24]), | |
base62Value(src[25]), | |
base62Value(src[26]), | |
]); | |
let n = 20; | |
let bp = parts.slice(); | |
const bq = new Uint8Array(27); | |
while (n > 0) { | |
const quotient = bq.slice(); | |
let remainder = 0, off = 0; | |
for (let i = 0; i < bp.length; i++) { | |
const c = bp[i]; | |
const value = c + remainder * srcBase; | |
const digit = Math.floor(value / dstBase); | |
remainder = value % dstBase; | |
if (quotient.length !== 0 || digit !== 0) { | |
quotient[off++] = digit; | |
} | |
} | |
if (n < 4) { | |
throw new Error("Short buffer"); | |
} | |
dst[n - 4] = remainder >> 24; | |
dst[n - 3] = remainder >> 16; | |
dst[n - 2] = remainder >> 8; | |
dst[n - 1] = remainder; | |
n -= 4; | |
bp = quotient; | |
} | |
return dst; | |
} | |
// --------------------------------------------------------------------------------------------------------------------- | |
/** | |
* Validate a KSUID. This ensures it has the proper binary format and timestamp range, | |
* but otherwise does not validate the payload. | |
* | |
* @param blob Buffer containing a KSUID | |
* @returns False if invalid; true otherwise. | |
*/ | |
export function validate(blob: ArrayBuffer): boolean { | |
if (blob.byteLength !== BYTE_LENGTH) { | |
return false; | |
} | |
if ( | |
0 != validateParts( | |
1e3 * (new DataView(blob)).getUint32(0) + EPOCH_IN_MS, | |
blob.slice(TIMESTAMP_BYTE_LENGTH), | |
) | |
) { | |
return false; | |
} | |
return true; | |
} | |
/** | |
* Construct a KSUID from the given timestamp and payload. | |
* | |
* @param ms Milliseconds since unix epoch. Typically `Date.now()`. | |
* @param payload 16-byte payload. Use `crypto.getRandomValues(16)`. | |
* @returns A KSUID. | |
*/ | |
export function fromParts(ms: number, payload: ArrayBuffer): KSUID { | |
switch (validateParts(ms, payload)) { | |
case 1: | |
throw new TypeError(TIME_IN_MS_ASSERTION); | |
case 2: | |
throw new TypeError(VALID_PAYLOAD_ASSERTION); | |
default: | |
} | |
const timestamp = Math.floor((ms - EPOCH_IN_MS) / 1e3); | |
const buffer = new ArrayBuffer(TIMESTAMP_BYTE_LENGTH); | |
const view = new DataView(buffer); | |
view.setUint32(0, timestamp); | |
const result = concatArrayBuffers([buffer, payload]); | |
if (result.byteLength !== BYTE_LENGTH) { | |
throw new Error(VALID_BUFFER_ASSERTION); | |
} | |
return new KSUID(result); | |
} | |
/** | |
* Create a KSUID based on the current Unix timestamp and a securely generated | |
* random payload. | |
* | |
* @returns A KSUID. | |
*/ | |
export function create() { | |
return fromParts( | |
Date.now(), | |
crypto.getRandomValues(new Uint8Array(PAYLOAD_BYTE_LENGTH)), | |
); | |
} | |
/** | |
* Parse the string representation of a KSUID; this should be in base62 format, and | |
* is always a string exactly 27 characters long. | |
* | |
* @param str Serialized KSUID input | |
* @returns A KSUID. | |
*/ | |
export function parse(str: string): KSUID { | |
if (str.length !== STRING_ENCODED_LENGTH) { | |
throw new TypeError(VALID_ENCODING_ASSERTION); | |
} | |
const bytes = new Uint8Array(20); | |
fastDecodeBase62( | |
bytes, | |
new TextEncoder().encode(str), | |
); | |
return new KSUID(bytes.buffer); | |
} | |
// --------------------------------------------------------------------------------------------------------------------- | |
export class KSUID { | |
private blob: ArrayBuffer; | |
constructor(blob: ArrayBuffer) { | |
if (!validate(blob)) { | |
throw new TypeError(VALID_BUFFER_ASSERTION); | |
} | |
const copy = new ArrayBuffer(20); | |
new Uint8Array(copy).set(new Uint8Array(blob)); | |
this.blob = copy; | |
} | |
/** | |
* Get the raw bytes of the KSUID. | |
*/ | |
get raw(): ArrayBuffer { | |
return this.blob; | |
} | |
get rawHex(): string { | |
return KSUID.buf2hex(this.blob); | |
} | |
/** | |
* Get the string representation of the KSUID; a 27-character base62 string. | |
*/ | |
get string(): string { | |
const result = new Uint8Array(27); | |
fastEncodeBase62(result, new Uint8Array(this.blob)); | |
return new TextDecoder().decode(result); | |
} | |
/** | |
* Get the timestamp of the KSUID as a Date object. | |
*/ | |
get date(): Date { | |
return new Date(this.timestamp); | |
} | |
/** | |
* Get the timestamp of the KSUID in milliseconds since unix epoch. | |
*/ | |
get timestamp(): number { | |
return 1e3 * this.timestampRaw + EPOCH_IN_MS; | |
} | |
/** | |
* Get the raw timestamp of the KSUID in seconds since unix epoch; this is not | |
* actually an exact date, but a scaled value that is adjusted to March 2014. | |
* You normally should not need this. | |
*/ | |
get timestampRaw(): number { | |
return (new DataView(this.blob)).getUint32(0); | |
} | |
/** | |
* Get the binary payload of the KSUID. | |
* | |
* @returns A 16-byte ArrayBuffer. | |
*/ | |
get payload(): ArrayBuffer { | |
return this.blob.slice(TIMESTAMP_BYTE_LENGTH); | |
} | |
/** | |
* Get the hex-encoded payload of the 16-byte payload. | |
* | |
* @returns A 32-character hex string. | |
*/ | |
get payloadHex(): string { | |
return KSUID.buf2hex(this.payload); | |
} | |
/** | |
* Compare two KSUIDs for equality. | |
* | |
* @param other Other KSUID to compare to. | |
* @returns True if equal; false otherwise. | |
*/ | |
equals(other: KSUID): boolean { | |
return this.string === other.string; // FIXME: optimize | |
} | |
/** | |
* Get the string representation of the KSUID; a 27-character base62 string. | |
* | |
* @returns String representation of the KSUID. | |
*/ | |
toString(): string { | |
return this.string; | |
} | |
/* Convert an ArrayBuffer to a hex string. */ | |
private static buf2hex(buffer: ArrayBuffer): string { | |
return [...new Uint8Array(buffer)] | |
.map((x) => x.toString(16).padStart(2, "0")) | |
.join("") | |
.toUpperCase(); | |
} | |
} | |
// --------------------------------------------------------------------------------------------------------------------- | |
// simple port of upstream ksuid inspection program from segmentio/ksuid | |
if (import.meta.main) { | |
const args = flags.parse(Deno.args, { | |
string: ["n", "f"], | |
default: { n: "1" }, | |
}); | |
if (parseInt(args.n, 10) < 1) { | |
throw new Error("invalid number of ksuids to generate"); | |
} | |
const n = parseInt(args.n, 10); | |
if (args.f !== undefined) { | |
switch (args.f) { | |
case "inspect": { | |
const printKSUID = (ksuid: KSUID) => { | |
console.log(` | |
REPRESENTATION: | |
String: ${ksuid.string} | |
Raw: ${ksuid.raw} | |
COMPONENTS: | |
Time: ${ksuid.date} | |
Timestamp: ${ksuid.timestampRaw} | |
Payload: ${ksuid.payloadHex} | |
`); | |
}; | |
if (args._.length !== 0) { | |
for (const str of args._) { | |
printKSUID(parse(str.toString())); | |
} | |
} else { | |
for (let i = 0; i < n; i++) { | |
printKSUID(create()); | |
} | |
} | |
break; | |
} | |
default: { | |
throw new Error(`unknown flag: ${args.f}`); | |
} | |
} | |
Deno.exit(0); | |
} | |
for (let i = 0; i < n; i++) { | |
console.log(create().string); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment