-
-
Save sebinsua/e756554faaa2d346aff0341374ee792e to your computer and use it in GitHub Desktop.
type _Tuple< | |
T, | |
N extends number, | |
R extends readonly T[] = [] | |
> = R["length"] extends N ? R : _Tuple<T, N, readonly [T, ...R]>; | |
type Tuple<T, N extends number> = _Tuple<T, N> & { | |
readonly length: N; | |
[I: number]: T; | |
[Symbol.iterator]: () => IterableIterator<T>; | |
}; | |
function zip< | |
A extends readonly any[], | |
Length extends A["length"], | |
B extends Tuple<any, Length> | |
>(a: A, b: B): Tuple<readonly [A[number], B[number]], Length> { | |
if (a.length !== b.length) { | |
throw new Error(`zip cannot operate on different length arrays; ${a.length} !== ${b.length}`); | |
} | |
return a.map((v, index) => [v, b[index]]) as Tuple< | |
readonly [A[number], B[number]], | |
Length | |
>; | |
} | |
const a = [1, 2, 3] as const; | |
const b1 = [1, 2, 6, 2, 4] as const; | |
const b2 = [1, 2, 6] as const; | |
// @ts-expect-error Source has 5 element(s) but target allows only 3. | |
const c1 = zip(a, b1); | |
const c2 = zip(a, b2); | |
console.log(c2); | |
// ^? |
I was able to take this helper and create an assertsHasLength
:
type _Tuple<
T,
N extends number,
R extends readonly T[] = []
> = R["length"] extends N ? R : _Tuple<T, N, readonly [T, ...R]>;
type Tuple<T, N extends number> = _Tuple<T, N> & {
readonly length: N;
[I: number]: T;
[Symbol.iterator]: () => IterableIterator<T>;
};
function zip<
A extends readonly any[],
Length extends A["length"],
B extends Tuple<any, Length>
>(a: A, b: B): Tuple<readonly [A[number], B[number]], Length> {
if (a.length !== b.length) {
throw new Error(
`zip cannot operate on different length arrays; ${a.length} !== ${b.length}`
);
}
return a.map((v, index) => [v, b[index]]) as Tuple<
readonly [A[number], B[number]],
Length
>;
}
// ...
type TupleSeq_<T, L, A extends [...any]> = A["length"] extends L
? A
: TupleSeq_<T, L, [...A, T]>;
type TupleSeq<T, L> = TupleSeq_<T, L, []>;
function assertsHasLength<T, L extends number>(
arr: T[],
length: L
): asserts arr is TupleSeq<T, L> {
if (arr.length !== length) {
throw new Error(
`Array has a length of ${arr.length} instead of ${length}.`
);
}
}
declare const a: string[];
declare const b: number[];
assertsHasLength(a, 3);
assertsHasLength(b, 3);
const [first, second, third] = a;
const arr = zip(a, b);
It's not as helpful as we'd like, since (1) our Tuple
type resolves number
into an empty tuple instead of an array of a non-fixed length, and (2) rather than prescribing that an array has a length of a particular numeric literal, we want to be able to specify that an array has a Length
which is the same as the length of another array.
I wonder if the latter is possible already or whether it could be a feature request that the TypeScript team might be able to implement...
Rather than using length
in this case, could we make use of opaque/branded types?
See: https://twitter.com/sebinsua/status/1583064897432477696?s=61&t=Fb9i9QQ8Pp-4n0iSmZ05CA
Unfortunately, regarding not matching on number
, it doesn't seem possible to use extends
in this way as by definition all numeric literals inherit from number
. I guess we'd need TypeScript to add a NumberLiteral
type that is the superset of all numeric literals in between individual numeric literals and number
. (Maybe this type could just be an infinite number interval.)
And, regarding the possibility of using a runtime assertion of equal lengths (or some other property) to assign an opaque/branded/flavoured type to two or more values, this isn't currently possible as TypeScript doesn't support multiple/aggregate function assertions or propagation of type assertions outside of their scope.
I think that everything I attempted is possible, as I've found that somebody has implemented type-safe tensors here: https://github.com/newhouseb/potatogpt/tree/main#-fully-typed-tensors
// Utilities
type UnionToIntersection<U> = (U extends any ? (k: U) => void : never) extends (
k: infer I
) => void
? I
: never;
type IsUnion<T> = [T] extends [UnionToIntersection<T>] ? false : true;
type And<A, B> = A extends true ? (B extends true ? true : false) : false;
type Or<A, B> = A extends true ? true : B extends true ? true : false;
type Not<A> = A extends true ? false : true;
// Defines a branded dimension with a runtime-determined size and an associated label.
export type Var<Label extends string> = number & { label: Label };
export const Var = <Label extends string>(d: number, label: Label) => {
return d as Var<Label>;
};
export const isVar = <Label extends string>(
value: any
): value is Var<Label> => {
return typeof value === "object" && "label" in value;
};
// We check whether `T` is a numeric literal by checking that `number` does not
// extend from it but that it does extend from `number`.
type IsNumericLiteral<T> = number extends T
? false
: T extends number
? true
: false;
type IsVar<T> = T extends Var<string> ? true : false;
// For type-checking of tensors to work they can only be created using
// numeric literals (e.g. `5`) or `Var<string>` and not types like
// `number` or `1 | 2 | 3`.
type IsNumericLiteralOrVar<T extends number | (string & number)> = And<
// We disallow `T` to be a union of types.
Not<IsUnion<T>>,
Or<
// We allow `T` to be a numeric literal but not a number.
IsNumericLiteral<T>,
// We allow `T` to be a `Var`.
IsVar<T>
>
>;
type IsShapeContainingOnlyNumericLiteralsOrVarDimensions<
T extends ReadonlyArray<number | Var<string>>
> = T extends ReadonlyArray<unknown>
? { [K in keyof T]: IsNumericLiteralOrVar<T[K]> } extends {
[K in keyof T]: true;
}
? T
: never
: never;
export type Dimension = number | Var<string>;
export type Tensor<Shape extends readonly Dimension[]> = {
data: Float32Array;
shape: Shape;
};
export type InvalidTensor<Shape extends readonly Dimension[]> = [
never,
"Invalid shape: please provide an array of only numeric literals or `Var`s.",
Shape
];
export function tensor<const Shape extends readonly Dimension[]>(
shape: Shape,
init?: number[]
): Shape extends IsShapeContainingOnlyNumericLiteralsOrVarDimensions<Shape>
? Tensor<Shape>
: InvalidTensor<Shape> {
return {
data: new Float32Array(init || (shape.reduce((a, b) => a * b, 1) as any)),
shape,
} as any;
}
export type Vector<T extends number> = Tensor<[1, T]>;
export function vector<Size extends number>(
size: Size,
init?: number[]
): Vector<Size>;
export function vector<Label extends string>(
size: Var<Label>,
init?: number[]
): Vector<Var<Label>>;
export function vector<N extends readonly [number, ...number[]]>(
init: N
): Vector<N["length"]>;
export function vector(
size: number | Var<string> | readonly number[],
init?: number[]
): Vector<any> {
let shape: Dimension[];
if (typeof size === "number") {
shape = [1, size];
} else if (isVar(size)) {
shape = [1, size];
} else if (Array.isArray(size)) {
shape = [1, size.length];
init = size;
} else {
throw new Error("Invalid input type for vector.");
}
return tensor(shape, init) as any;
}
function zip<V extends Vector<number>>(a: V, b: V): Tensor<[V["shape"][1], 2]> {
if (a.shape[1] !== b.shape[1]) {
throw new Error(
`zip cannot operate on different length vectors; ${a.shape[1]} !== ${b.shape[1]}`
);
}
const length = a.shape[1];
const resultData: number[] = [];
for (let i = 0; i < length; i++) {
resultData.push(a.data[i], b.data[i]);
}
return tensor([length, 2], resultData) as any;
}
const t1 = tensor([5, Var(3, "word")]);
const v1 = vector(5);
const v2 = vector(Var(3, "hello"));
const v3 = vector([1, 2, 3]);
const v4 = vector([1, 2, 3]);
const v5 = vector([4, 5, 6]);
const v6 = vector([7, 8, 9, 10]);
const v7 = vector(Var(3, "three"), [1, 2, 3]);
const v8 = vector(Var(3, "three"), [5, 10, 15]);
const v9 = vector(Var(4, "four"), [10, 11, 12, 13]);
const zipped = zip(v4, v5); // Works fine
const zippedError = zip(v4, v6); // Now it's a compile-time error
const zipped2 = zip(v7, v8); // Works fine
const zippedError2 = zip(v7, v9); // Now it's a compile-time error
// Utilities
type UnionToIntersection<U> = (U extends any ? (k: U) => void : never) extends (
k: infer I
) => void
? I
: never;
type IsUnion<T> = [T] extends [UnionToIntersection<T>] ? false : true;
type And<A, B> = A extends true ? (B extends true ? true : false) : false;
type Or<A, B> = A extends true ? true : B extends true ? true : false;
type Not<A> = A extends true ? false : true;
// Defines a branded dimension with a runtime-determined size and an associated label.
export type Var<Label extends string> = number & { label: Label };
export const Var = <Label extends string>(d: number, label: Label) => {
return d as Var<Label>;
};
// We check whether `T` is a numeric literal by checking that `number` does not
// extend from it but that it does extend from `number`.
type IsNumericLiteral<T> = number extends T
? false
: T extends number
? true
: false;
type IsVar<T> = T extends Var<string> ? true : false;
// For type-checking of tensors to work they can only be created using
// numeric literals (e.g. `5`) or `Var<string>` and not types like
// `number` or `1 | 2 | 3`.
type IsNumericLiteralOrVar<T extends number | (string & number) | readonly number[]> = And<
// We disallow `T` to be a union of types.
Not<IsUnion<T>>,
Or<
// We allow `T` to be a numeric literal but not a number.
IsNumericLiteral<T>,
// We allow `T` to be a `Var`.
IsVar<T>
>
>;
type IsShapeContainingOnlyNumericLiteralsOrVarDimensions<
T extends ReadonlyArray<number | Var<string>>
> = T extends ReadonlyArray<unknown>
? { [K in keyof T]: IsNumericLiteralOrVar<T[K]> } extends {
[K in keyof T]: true;
}
? T
: never
: never;
export type Dimension = number | Var<string>;
export type Tensor<Shape extends readonly Dimension[]> = {
data: Float32Array;
shape: Shape;
};
export type InvalidTensor<Shape extends readonly Dimension[]> = [
never,
"Invalid tensor: please provide an array of only numeric literals or `Var`s.",
Shape
];
export function tensor<const Shape extends readonly Dimension[]>(
shape: Shape,
init?: number[]
): Shape extends IsShapeContainingOnlyNumericLiteralsOrVarDimensions<Shape>
? Tensor<Shape>
: InvalidTensor<Shape> {
return {
data: new Float32Array(init || (shape.reduce((a, b) => a * b, 1) as any)),
shape,
} as any;
}
export type Matrix<Rows extends Dimension, Columns extends Dimension> = Tensor<[Rows, Columns]>;
export type InvalidMatrix<Shape extends readonly [Dimension, Dimension]> = [
never,
"Invalid matrix: please provide an array of only numeric literals or `Var`s.",
Shape
];
export function matrix<const Shape extends readonly [Dimension, Dimension]>(
shape: Shape,
init?: number[]
): Shape extends IsShapeContainingOnlyNumericLiteralsOrVarDimensions<Shape>
? Matrix<Shape[0], Shape[1]>
: InvalidMatrix<Shape> {
return tensor(shape, init) as any;
}
export type Vector<Size extends number> = Tensor<[1, Size]>;
export type InvalidVector<Size extends Dimension | readonly number[]> = [
never,
"Invalid vector: please provide either a numeric literal or a `Var`.",
Size
];
export function vector<const Size extends number>(
size: Size,
init?: number[]
): true extends IsNumericLiteralOrVar<Size>
? Vector<Size>
: InvalidVector<Size>;
export function vector<const Size extends Dimension>(
size: Size,
init?: number[]
): true extends IsNumericLiteralOrVar<Size>
? Vector<Size>
: InvalidVector<Size>;
export function vector<const Array extends readonly [number, ...number[]]>(
init: Array
): Vector<Array["length"]>;
export function vector<const Size extends number | Var<string> | readonly number[]>(
size: Size,
init?: number[]
): Vector<any> {
let shape: Dimension[];
if (typeof size === "number") {
shape = [1, size];
} else if (Array.isArray(size)) {
shape = [1, size.length];
init = size;
} else {
throw new Error("Invalid input type for vector.");
}
return tensor(shape, init) as any;
}
function zip<SameVector extends Vector<number>>(a: SameVector, b: SameVector): Matrix<SameVector["shape"][1], 2> {
if (a.shape[1] !== b.shape[1]) {
throw new Error(
`zip cannot operate on different length vectors; ${a.shape[1]} !== ${b.shape[1]}`
);
}
const length = a.shape[1];
const resultData: number[] = [];
for (let i = 0; i < length; i++) {
resultData.push(a.data[i], b.data[i]);
}
return matrix([length, 2], resultData) as any;
}
const t1 = tensor([5, Var(3, "three"), 10]);
const m1 = matrix([5, Var(3, "three")]);
const v1 = vector(2);
const v2 = vector(Var(3, "three"));
const v3 = vector([1, 2, 3]);
const v4 = vector([1, 2, 3]);
const v5 = vector([4, 5, 6]);
const v6 = vector([7, 8, 9, 10]);
const v7 = vector(Var(3, "three"), [1, 2, 3]);
const v8 = vector(Var(3, "three"), [5, 10, 15]);
const v9 = vector(Var(4, "four"), [10, 11, 12, 13]);
const zipped = zip(v4, v5); // Works fine
const zippedError = zip(v4, v6); // Now it's a compile-time error
const zipped2 = zip(v7, v8); // Works fine
const zippedError2 = zip(v7, v9); // Now it's a compile-time error
I improved upon this and wrote it up as a blog post here: https://twitter.com/sebinsua/status/1656297294008819712
type Zipped<A extends readonly (readonly any[])[]> = {
[RowIndex in keyof A[0]]: RowIndex extends "length" ? number : {
[ArrayIndex in keyof A]: A[ArrayIndex] extends readonly any[] ? A[ArrayIndex][RowIndex] : never;
};
};
type NumericRange<TotalLength extends number, TempRange extends any[] = []> =
TempRange['length'] extends TotalLength ? TempRange : NumericRange<TotalLength, [...TempRange, TempRange['length']]>;
type TupleFromKeys<T, K extends readonly (keyof T)[]> = {
[I in keyof K]: K[I] extends keyof T ? T[K[I]] : never;
};
type Tuple<T extends readonly (readonly any[])[]> = TupleFromKeys<Zipped<T>, NumericRange<T[0]['length']>>;
function zip<const T extends readonly (readonly any[])[]>(...arrays: T): Tuple<T> {
const minLength = Math.min(...arrays.map(arr => arr.length));
const result: any[][] = [];
for (let i = 0; i < minLength; i++) {
const zippedItem = arrays.map(arr => arr[i]);
result.push(zippedItem);
}
return result as Tuple<T>;
}
const grid = [
['a', 1, true],
['b', 2, false],
['c', 3, true]
] as const;
const [col1, col2, col3] = zip(...grid);
console.log(col1);
console.log(col2);
console.log(col3);
^ This approach would be useful if you needed to convert a grid of rows into a list of columns.
I've been wondering whether it was possible to create a type-safe
zip
operation and did so today.Although, maybe this isn't how people normally use
zip
, recently I've been thinking about how in Python you join series/frames and end up withNaN
ornull
s in columns which are empty, and how this can cause bugs downstream. I was wondering whether it'd be possible to make a type-safe data programming language... Basically, by having invariants on things like lengths, and only letting people ignore these if they provide a function to "impute" missing values, etc.