Skip to content

Instantly share code, notes, and snippets.

@dbuenzli
Created June 4, 2025 09:45
Show Gist options
  • Save dbuenzli/83b968337c5938105455b4e47e80cf4d to your computer and use it in GitHub Desktop.
Save dbuenzli/83b968337c5938105455b4e47e80cf4d to your computer and use it in GitHub Desktop.
Bigbytes signature
(*---------------------------------------------------------------------------
Copyright (c) 2025 The bigbytes programmers. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)
(** Bigarrays of bytes.
{b Note.} Getters and setters raise [Invalid_argument] on out of
bounds errors. Functions that encode 8-bit or 16-bit integers
represented by [int] values truncate their input to their least
significant bytes. *)
module Bigbytes : sig

  (** {1:ints Integers} *)

  type sub_uint8 = int
  (** The type for unsigned n-bit integers with [n < 8]. *)

  type uint8 = int
  (** The type for unsigned 8-bit integers. *)

  type uint16 = int
  (** The type for unsigned 16-bit integers. *)

  type uint32 = int32
  (** The type for unsigned 32-bit integers. *)

  type uint64 = int64
  (** The type for unsigned 64-bit integers. *)

  type int8 = int
  (** The type for signed 8-bit integers. *)

  type int16 = int
  (** The type for signed 16-bit integers. *)

  (** {2:note32 32-bit [int] note}

      The encoding read and written by this module for [int] values
      is the same on 64-bit and 32-bit platforms. However values outside
      the {e safe} range \[-2{^30};2{^30}-1\] will not be read back correctly
      on 32-bit platforms. *)

  val max_safe_int : int
  (** [max_safe_int] is 2{^30}-1. *)

  val min_safe_int : int
  (** [min_safe_int] is -2{^30}. *)

  val is_safe_int : int -> bool
  (** [is_safe_int i] is [true] iff [i] is 32-bit safe. *)

  (** {1:byte_sizes Byte sizes} *)

  type byte_pos = int
  (** The type for byte positions. *)

  type byte_size = int
  (** The type for byte sizes. *)

  (** Encoding byte sizes. *)
  module Size : sig

    (** {1:unsigned Unsigned integers} *)

    val uint8 : byte_size
    (** [uint8] is the encoding size of {!type-uint8} values. *)

    val uint16 : byte_size
    (** [uint16] is the encoding size of {!type-uint16} values. *)

    val uint32 : byte_size
    (** [uint32] is the encoding size of {!type-uint32} values. *)

    val uint64 : byte_size
    (** [uint64] is the encoding size of {!type-uint64} values. *)

    val vuint : int -> byte_size
    (** [vuint i] is the variable encoding size for integer [i].
        See {{!section-vuint}this note}. *)

    val vuint_min : byte_size
    (** [vuint_min] is the minimal encoding size of a variable encoding
        of an [int]. See {{!section-vuint}this note}. *)

    val vuint_max : byte_size
    (** [vuint_max] is the maximal encoding size of a variable encoding
        of an [int]. See {{!section-vuint}this note}. *)

    (** {1:signed Signed integers} *)

    val int8 : byte_size
    (** [int8] is the encoding size of {!type-int8} values. *)

    val int16 : byte_size
    (** [int16] is the encoding size of {!type-int16} values. *)

    val int32 : byte_size
    (** [int32] is the encoding size of {!type-int32} values. *)

    val int64 : byte_size
    (** [int64] is the encoding size of {!type-int64} values. *)

    val int : byte_size
    (** [int] is the encoding size of {!type-int} values. This is [8]
        regardless of platform bitness, see {{!Bigbytes.note32}this
        note}. *)

    (** {1:other Other} *)

    val float : byte_size
    (** [float] is the encoding size of {!type-float}. *)

    val binary_string : string -> byte_size
    (** [binary_string s] is the encoding size for the binary string [s].
        This is {!String.length}. *)

    val sized_binary_string : string -> byte_size
    (** [sized_binary_string s] is the encoding size for the sized string
        [s]. The encoding starts with the string length as a {!vuint},
        followed by the bytes of the string. *)

    val sized_binary_string_max : string -> byte_size
    (** [sized_binary_string_max s] is an upper bound for the encoding
        size of [s]. *)
  end

  (** {1:bigbytes Bigbytes} *)

  type t =
    (uint8, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
  (** The type for bigarrays of bytes. *)

  val create : byte_size -> t
  (** [create n] are [n] bytes of uninitialized memory. *)

  val make : byte_size -> fill:uint8 -> t
  (** [make n ~fill] are [n] bytes filled with [fill]. *)

  val init : byte_size -> init:(byte_pos -> uint8) -> t
  (** [init n ~init] are [n] bytes with byte [i] initialized to
      [init i]. *)

  val empty : t
  (** [empty] is an empty bigarray of bytes. *)

  val length : t -> byte_size
  (** [length b] is the length of [b]. *)

  val copy : t -> t
  (** [copy b] is an exact copy of [b] which is unaffected by
      modifications to [b]. *)

  val sub : t -> first:byte_pos -> length:byte_size -> t
  (** [sub b ~first ~length:n] is a copy of the bytes of [b] in range
      \[[first];[first + n - 1]\]. *)

  val fill : ?first:byte_pos -> ?length:byte_size -> fill:uint8 -> t -> unit
  (** [fill ~first ~length:n ~fill b] sets [b]'s bytes in range
      \[[first];[first + n - 1]\] to [fill].
      [first] defaults to [0] and [length] to [length b - first]. *)

  val blit : src:t -> byte_pos -> dst:t -> byte_pos -> length:byte_size -> unit
  (** [blit ~src src_first ~dst dst_first ~length:n] blits the bytes of
      [src] in range \[[src_first];[src_first + n - 1]\] to the bytes
      of [dst] in range \[[dst_first];[dst_first + n - 1]\]. *)

  (** {1:getters Getters} *)

  (** {2:get_unsigned Unsigned} *)

  val get_bit : t -> byte_pos -> bit:int -> bool
  (** [get_bit b i ~bit:n] is the zero-based [n]th least significant
      bit at byte position [i]. *)

  val get_bits : t -> byte_pos -> bits:uint8 -> sub_uint8
  (** [get_bits b i ~bits] is the value of bits set in [bits] at byte
      position [i]. *)

  val get_uint8 : t -> byte_pos -> uint8
  (** [get_uint8 b i] is the unsigned 8-bit integer at byte position [i]. *)

  val get_uint16_ne : t -> byte_pos -> uint16
  (** [get_uint16_ne b i] is the native endian unsigned 16-bit integer
      starting at byte position [i]. *)

  val get_uint16_be : t -> byte_pos -> uint16
  (** [get_uint16_be b i] is the big endian unsigned 16-bit integer
      starting at byte position [i]. *)

  val get_uint16_le : t -> byte_pos -> uint16
  (** [get_uint16_le b i] is the little endian unsigned 16-bit integer
      starting at byte position [i]. *)

  val get_uint32_ne : t -> byte_pos -> uint32
  (** [get_uint32_ne b i] is the native endian unsigned 32-bit integer
      starting at byte position [i]. *)

  val get_uint32_be : t -> byte_pos -> uint32
  (** [get_uint32_be b i] is the big endian unsigned 32-bit integer
      starting at byte position [i]. *)

  val get_uint32_le : t -> byte_pos -> uint32
  (** [get_uint32_le b i] is the little endian unsigned 32-bit integer
      starting at byte position [i]. *)

  val get_uint64_ne : t -> byte_pos -> uint64
  (** [get_uint64_ne b i] is the native endian unsigned 64-bit integer
      starting at byte position [i]. *)

  val get_uint64_be : t -> byte_pos -> uint64
  (** [get_uint64_be b i] is the big endian unsigned 64-bit integer
      starting at byte position [i]. *)

  val get_uint64_le : t -> byte_pos -> uint64
  (** [get_uint64_le b i] is the little endian unsigned 64-bit integer
      starting at byte position [i]. *)

  val get_vuint : t -> byte_pos -> int
  (** [get_vuint b i] is the unsigned variable length integer starting at
      position [i] in [b]. See {{!vuint}this note}. *)

  (** {2:get_signed Signed} *)

  val get_int8 : t -> byte_pos -> int8
  (** [get_int8 b i] is the signed 8-bit integer at byte position [i]. *)

  val get_int16_ne : t -> byte_pos -> int16
  (** [get_int16_ne b i] is the native endian signed 16-bit integer starting
      at byte position [i]. *)

  val get_int16_le : t -> byte_pos -> int16
  (** [get_int16_le b i] is the little endian signed 16-bit integer
      starting at byte position [i]. *)

  val get_int16_be : t -> byte_pos -> int16
  (** [get_int16_be b i] is the big endian signed 16-bit integer
      starting at byte position [i]. *)

  val get_int32_ne : t -> byte_pos -> int32
  (** [get_int32_ne b i] is the native endian signed 32-bit integer starting
      at byte position [i]. *)

  val get_int32_be : t -> byte_pos -> int32
  (** [get_int32_be b i] is the big endian signed 32-bit integer
      starting at byte position [i]. *)

  val get_int32_le : t -> byte_pos -> int32
  (** [get_int32_le b i] is the little endian signed 32-bit integer
      starting at byte position [i]. *)

  val get_int64_ne : t -> byte_pos -> int64
  (** [get_int64_ne b i] is the native endian signed 64-bit integer starting
      at byte position [i]. *)

  val get_int64_be : t -> byte_pos -> int64
  (** [get_int64_be b i] is the big endian signed 64-bit integer
      starting at byte position [i]. *)

  val get_int64_le : t -> byte_pos -> int64
  (** [get_int64_le b i] is the little endian signed 64-bit integer
      starting at byte position [i]. *)

  val get_int : t -> byte_pos -> int
  (** [get_int b i] is the integer starting at byte position [i].

      {b Warning.} See {{!Bigbytes.note32}this note}. *)

  (** {2:get_other Other} *)

  val get_float : t -> byte_pos -> float
  (** [get_float b i] is the 64-bit float starting at position [i] in [b]. *)

  val get_binary_string : t -> byte_pos -> length:byte_size -> string
  (** [get_binary_string b i ~length] are [length] bytes starting at position
      [i] in [b]. *)

  val get_sized_binary_string : t -> byte_pos -> string
  (** [get_sized_binary_string b i] is the string starting at position [i] in
      [b]. The string starts with a {{!get_vuint}variable length}
      encoding of the string size followed by the corresponding
      number of bytes. *)

  (** {1:setters Setters} *)

  (** {2:set_unsigned Unsigned} *)

  val set_bit : t -> byte_pos -> bit:int -> bool -> unit
  (** [set_bit b i ~bit:n v] sets the zero-based [n]th least significant
      bit at byte position [i] to [v]. *)

  val set_bits : t -> byte_pos -> bits:uint8 -> sub_uint8 -> unit
  (** [set_bits b i ~bits v] sets the bits set in [bits] at byte
      position [i] to the value of these bits in [v]. *)

  val set_uint8 : t -> byte_pos -> uint8 -> unit
  (** [set_uint8 b i v] sets the unsigned 8-bit integer at byte position [i]
      to [v]. *)

  val set_uint16_ne : t -> byte_pos -> uint16 -> unit
  (** [set_uint16_ne b i v] sets the native endian unsigned 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint16_le : t -> byte_pos -> uint16 -> unit
  (** [set_uint16_le b i v] sets the little endian unsigned 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint16_be : t -> byte_pos -> uint16 -> unit
  (** [set_uint16_be b i v] sets the big endian unsigned 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint32_ne : t -> byte_pos -> uint32 -> unit
  (** [set_uint32_ne b i v] sets the native endian unsigned 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint32_be : t -> byte_pos -> uint32 -> unit
  (** [set_uint32_be b i v] sets the big endian unsigned 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint32_le : t -> byte_pos -> uint32 -> unit
  (** [set_uint32_le b i v] sets the little endian unsigned 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint64_ne : t -> byte_pos -> uint64 -> unit
  (** [set_uint64_ne b i v] sets the native endian unsigned 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint64_be : t -> byte_pos -> uint64 -> unit
  (** [set_uint64_be b i v] sets the big endian unsigned 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_uint64_le : t -> byte_pos -> uint64 -> unit
  (** [set_uint64_le b i v] sets the little endian unsigned 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_vuint : t -> byte_pos -> int -> byte_pos
  (** [set_vuint b i v] sets an unsigned variable length integer starting
      at position [i] in [b] to [v] and returns the position after the
      encoding. See {{!vuint}this note}. *)

  (** {2:set_signed Signed} *)

  val set_int8 : t -> byte_pos -> int8 -> unit
  (** [set_int8 b i v] sets the signed 8-bit integer at byte position [i]
      of [b] to [v]. *)

  val set_int16_ne : t -> byte_pos -> int16 -> unit
  (** [set_int16_ne b i v] sets the native endian signed 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_int16_le : t -> byte_pos -> int16 -> unit
  (** [set_int16_le b i v] sets the little endian signed 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_int16_be : t -> byte_pos -> int16 -> unit
  (** [set_int16_be b i v] sets the big endian signed 16-bit integer
      starting at byte position [i] to [v]. *)

  val set_int32_ne : t -> byte_pos -> int32 -> unit
  (** [set_int32_ne b i v] sets the native endian signed 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_int32_be : t -> byte_pos -> int32 -> unit
  (** [set_int32_be b i v] sets the big endian signed 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_int32_le : t -> byte_pos -> int32 -> unit
  (** [set_int32_le b i v] sets the little endian signed 32-bit integer
      starting at byte position [i] to [v]. *)

  val set_int64_ne : t -> byte_pos -> int64 -> unit
  (** [set_int64_ne b i v] sets the native endian signed 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_int64_be : t -> byte_pos -> int64 -> unit
  (** [set_int64_be b i v] sets the big endian signed 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_int64_le : t -> byte_pos -> int64 -> unit
  (** [set_int64_le b i v] sets the little endian signed 64-bit integer
      starting at byte position [i] to [v]. *)

  val set_int : t -> byte_pos -> int -> unit
  (** [set_int b i v] sets the integer starting at byte position [i] to [v].

      {b Warning.} See {{!Bigbytes.note32}this note}. *)

  (** {2:set_other Other} *)

  val set_float : t -> byte_pos -> float -> unit
  (** [set_float b i v] sets a 64-bit float starting at position [i]
      in [b] to [v]. *)

  val set_binary_string : t -> byte_pos -> string -> unit
  (** [set_binary_string b i v] sets [String.length v] bytes starting
      at byte position [i] in [b] to [v]. *)

  val set_sized_binary_string : t -> byte_pos -> string -> byte_pos
  (** [set_sized_binary_string b i v] sets a string starting at byte position
      [i] in [b] to [v] and returns the position after the
      encoding. The written string starts with a {{!set_vuint}variable
      length} encoding of the string size followed by the
      corresponding number of bytes. *)

  (** {1:skipping Skipping} *)

  val skip_vuint : t -> byte_pos -> byte_pos
  (** [skip_vuint b i] is the (possibly out of bounds) position after the
      unsigned variable length integer starting at position [i]. *)

  val skip_sized_binary_string : t -> byte_pos -> byte_pos
  (** [skip_sized_binary_string b i] is the (possibly out of bounds)
      position after the string starting at position [i]. *)

  (** {1:vuint Variable length [vuint] encoding}

      The functions {!get_vuint} and {!set_vuint} provide a variable
      size [int] encoding. It's up to you to interpret the
      unsignedness. But bear in mind that it is not a zigzag encoding:
      if [-1] is encoded this takes {!Size.vuint_max} bytes. Since
      this uses [int]s, see also this
      {{!Bigbytes.note32}note} about 32-bit platforms. *)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment