Last active
July 9, 2017 22:59
-
-
Save dbuenzli/ba89e90cce9daabdd026993721c8477b to your computer and use it in GitHub Desktop.
UTF_8 module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (* See http://erratique.ch/software/uutf/doc/Uutf.String.UTF_8.html | |
| If you want to test the API use: | |
| opam pin add uutf https://github.com/dbuenzli/uutf.git#string-utf-8 *) | |
| (** UTF-8 encoded strings. | |
| The [UTF_8] module provides a {{!t}datatype} that asserts | |
| that a string value is valid UTF-8 and a few functions | |
| that operate under this assumption. *) | |
| module UTF_8 : sig | |
| (** {1 UTF-8 encoded strings} *) | |
| type t = private string | |
| (** The type for valid UTF-8 strings. Since the type is [private], | |
| values can be coerced to [string] but can only be created through | |
| the functions of this module. *) | |
| val is_valid : string -> bool | |
| (** [is_valid s] is [true] iff the string [s] is valid UTF-8. *) | |
| val of_string : string -> (t, t) result | |
| (** [of_string s] is [Ok s] if [s] is valid UTF-8 and [Error b] if | |
| [s] is not. In the latter case [b] is the result of a best-effort | |
| decode in which any UTF-8 decoding error is replaced by at | |
| least one replacement character {!Uchar.u_rep}. *) | |
| val v : string -> t | |
| (** [v s] is like [of_string s] but returns the valid UTF-8 string | |
| directly instead of a [result]. | |
| @raise Invalid_argument if [s] is not valid UTF-8. *) | |
| (**/**) | |
| val unsafe_of_string : string -> t (* NOTE(review): hidden from the generated docs by the enclosing stop comments; presumably skips UTF-8 validation, confirm against the implementation. *) | |
| (**/**) | |
| val to_string : t -> string | |
| (** [to_string s] is [s] as a string. *) | |
| (** {1:decindices Decode indices} | |
| In a string [s] a {e decode index} [i] is a valid string index | |
| where a Unicode character can be decoded. In a non-empty valid | |
| UTF-8 encoded string the index [0] is always a valid decode index. *) | |
| val next_index : t -> int -> int | |
| (** [next_index s i] is the smallest decode index after the string | |
| byte position [i] or the string length if there is no such index. | |
| @raise Invalid_argument if [i] is not a valid string position. *) | |
| val prev_index : t -> int -> int | |
| (** [prev_index s i] is the largest decode index before the string | |
| byte position [i] or [0] if there is no such index. | |
| @raise Invalid_argument if [i] is not a valid string position. *) | |
| val fold_indices : ('a -> int -> 'a) -> 'a -> t -> 'a | |
| (** [fold_indices f acc s] folds over the decode indices of [s], | |
| starting with [acc]. [f] is given the current accumulator and a | |
| decode index and returns the new accumulator. *) | |
| (** {1:uchars Unicode characters} *) | |
| val get_uchar : t -> int -> Uchar.t | |
| (** [get_uchar s i] is the Unicode character at the | |
| {{!decindices}decode index} [i] of [s]. | |
| @raise Invalid_argument if [i] is not a decode index of [s]. *) | |
| val fold_uchars : ('a -> int -> Uchar.t -> 'a) -> 'a -> t -> 'a | |
| (** [fold_uchars f acc s] folds over the Unicode characters of [s], | |
| starting with [acc]. The {{!decindices}decode index} of each | |
| character is passed to [f] along with the character. *) | |
| (** {1:ops Operations} *) | |
| val append : t -> t -> t | |
| (** [append s s'] appends [s'] to [s]. *) | |
| val concat : t -> t list -> t | |
| (** [concat sep ss] concatenates the list of strings [ss], separating | |
| consecutive elements of the list with [sep]. | |
| @raise Invalid_argument if the result is longer than | |
| {!Sys.max_string_length}. *) | |
| (** {1:comparisons Comparisons} *) | |
| val equal : t -> t -> bool | |
| (** [equal s s'] is [true] iff [s] and [s'] are bytewise equal. *) | |
| val compare : t -> t -> int | |
| (** [compare s s'] totally orders [s] and [s'] according to their | |
| byte representation. *) | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment