-
-
Save leque/97f416db8282a9bc1ed2d40ddb3814c8 to your computer and use it in GitHub Desktop.
string -> Uchar.t Seq.t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(** A purely functional reader that pulls elements out of an underlying
    stream one at a time. *)
module type Reader = sig
  (** Reader state. *)
  type t

  (** The underlying source that elements are read from. *)
  type stream

  (** The type of the elements produced by [read]. *)
  type elem

  (** [make stream] creates a reader over [stream]. *)
  val make : stream -> t

  (** [read t] returns [Some (elem, t')], where [elem] is the next element
      and [t'] is the reader state to continue from, or [None] when no
      element can be produced. *)
  val read : t -> (elem * t) option
end
(** Binding operators and small helpers for writing [option]-returning
    code in a monadic style. *)
module Option_syntax = struct
  (** [let* x = o in e] is [Option.bind o (fun x -> e)]. *)
  let ( let* ) o f = Option.bind o f

  (** [let+ x = o in e] is [Option.map (fun x -> e) o]. *)
  let ( let+ ) o f = Option.map f o

  (** [return x] is [Some x]. *)
  let return x = Some x

  (** [guard b] is [Some ()] when [b] holds and [None] otherwise, so that
      [let* () = guard b in e] short-circuits to [None] when [b] is false. *)
  let guard = function
    | true -> Some ()
    | false -> None
end
(** A [Reader] that decodes a UTF-8 encoded [string] one [Uchar.t] at a
    time. *)
module Uchar_reader :
  Reader with type stream = string and type elem = Uchar.t = struct
  (* The string being decoded, paired with the byte offset at which the
     next character starts. *)
  type t = string * int
  type stream = string
  type elem = Uchar.t

  (** [make s] is a reader positioned at the first byte of [s]. *)
  let make s = (s, 0)

  (** [read (s, i)] decodes the character starting at byte offset [i] of
      [s] and returns it together with a reader advanced past it.

      Returns [None] when [i] has reached the end of [s], and also when
      the bytes at [i] are not a valid UTF-8 sequence; in the latter case
      the rest of the string is never examined. *)
  let read (s, i) =
    let open Option_syntax in
    let* () = guard (i < String.length s) in
    let dec = String.get_utf_8_uchar s i in
    let+ () = guard (Uchar.utf_decode_is_valid dec) in
    (Uchar.utf_decode_uchar dec, (s, i + Uchar.utf_decode_length dec))
end
(** [uchar_seq_of_utf_8_string s] is the sequence of characters obtained
    by repeatedly calling [Uchar_reader.read] on a reader over [s],
    starting from the beginning of the string. The sequence ends as soon
    as [Uchar_reader.read] returns [None]. *)
let uchar_seq_of_utf_8_string s =
  s |> Uchar_reader.make |> Seq.unfold Uchar_reader.read
(*
   # uchar_seq_of_utf_8_string "abcね" |> List.of_seq |> List.map Uchar.to_int;;
   - : int list = [97; 98; 99; 12397]
*)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment