Module CCUtf8_string
Unicode String, in UTF8
type uchar
= Stdlib.Uchar.t
type 'a gen
= unit -> 'a option
type 'a iter
= ('a -> unit) -> unit
Fast internal iterator.
- since
- 2.8
val equal : t -> t -> bool
val hash : t -> int
val compare : t -> t -> int
val pp : Stdlib.Format.formatter -> t -> unit
val to_string : t -> string
Identity.
val to_gen : ?idx:int -> t -> uchar gen
Generator of unicode codepoints.
- parameter idx
offset where to start the decoding.
val to_iter : ?idx:int -> t -> uchar iter
Iterator of unicode codepoints.
- parameter idx
offset where to start the decoding.
- since
- 2.8
val to_seq : ?idx:int -> t -> uchar Stdlib.Seq.t
Sequence of unicode codepoints. Renamed from to_std_seq since 3.0.
- parameter idx
offset where to start the decoding.
- since
- 3.0
val to_list : ?idx:int -> t -> uchar list
List of unicode codepoints.
- parameter idx
offset where to start the decoding.
val fold : ?idx:int -> ('a -> uchar -> 'a) -> 'a -> t -> 'a
val iter : ?idx:int -> (uchar -> unit) -> t -> unit
val n_chars : t -> int
Number of characters.
val n_bytes : t -> int
Number of bytes.
val map : (uchar -> uchar) -> t -> t
val filter_map : (uchar -> uchar option) -> t -> t
val flat_map : (uchar -> t) -> t -> t
val empty : t
Empty string.
- since
- 3.5
val concat : t -> t list -> t
concat sep l concatenates each string in l, inserting sep in between each string. Similar to String.concat.
val of_seq : uchar Stdlib.Seq.t -> t
Build a string from unicode codepoints. Renamed from of_std_seq since 3.0.
- since
- 3.0
val uchar_to_bytes : uchar -> char iter
Translate the unicode codepoint to an iterator over its UTF-8 bytes. This can be used, for example, in combination with Buffer.add_char on a pre-allocated buffer to add the bytes one by one (despite its name, Buffer.add_char takes individual bytes, not unicode codepoints).
- since
- 3.2
val of_gen : uchar gen -> t
val of_list : uchar list -> t
val of_string_exn : string -> t
Validate string by checking it is valid UTF8.
- raises Invalid_argument
if the string is not valid UTF8.
val of_string : string -> t option
Safe version of of_string_exn: returns None instead of raising if the string is not valid UTF8.
val unsafe_of_string : string -> t
Conversion from a string without validating. CAUTION: this is unsafe and can break all the other functions in this module. Use only if you're sure the string is valid UTF8. Upon iteration, if an invalid substring is met, Malformed will be raised.