module Levenshtein: CCLevenshtein
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
module type STRING =sig
..end
This data structure represents a list of results that is evaluated only as far as the user wants. If the user only wants a few elements, she doesn't pay for the remaining ones.
In particular, when matching a string against a (big) set of indexed
strings, we return a continuation list so that, even if there are many results,
only those actually asked for are evaluated.
type 'a klist = unit -> [ `Cons of 'a * 'a klist | `Nil ]
val klist_to_list : 'a klist -> 'a list
The signature for a given string representation provides 3 main things:
- an edit_distance function to compute the edit distance between strings;
- an automaton type that is built from a string s and a maximum distance n, and only accepts the strings s' such that edit_distance s s' <= n;
- an Index module that can be used to map many strings to values, like a regular string map, but for which retrieval is fuzzy (for a given maximal distance).

For example:

let words = CCIO.with_in "/usr/share/dict/words"
(fun i -> CCIO.read_all i |> CCString.Split.list_cpy ~by:"\n");;
let words = List.map (fun s->s,s) words;;
let idx = CCLevenshtein.Index.of_list words;;
CCLevenshtein.Index.retrieve ~limit:1 idx "hell" |> CCLevenshtein.klist_to_list;;
module type S =sig
..end
module Make (Str : STRING) : S with type string_ = Str.t and type char_ = Str.char_
include CCLevenshtein.S
val debug_print : Pervasives.out_channel -> automaton -> unit