Module CCParse

module CCParse: sig .. end

Very Simple Parser Combinators

status still a bit unstable, the type 'a t might still change.

Examples:

parse recursive structures

open Containers_string.Parse;;

type tree = L of int | N of tree * tree;;

let mk_leaf x = L x
let mk_node x y = N(x,y)

let ptree = fix @@ fun self ->
  skip_space *>
  ( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
    <|>
    (U.int >|= mk_leaf) )
;;

parse_string_exn "(1 (2 3))" ptree;;
parse_string_exn "((1 2) (3 (4 5)))" ptree;;

Parse a list of words

open Containers_string.Parse;;
let p = U.list ~sep:"," U.word;;
parse_string_exn "[abc , de, hello ,world  ]" p;;

Stress Test
This makes a list of 100_000 integers, prints it and parses it back.

let p = CCParse.(U.list ~sep:"," U.int);;

let l = CCList.(1 -- 100_000);;
let l_printed =
  CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;;

let l' = CCParse.parse_string_exn ~p l_printed;;

assert (l=l');;

Since 0.11

type 'a or_error = [ `Error of string | `Ok of 'a ] 
type line_num = int 
Since 0.13
type col_num = int 
Since 0.13
exception ParseError of line_num * col_num * (unit -> string)
position * message

This type changed at 0.13


Input


module MemoTbl: sig .. end
type input = {
   is_done : unit -> bool; (*
End of input?
*)
   cur : unit -> char; (*
Current char
*)
   next : unit -> char; (*
Returns current char; if not is_done, move to next char, otherwise throw ParseError
*)
   pos : unit -> int; (*
Current pos
*)
   lnum : unit -> line_num; (*
Line number
Since 0.13
*)
   cnum : unit -> col_num; (*
Column number
Since 0.13
*)
   memo : MemoTbl.t; (*
Memoization table, if any
*)
   backtrack : int -> unit; (*
Restore to previous pos
*)
   sub : int -> int -> string; (*
sub pos len extracts slice from pos with len
*)
}
The type of input, which must allow for backtracking somehow. This type is unstable and its details might change.
val input_of_string : string -> input
Parse the string
val input_of_chan : ?size:int -> Pervasives.in_channel -> input
input_of_chan ic reads lazily the content of ic as parsing goes. All content that is read is saved to an internal buffer for backtracking.
Since 0.13
size : number of bytes read at once from ic

Combinators


type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit 
Takes the input and two continuations: The type definition changed since 0.14 to avoid stack overflows
Raises ParseError in case of failure
val return : 'a -> 'a t
Always succeeds, without consuming its input
val pure : 'a -> 'a t
Synonym to CCParse.return
val (>|=) : 'a t -> ('a -> 'b) -> 'b t
Map
val (>>=) : 'a t -> ('a -> 'b t) -> 'b t
Monadic bind
val (<*>) : ('a -> 'b) t -> 'a t -> 'b t
Applicative
val ( <* ) : 'a t -> 'b t -> 'a t
a <* b parses a into x, parses b and ignores its result, and returns x
val ( *> ) : 'b t -> 'a t -> 'a t
a *> b parses a, then parses b into x, and returns x. The results of a is ignored.
val fail : string -> 'a t
fail msg fails with the given message. It can trigger a backtrack
val eoi : unit t
Expect the end of input, fails otherwise
val nop : unit t
Succeed with ()
val char : char -> char t
char c parses the char c and nothing else
val char_if : (char -> bool) -> char t
char_if f parses a character c if f c = true
val chars_if : (char -> bool) -> string t
chars_if f parses a string of chars that satisfy f
val chars1_if : (char -> bool) -> string t
Same as CCParse.chars_if, but only non-empty strings
val endline : char t
Parses '\n'
val space : char t
Tab or space
val white : char t
Tab or space or newline
val skip_chars : (char -> bool) -> unit t
Skip 0 or more chars satisfying the predicate
val skip_space : unit t
Skip ' ' and '\t'
val skip_white : unit t
Skip ' ' and '\t' and '\n'
val is_alpha : char -> bool
Is the char a letter?
val is_num : char -> bool
Is the char a digit?
val is_alpha_num : char -> bool
val is_space : char -> bool
True on ' ' and '\t'
val is_white : char -> bool
True on ' ' and '\t' and '\n'
Since 0.13
val (~~~) : (char -> bool) -> char -> bool
Negation on predicates
val (|||) : (char -> bool) -> (char -> bool) -> char -> bool
Disjunction on predicates
val (&&&) : (char -> bool) -> (char -> bool) -> char -> bool
Conjunction on predicates
val (<|>) : 'a t -> 'a t -> 'a t
a <|> b tries to parse a, and if a fails, backtracks and tries to parse b. Therefore, it succeeds if either succeeds
val string : string -> string t
string s parses exactly the string s, and nothing else
val many : 'a t -> 'a list t
many p parses a list of p, eagerly (as long as possible)
val many1 : 'a t -> 'a list t
parses a non empty list
val skip : 'a t -> unit t
skip p parses p and ignores its result
val sep : by:'b t -> 'a t -> 'a list t
sep ~by p parses a list of p separated by by
val sep1 : by:'b t -> 'a t -> 'a list t
sep1 ~by p parses a non empty list of p, separated by by
val fix : ('a t -> 'a t) -> 'a t
Fixpoint combinator
val memo : 'a t -> 'a t
Memoize the parser. memo p will behave like p, but when called in a state (read: position in input) it has already processed, memo p returns a result directly. The implementation uses an underlying hashtable. This can be costly in memory, but improve the run time a lot if there is a lot of backtracking involving p.

This function is not thread-safe.
Since 0.13

val fix_memo : ('a t -> 'a t) -> 'a t
Same as CCParse.fix, but the fixpoint is memoized.
Since 0.13

Parse

Those functions have a label ~p on the parser, since 0.14.

val parse : input:input -> p:'a t -> 'a or_error
parse ~input p applies p on the input, and returns `Ok x if p succeeds with x, or `Error s otherwise
val parse_exn : input:input -> p:'a t -> 'a
Raises ParseError if it fails
val parse_string : string -> p:'a t -> 'a or_error
Specialization of CCParse.parse for string inputs
val parse_string_exn : string -> p:'a t -> 'a
Raises ParseError if it fails
val parse_file : ?size:int -> file:string -> p:'a t -> 'a or_error
parse_file ~file p parses file with p by opening the file and using CCParse.input_of_chan.
Since 0.13
size : size of chunks read from file
val parse_file_exn : ?size:int -> file:string -> p:'a t -> 'a
Unsafe version of CCParse.parse_file
Since 0.13

Utils


module U: sig .. end