Confusion as to how to integrate OperatorPrecedenceParser with mine - fparsec

The only thing I have left for my parser is to successfully make operator precedence work and my parser will be done. But I don't know how I should integrate it with my current parser.
First, here is the language representation:
module MiniML
(* Structures de donnees a exporter *)
type Exp =
| C of Cst
| Id of Id
| Lam of Id * Exp
| App of Exp * Exp
| Let of Id * Exp * Exp
| Pair of Exp * Exp
| If of Exp * Exp * Exp
and Cst = I of int | B of bool | Unit | Nil
and Id = string;;
let op = ["+";
"-";
"*";
"/";
"=";
"<";
">";
"#";
"and";
"or";
",";
"::"
]
(* Fonction permettant de transformer une exp en string *)
let rec exp2str e = match e with
| C (I i) -> string i
| C (B b) -> string b
| C Unit -> "()"
| C Nil -> "nil"
| Id x -> x
| Lam(x,e) -> "lam " + x + "." + (exp2str e)
| App(Id x,Id y) -> x + " " + y
| App(Id x,((C _) as e')) -> x + " " + (exp2str e')
| App(Id x,Pair(e1,e2)) when List.contains x op -> (exp2str e1) + " " + x + " " + (exp2str e2)
| App(Id x,((Pair _) as e')) -> x + (exp2str e')
| App(Id x, e') -> x + " (" + (exp2str e') + ")"
| App(e, Id x) -> "(" + (exp2str e) + ") " + x
| App(e', ((C _) as e'')) -> "(" + (exp2str e') + ") " + (exp2str e'')
| App(e,e') -> "(" + (exp2str e) + ") (" + (exp2str e') + ")"
| Let(x,e,e') ->
"let " + x + " = " + (exp2str e) + " in " + (exp2str e') + " end"
(* entensions *)
| Pair(e1,e2) -> "(" + (exp2str e1) + "," + (exp2str e2) + ")"
| If(e,e1,e2) -> "if " + (exp2str e) + " then " + (exp2str e1) + " else " + (exp2str e2)
Here is the current state of the parser code (I've left the french comment for completness sake.):
module FPParse
open MiniML
open FParsec
(* Raccourcis pour spaces. ws veux dire whitespace *)
let ws = spaces
let operator : Parser<MiniML.Id,unit> = op |> List.map pstring |> choice
(* Liste des mots cles du langage *)
let keyword : Parser<string,unit> = ["false";"true";"let";"end";"in";"if";"then";"else";"lam"] |> List.map pstring |> choice
(* Parse la premiere lettre d'un identifiant *)
let fstId = asciiLetter <|> pchar '_'
(* Parse les lettres autres que la premiere d'un identifiant *)
let restId = fstId <|> digit <|> pchar '''
(* Genere un parseur pour des valeurs entre parenthese *)
let betweenPar p = between (pchar '(' .>> ws) (pchar ')' .>> ws) p
(* Parse une constante boolean *)
let cstB = (stringReturn "true" (B true)) <|> (stringReturn "false" (B false))
(* Parse une valeur entiere *)
let cstI = puint32 |>> (int >> I)
(*
Parse Unit
*)
let cstU = stringReturn "()" Unit
(*
Parse Nil
*)
let cstN = stringReturn "[]" Nil
(* Parse une expression constante *)
let expC : Parser<Exp,unit> = cstB <|> cstI <|> cstU <|> cstN |>> C
(* Parse un string d'identifiant avec un parseur qui lorsqu'il reussis indique la fin du parsing de l'identifiant *)
let expIdStr = notFollowedByL keyword "Cannot use keyword as variable" >>.
notFollowedByL operator "Cannot use operator as variable" >>.
many1CharsTill2 fstId restId (notFollowedBy restId)
(* Parse un identifiant *)
let expId : Parser<Exp,unit> = expIdStr |>> (MiniML.Exp.Id)
(* Comme exp est recursif on doit le declarer avec une valeur indefinie pour l'utiliser *)
let exp, expRef = createParserForwardedToRef<Exp, unit>()
(* Comme expApp est recursif on doit le declarer avec une valeur indefinie pour l'utiliser *)
let expApp, expAppRef = createParserForwardedToRef<Exp, unit>()
(*
Parse une expression lambda de la forme: lam( )*expId( )*.( )*exp
*)
let expLam : Parser<Exp,unit> = (pstring "lam" >>. ws >>. expIdStr .>> ws .>> pchar '.') .>> ws .>>. exp |>> Lam
(*
Parse une expression let de la forme: let( )*expId( )*=( )*exp( )*in( )*exp( )*end
*)
let expLet = tuple3 (pstring "let" >>. ws >>. expIdStr .>> ws .>> pchar '=' .>> ws) (exp .>> ws .>> pstring "in" .>> ws) (exp .>> ws .>> pstring "end") |>> Let
(*
Parse une expression if de la forme: if( )*exp( )*then( )*exp( )*else( )*exp
*)
let expIf = tuple3 (pstring "if" >>. ws >>. exp .>> ws) (pstring "then" >>. ws >>. exp .>> ws) (pstring "else" >>. ws >>. exp) |>> If
(* Comme closeEXP est recursif on doit le declarer avec une valeur indefinie pour l'utiliser *)
let closeEXP, closeEXPRef = createParserForwardedToRef<Exp, unit>()
(*
Parse un operateur ! de la forme: !exp
*)
let expBang = (pstring "!" >>% MiniML.Id "!") .>>. closeEXP |>> App
let buildList (el,ef) =
let rec go l = match l with
| (e::es) -> App(MiniML.Id "cons", Pair(e,go es))
| [] -> C Nil
go (el # [ef])
let expList = between (pchar '[' .>> ws) (pchar ']') (many (exp .>>? (ws .>> pchar ';' .>> ws)) .>>. exp .>> ws
|>> buildList )
let opOpp : InfixOperator<Exp,unit,unit> list =
[
InfixOperator("*", ws, 7, Associativity.Left, fun x y -> App(MiniML.Id "*",Pair(x,y)));
InfixOperator("/", ws, 7, Associativity.Left, fun x y -> App(MiniML.Id "/",Pair(x,y)));
InfixOperator("+", ws, 6, Associativity.Left, fun x y -> App(MiniML.Id "+",Pair(x,y)));
InfixOperator("-", ws, 6, Associativity.Left, fun x y -> App(MiniML.Id "-",Pair(x,y)));
InfixOperator("::", ws,5, Associativity.Right, fun x y -> App(MiniML.Id "cons",Pair(x,y)));
InfixOperator("=", ws, 4, Associativity.Left, fun x y -> App(MiniML.Id "=",Pair(x,y)));
InfixOperator("<", ws, 4, Associativity.Left, fun x y -> App(MiniML.Id "<",Pair(x,y)));
InfixOperator(">", ws, 4, Associativity.Left, fun x y -> App(MiniML.Id ">",Pair(x,y)));
InfixOperator("and", ws, 3, Associativity.Right, fun x y -> App(MiniML.Id "and",Pair(x,y)));
InfixOperator("or", ws, 2, Associativity.Right, fun x y -> App(MiniML.Id "or",Pair(x,y)));
InfixOperator(",", ws,1, Associativity.None, fun x y -> Pair(x,y) )
]
let opp = new OperatorPrecedenceParser<Exp,unit,unit>()
let expOp = opp.ExpressionParser
let term = exp <|> betweenPar expOp
opp.TermParser <- term
List.iter (fun x -> opp.AddOperator(x)) opOpp
(*
Parse une expression close de la forme: expC | expId | (exp)( )* | [exp]
*)
do closeEXPRef := choice [expC ; expId ; expBang ; betweenPar exp ; expList] .>> ws
(*
Assigne le parseur d'application en lambda-calcul. Le parseur parse autant d'expression close que possible.
Il est impossible que la liste soit vide, du a l'utilisation de many1 qui echoue et retourne s'il n'y a pas
au moins une valeur.
*)
do expAppRef := many1 closeEXP |>> (function (x::xs) -> List.fold (fun x y -> App(x,y)) x xs | [] -> failwith "Impossible")
(*
Assigne le parseur d'expression en lambda-calcul. L'ordre des expressions represente leur priorite
*)
do expRef := [expLam;expIf;expLet;expApp] |> choice
(*
Parse une expression lambda-calcul au complet. Le eof s'assure qu'il ne reste pas de caractere non parse
*)
let mainExp = expOp .>> eof
(*
Prends un string et retourne un Exp. Peut leve un message d'erreur en exception si le parsing echoue
*)
let str2exp s = match run mainExp s with
| Success(result, _, _) -> result
| Failure(errorMsg, _, _) -> failwith errorMsg
I've tried to add it in the list of choice after the expApp parser but it's just ignored. If I put it before the expApp on in the closedExp, it throws exception because, I think, of infinite recursion. I really don't know how to mix it in. I don't want to be just given the solution, I'd like to know why. Also, is there any non-trivial exemple of big language being parse by FParsec?

Related

Mutually recursive functions over product type

(Beginner Coq question)
Related to Defining recursive function over product type, I'm trying to define a recursive function over a product type. The difference here is there's a mutually recursive definition. I keep running into this error:
Recursive definition of printObjItem is ill-formed.
Recursive call to printJson has principal argument equal to "val" instead of
a subterm of "item".
Conceptually it seems the recursion should go through since val is a subterm of item, is a subterm of items, is a subterm of x. I understand Coq is struggling with that first assertion but I'm not sure how to resolve. Is there a straightforward way without an explicit well-foundedness proof?
Require Import List.
Require Import String.
Import ListNotations.
Inductive Json :=
| Atom : Json
| String : string -> Json
| Array : nat -> list Json -> Json
| Object : list (string * Json) -> Json.
Fixpoint printJson (x : Json) :=
match x with
| Atom => "atom"
| String n => "'" ++ n ++ "'"
| Array _ els => "[" ++ (String.concat ", " (map printJson els)) ++ "]"
| Object items => "{" ++ (String.concat ", " (map printObjItem items)) ++ "}"
end%string
with printObjItem (item : string * Json) :=
let (key, val) := item in key ++ ": " ++ (printJson val).
One solution could be to make printObjItem a local definition:
Fixpoint printJson (x : Json) :=
let printObjItem (item : string * Json) :=
(let (key, val) := item in key ++ ": " ++ (printJson val))%string
in
match x with
| Atom => "atom"
| String n => "'" ++ n ++ "'"
| Array _ els => "[" ++ (String.concat ", " (map printJson els)) ++ "]"
| Object items => "{" ++ (String.concat ", " (map printObjItem items)) ++ "}"
end%string.

Boolean Expression with Laws

Hello can somebody help me? How this boolean expression simplified?
abcd + d
it simplified as:
d
im trying to use the Laws I don't understand at all
Here's the Laws
Basic Boolean Laws
Idempotent Law
A * A = A
A + A = A
Associative Law
(A * B) * C = A * (B * C)
(A + B) + C = A + (B + C)
Commutative Law
A * B = B * A
A + B = B + A
Distributive Law
A * (B + C) = A * B + A * C
A + (B * C) = (A + B) * (A + C)
Identity Law
A * 0 = 0 A * 1 = A
A + 1 = 1 A + 0 = A
Complement Law
A * ~A = 0
A + ~A = 1
Involution Law
~(~A) = A
DeMorgan's Law
~(A * B) = ~A + ~B
~(A + B) = ~A * ~B
Redundancy Laws
Absorption
A + (A * B) = A
A * (A + B) = A
(A * B) + (A * ~B) = A
(A + B) * (A + ~B) = A
A + (~A * B) = A + B
A * (~A + B) = A * B
Thanks in advance!
It's indeed D, by the following:
abcd+d -> (a+d)*(b+d)*(c+d)*(d+d) // Distributive Law
(a+d)*(b+d)*(c+d)*(d+d) -> (a+d)*(b+d)*(c+d)*d // Idempotent Law - d+d=d
(a+d)*(b+d)*(c+d)*d -> (a+d)*(b+d)*d // Redundancy Laws - (c+d)*d = d
(a+d)*(b+d)*d -> (a+d)*d // Redundancy Laws - (b+d)*d = d
(a+d)*d -> d // Redundancy Laws - (a+d)*d = d
Boolean Expression: abcd + d can be simplified as -
LHS = abcd + d [Assume: abcd + d*1 as A * 1 = A ]
= d(abc + 1) [Distributive Law]
= d(1 + abc)
= d(1) [Identity Law (A + 1 = 1)]
= d [Identity Law (A * 1 = A)]
= RHS

FParsec operator precedence parser conflicting with next parser in sequence

I have an operator presedence parser that parses mathematical expressions, similar to the calculator example in the FParsec repository.
The next parser in the sequence, however, parses -> which is causing conflicts with the subtraction operator.
Some representative code is:
open FParsec
let ws = spaces
let str s = pstring s .>> ws
let number = pfloat .>> ws
let opp = new OperatorPrecedenceParser<float,unit,unit>()
let expr = opp.ExpressionParser
opp.TermParser <- number <|> between (str "(") (str ")") expr
opp.AddOperator(InfixOperator("+", ws, 1, Associativity.Left, (+)))
opp.AddOperator(InfixOperator("-", ws, 1, Associativity.Left, (-)))
opp.AddOperator(InfixOperator("*", ws, 2, Associativity.Left, (*)))
opp.AddOperator(InfixOperator("/", ws, 2, Associativity.Left, (/)))
opp.AddOperator(InfixOperator("^", ws, 3, Associativity.Right, fun x y -> System.Math.Pow(x, y)))
opp.AddOperator(PrefixOperator("-", ws, 4, true, fun x -> -x))
let completeExpression = expr .>> ws
let program =
pipe3 completeExpression (str "->") completeExpression <| fun e1 _ e2 ->
(e1, e2)
Which doesn't work for, for example, 1+1 -> 1+2.
I know that if I change str "->" to str "=>" then something like 1+1 => 1+2 would work.
Is it possible to add look-ahead into the operator precedence parser? Or is there another way round this?
Thanks!
EDIT
I sorted this by adding a notFollowedBy in the after-string parser for the subtraction operator.
let after = (notFollowedBy (str ">")) >>. ws
opp.AddOperator(InfixOperator("-", after, 1, Associativity.Left, (-)))

OperatorPrecedenceParser throw exception about negative priority which I don't have

I am creating a parser for a programming language based on the lambda-calculus. I added an infix operator and their precedence but the parser crashed with an error about negative priority. I am able to do the parsing of operator by hand, but it seem that I cannot get the priority right. So I thought that I may as well learn to use the OperatorPrecedenceParser.
I will show the code because I have no idea why it crash, since I don't have any negative priority.
The language AST
module MiniML
type Exp =
| C of Cst
| Id of Id
| Lam of Id * Exp
| App of Exp * Exp
| Let of Id * Exp * Exp
| Pair of Exp * Exp
| If of Exp * Exp * Exp
and Cst = I of int | B of bool | Unit | Nil
and Id = string;;
let op = ["+";
"-";
"*";
"/";
"=";
"<";
">";
"#";
"and";
"or";
",";
"::"
]
Here is the parser itself. It's my first time with parser combinator (and parsing) so if there is something terribly wrong, I'd like to know. Otherwise, just knowing why it crash would be enough.
open MiniML
open FParsec
let ws = spaces
let operator : Parser<MiniML.Id,unit> = op |> List.map pstring |> choice
let keyword : Parser<string,unit> = ["false";"true";"let";"end";"in";"if";"then";"else";"lam"] |> List.map pstring |> choice
let fstId = asciiLetter <|> pchar '_'
let restId = fstId <|> digit <|> pchar '''
let betweenPar p = between (pchar '(' .>> ws) (pchar ')' .>> ws) p
let cstB = (stringReturn "true" (B true)) <|> (stringReturn "false" (B false))
let cstI = puint32 |>> (int >> I)
let cstU = stringReturn "()" Unit
let cstN = stringReturn "[]" Nil
let expC : Parser<Exp,unit> = cstB <|> cstI <|> cstU <|> cstN |>> C
let expIdStr = notFollowedByL keyword "Cannot use keyword as variable" >>.
notFollowedByL operator "Cannot use operator as variable" >>.
many1CharsTill2 fstId restId (notFollowedBy restId)
let expId : Parser<Exp,unit> = expIdStr |>> (MiniML.Exp.Id)
let exp, expRef = createParserForwardedToRef<Exp, unit>()
let expApp, expAppRef = createParserForwardedToRef<Exp, unit>()
let expLam : Parser<Exp,unit> = (pstring "lam" >>. ws >>. expIdStr .>> ws .>> pchar '.') .>> ws .>>. exp |>> Lam
let expLet = tuple3 (pstring "let" >>. ws >>. expIdStr .>> ws .>> pchar '=' .>> ws) (exp .>> ws .>> pstring "in" .>> ws) (exp .>> ws .>> pstring "end") |>> Let
let expIf = tuple3 (pstring "if" >>. ws >>. exp .>> ws) (pstring "then" >>. ws >>. exp .>> ws) (pstring "else" >>. ws >>. exp) |>> If
let closeEXP, closeEXPRef = createParserForwardedToRef<Exp, unit>()
let expBang = (pstring "!" >>% MiniML.Id "!") .>>. closeEXP |>> App
let buildList (el,ef) =
let rec go l = match l with
| (e::es) -> App(MiniML.Id "cons", Pair(e,go es))
| [] -> C Nil
go (el # [ef])
let expList = between (pchar '[' .>> ws) (pchar ']') (many (exp .>>? (ws .>> pchar ';' .>> ws)) .>>. exp .>> ws
|>> buildList )
do closeEXPRef := choice [expC ; expId ; expBang ; betweenPar exp ; expList] .>> ws
do expAppRef := many1 closeEXP |>> (function (x::xs) -> List.fold (fun x y -> App(x,y)) x xs | [] -> failwith "Impossible")
let opOpp : InfixOperator<Exp,unit,unit> list =
[
InfixOperator("*", ws, 6, Associativity.Left, fun x y -> App(MiniML.Id "*",Pair(x,y)));
InfixOperator("/", ws, 6, Associativity.Left, fun x y -> App(MiniML.Id "/",Pair(x,y)));
InfixOperator("+", ws, 5, Associativity.Left, fun x y -> App(MiniML.Id "+",Pair(x,y)));
InfixOperator("-", ws, 5, Associativity.Left, fun x y -> App(MiniML.Id "-",Pair(x,y)));
InfixOperator("::", ws,4, Associativity.Right, fun x y -> App(MiniML.Id "cons",Pair(x,y)));
InfixOperator("=", ws, 3, Associativity.Left, fun x y -> App(MiniML.Id "=",Pair(x,y)));
InfixOperator("<", ws, 3, Associativity.Left, fun x y -> App(MiniML.Id "<",Pair(x,y)));
InfixOperator(">", ws, 3, Associativity.Left, fun x y -> App(MiniML.Id ">",Pair(x,y)));
InfixOperator("and", ws, 2, Associativity.Right, fun x y -> App(MiniML.Id "and",Pair(x,y)));
InfixOperator("or", ws, 1, Associativity.Right, fun x y -> App(MiniML.Id "or",Pair(x,y)));
InfixOperator(",", ws,0, Associativity.None, fun x y -> Pair(x,y) )
]
let opp = new OperatorPrecedenceParser<Exp,unit,unit>()
let expr = opp.ExpressionParser
let term = exp <|> betweenPar expr
opp.TermParser <- term
List.iter (fun x -> opp.AddOperator(x)) opOpp
do expRef := [expLam;expIf;expLet;expApp] |> choice |> (fun p -> p .>>. opt (expOp operator) |>> binOp )
let mainExp = expr .>> eof
Your sample code doesn't seem to be complete, since expOp and binOp are not included. When I run your code without the last two lines, the OPP throws an ArgumentOutOfRangeException with the message "The operator precedence must be greater than 0." when the comma operator is added. The problem is that you specified 0 as the precedence for the comma operator.
Such problems are easier to diagnose when you use an IDE with a fully integrated debugger like Visual Studio.

OCaml: How to decode unicode-escape string?

Given a str as following:
let str = "#include \\u003Cunordered_map\\u003E\\u000D\\u000A"
How do I decode unicode-escape string into a unicode string or in may case Ascii string in OCaml?
In python I could easily do
str.decode("unicode-escape")
If your embedded escape sequences are always going to encode ASCII characters, as you say, you can find them and replace them with the decoded equivalent:
let decode s =
let re = Str.regexp "\\\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
let s1 n = String.make 1 (Char.chr n) in
let subst = function
| Str.Delim u -> s1 (int_of_string ("0x" ^ String.sub u 2 4))
| Str.Text t -> t
in
String.concat "" (List.map subst (Str.full_split re s))
This works for your example:
val decode : string -> string = <fun>
# decode "#include \\u003Cunordered_map\\u003E\\u000D\\u000A";;
- : string = "#include <unordered_map>\r\n"
Indeed, Python has built-in support to decode these sequences.
Update
To support all four-digit hex escape sequences "\uXXXX" by converting to UTF-8 you can use this code:
let utf8encode s =
let prefs = [| 0x0; 0xc0; 0xe0 |] in
let s1 n = String.make 1 (Char.chr n) in
let rec ienc k sofar resid =
let bct = if k = 0 then 7 else 6 - k in
if resid < 1 lsl bct then
(s1 (prefs.(k) + resid)) ^ sofar
else
ienc (k + 1) (s1 (0x80 + resid mod 64) ^ sofar) (resid / 64)
in
ienc 0 "" (int_of_string ("0x" ^ s))
let decode2 s =
let re = Str.regexp "\\\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
let subst = function
| Str.Delim u -> utf8encode (String.sub u 2 4)
| Str.Text t -> t
in
String.concat "" (List.map subst (Str.full_split re s))
It also works for your example, and some other examples:
val utf8encode : string -> string = <fun>
val decode2 : string -> string = <fun>
# decode2 "#include \\u003Cunordered_map\\u003E\\u000D\\u000A";;
- : string = "#include <unordered_map>\r\n"
# print_endline (decode2 "\\u00A2");;
¢
- : unit = ()
# print_endline (decode2 "\\u20AC");;
€
- : unit = ()