-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIGrammar.v
235 lines (194 loc) · 8.52 KB
/
IGrammar.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
From stdpp Require Export list.
From stdpp Require Export relations.
Section IGrammar.
(*
An Infix Expression Grammar is a CFG where all productions have the following form:
A = LEX (atomic expressions)
A = A op A (infix expressions)
A is the only nonterminal in the grammar. LEX and op are terminals, such that no operand
symbol op, is LEX.
In Coq, we define an infix expression grammar as two types L (LEX) and O (op).
Every possible instance l of L gives a production A = l. Similarly, every possible instance
o of O gives a production A = A o A.
*)
Definition word L O := list (L + O).
Inductive parse_tree L O :=
| AtomicNode : L -> parse_tree L O
| InfixNode : parse_tree L O -> O -> parse_tree L O -> parse_tree L O.
Global Arguments AtomicNode {_ _} _.
Global Arguments InfixNode {_ _} _ _ _.
(* [yield t] gives the left-to-right concatenation of all the leaves of t. *)
Fixpoint yield {L O} t : word L O :=
match t with
| AtomicNode lex => [inl lex]
| InfixNode t1 op t2 => yield t1 ++ inr op :: yield t2
end.
(* [language w] states that [w] is a sentence in the language generated by the grammar. *)
Definition language {L O} (w : word L O) : Prop :=
exists (t : parse_tree L O), yield t = w.
Inductive tree_pattern O :=
| HPatt : tree_pattern O (* Empty tree pattern *)
| InfixPatt : tree_pattern O -> O -> tree_pattern O -> tree_pattern O.
Global Arguments HPatt {_}.
Global Arguments InfixPatt {_} _ _ _.
(* [matches t q] states that parse tree [t] matches the tree pattern [q]. *)
Inductive matches {L O} : parse_tree L O -> tree_pattern O -> Prop :=
(* Every parse tree matches the nonterminal pattern node. *)
| HMatch t :
matches t HPatt
(* Infix nodes match the infix pattern node only if its subtrees match and the operand
word is equal. *)
| InfixMatch t1 t2 q1 q2 o :
matches t1 q1 ->
matches t2 q2 ->
matches (InfixNode t1 o t2) (InfixPatt q1 o q2).
(* [matches_set t qs] states that parse tree t matches one of the tree patterns in qs. *)
Definition matches_set {L O} (t : parse_tree L O) (Q : tree_pattern O -> Prop) : Prop :=
exists q : tree_pattern O, Q q /\ matches t q.
(* [conflict_free qs t] states that parse tree [t] and all its subtrees should not match
any patterns in [qs]. *)
Inductive conflict_free {L O} (Q : tree_pattern O -> Prop) : parse_tree L O -> Prop :=
| Atomic_cf l :
conflict_free Q (AtomicNode l)
| Infix_cf t1 o t2 :
~ matches_set (InfixNode t1 o t2) Q ->
conflict_free Q t1 ->
conflict_free Q t2 ->
conflict_free Q (InfixNode t1 o t2).
(* Definition for disambiguation rules. Since we identify infix production using operator
[O], these rules will be defined over these infix operators. *)
Record drules (O : Type) := mkDrules {
(* Priority *)
prio : O -> O -> Prop;
(* Left-Associativity *)
left_a : O -> O -> Prop;
(* Right-Associativity *)
right_a : O -> O -> Prop;
(* The following three properties state that all disambiguation rules are decidable. *)
prio_dec : RelDecision prio;
left_a_dec : RelDecision left_a;
right_a_dec : RelDecision right_a;
}.
Global Existing Instances prio_dec left_a_dec right_a_dec.
Global Arguments prio {_} _ _ _.
Global Arguments left_a {_} _ _ _.
Global Arguments right_a {_} _ _ _.
(* The two commonly used types of tree patterns that will be used throughout the code base. *)
Definition CL {O} (o1 o2 : O) : tree_pattern O :=
InfixPatt (InfixPatt HPatt o2 HPatt) o1 HPatt.
Definition CR {O} (o1 o2 : O) : tree_pattern O :=
InfixPatt HPatt o1 (InfixPatt HPatt o2 HPatt).
(* [conflict_pattern pr q] states that [q] is a conflict-pattern generated by
the disambiguation rules [pr]. Using currying, you can obtain the entire set of conflict-patterns
using [conflict_pattern pr], which we will commonly use throughout the code base. *)
Inductive conflict_pattern {O} (pr : drules O) : tree_pattern O -> Prop :=
| CPrio1 (o1 o2 : O) :
pr.(prio) o1 o2 ->
conflict_pattern pr (CL o1 o2)
| CPrio2 o1 o2 :
pr.(prio) o1 o2 ->
conflict_pattern pr (CR o1 o2)
| CLeft o1 o2 :
pr.(left_a) o1 o2 ->
conflict_pattern pr (CR o1 o2)
| CRight o1 o2 :
pr.(right_a) o1 o2 ->
conflict_pattern pr (CL o1 o2).
(* [dlanguage pr w] states that [w] is a sentence in the disambiguated language,
generated by the grammar and disambiguation rules [pr]. *)
Definition dlanguage {L O} (pr : drules O) (w : word L O) : Prop :=
exists t : parse_tree L O, yield t = w /\ conflict_free (conflict_pattern pr) t.
(* [safe pr] states that the disambiguated language generated by [pr] is safe. Meaning
that [pr] preserves the language of the grammar. *)
Definition safe L {O} (pr : drules O) : Prop :=
forall w : word L O, language w -> dlanguage pr w.
(* [complete pr] states that the disambiguated language generated by [pr] is unambiguous.
Meaning there is at most one conflict-free parse tree for each sentence in the grammar. *)
Definition complete L {O} (pr : drules O) : Prop :=
forall t1 t2 : parse_tree L O,
yield t1 = yield t2 ->
conflict_free (conflict_pattern pr) t1 ->
conflict_free (conflict_pattern pr) t2 ->
t1 = t2.
(* [safe_pr pr] states that [pr] itself is safe. i.e. it imposes restrictions on [pr] such
that [safe pr] should hold. *)
Definition safe_pr {O} (pr : drules O) : Prop :=
forall o1 o2,
(pr.(prio) o1 o2 \/ pr.(left_a) o1 o2) ->
(pr.(prio) o2 o1 \/ pr.(right_a) o2 o1) -> False.
(* [complete_pr pr] states that [pr] itself is complete. i.e. it imposes restrictions on [pr] such
that [complete pr] should hold. *)
Record complete_pr {O} (pr : drules O) := mkComplete_pr {
complete_1 : forall o1 o2,
pr.(prio) o1 o2 \/ pr.(left_a) o1 o2 \/
pr.(prio) o2 o1 \/ pr.(right_a) o2 o1;
complete_2 : forall o1 o2 o3,
pr.(prio) o1 o2 -> pr.(prio) o2 o3 -> pr.(prio) o1 o3;
complete_4 : forall o1 o2 o3,
pr.(prio) o1 o2 -> pr.(left_a) o2 o3 -> pr.(prio) o1 o3;
complete_5 : forall o1 o2 o3,
pr.(prio) o1 o2 -> pr.(right_a) o2 o3 -> pr.(prio) o1 o3;
complete_6 : forall o1 o2 o3,
pr.(left_a) o1 o2 -> pr.(prio) o2 o3 -> pr.(prio) o1 o3;
complete_7 : forall o1 o2 o3,
pr.(left_a) o1 o2 -> pr.(left_a) o2 o3 -> pr.(left_a) o1 o3;
complete_8 : forall o1 o2 o3,
pr.(left_a) o1 o2 -> pr.(right_a) o2 o3 -> False;
}.
End IGrammar.
Section IGrammarRepair.
(* This section introduces functions that will help proving safety and completeness. *)
Definition is_conflict_pattern {O} (pr : drules O) (q : tree_pattern O) :=
match q with
| InfixPatt (InfixPatt HPatt o2 HPatt) o1 HPatt =>
if decide (pr.(prio) o1 o2) then true
else if decide (pr.(right_a) o1 o2) then true
else false
| InfixPatt HPatt o1 (InfixPatt HPatt o2 HPatt) =>
if decide (pr.(prio) o1 o2) then true
else if decide (pr.(left_a) o1 o2) then true
else false
| _ => false
end.
(* Infix insertion. It inserts a single lexical and operator symbol on the left-most side
of the tree, trying not to introduce a conflict. *)
Fixpoint insert_in {L O} (pr : drules O) t1 o t2 : parse_tree L O :=
match t2 with
| AtomicNode l2 => InfixNode t1 o (AtomicNode l2)
| InfixNode t21 o2 t22 =>
if is_conflict_pattern pr (CR o o2)
then InfixNode (insert_in pr t1 o t21) o2 t22
else InfixNode t1 o (InfixNode t21 o2 t22)
end.
Fixpoint repair_in {L O} (pr : drules O) t1 o t2 : parse_tree L O :=
match t1 with
| AtomicNode l => insert_in pr t1 o t2
| InfixNode t11 o1 t12 => repair_in pr t11 o1 (repair_in pr t12 o t2)
end.
(* Repair uses left-insertion to fix all conflicts from a tree. *)
Fixpoint repair {L O} (pr : drules O) t : parse_tree L O :=
match t with
| AtomicNode l => AtomicNode l
| InfixNode t1 o t2 => repair_in pr t1 o (repair pr t2)
end.
(* Parse works the same way as Repair, with the exception that it takes a yield as input
rather than a parse tree. If the input list is not well-formed, then [None] is returned.
This function will be helpful in proving completeness. *)
Fixpoint parse {L O} (pr : drules O) (w : word L O) : option (parse_tree L O) :=
match w with
| [inl l] => Some (AtomicNode l)
| inl l :: inr o :: w =>
match (parse pr w) with
| None => None
| Some t => Some (insert_in pr (AtomicNode l) o t)
end
| _ => None
end.
(* [yield_struct w] states that [w] is well-formed for usage in the [build] function. *)
Inductive yield_struct {L O} : word L O -> Prop :=
| LYield l :
yield_struct [inl l]
| LOYield l o w :
yield_struct w ->
yield_struct (inl l :: inr o :: w).
End IGrammarRepair.