-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmatch.ml
468 lines (394 loc) · 16.5 KB
/
match.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
open Spotlib.Spot
open List
open Infix
open Stype
module I = Ident
open Option.Open
let fail = None
module PathLimit = struct
let decr ?(by=1) score =
let score = score - by in
if score < 0 then None
else Some score
end
module TypeLimit = struct
type t = { score: int;
expansions: int;
}
let create score = { score; expansions = 50 }
(* Type alias expansion is limited to 50. CR jfuruse: make it configurable. *)
let decr ?(by=1) ({ score } as desc) =
let score = score - by in
if score < 0 then None
else Some { desc with score }
let max t1 t2 =
match t1, t2 with
| None, None -> None
| None, _ -> t2
| _, None -> t1
| Some ({ score= s1 }, _), Some ({ score= s2 }, _) ->
(* we ignore expansion *)
if s1 >= s2 then t1 else t2
let maxs = fold_left1 max
end
module Make( A: sig
val cache : Levenshtein.StringWithHashtbl.cache
end) = struct
let error = ref false
let match_name
: string (* pattern *)
-> string (* target *)
-> int (* limit (upperbound) *)
-> (int * (string (* the matched *)
* string option(* the corresponding pattern. This is always Some but it is intentional *)
)) option
= fun n m limit ->
let n0 = n in
let m0 = m in
match n with
| "_" | "_*_" -> return (limit, (m0, Some n0))
| "(_)" when
begin try match m.[0] with
| 'A'..'Z' | 'a'..'z' | '_' | '0'..'9' | '#' (* class type *) -> false
| _ -> true
with _ -> false end ->
return (limit, (m, Some n))
| _ ->
if n = m then return (limit, (m0, Some n0))
else
let n = String.lowercase n
and m = String.lowercase m in
let upper_bound = min (min (String.length n / 2) 3) limit + 1 in (* CR jfuruse: should be configurable *)
let dist =
match Levenshtein.StringWithHashtbl.distance A.cache ~upper_bound n m with
| Levenshtein.Exact n -> n
| GEQ n -> n
in
if dist >= upper_bound then fail
else return (limit - dist, (m0, Some n0))
let match_package
: string (* pattern *)
-> OCamlFind.Packages.t (* target *)
-> int (* limit (upperbound) *)
-> (int * (OCamlFind.Packages.t (* the matched *)
* (string * string option) (* string match info *))) option
= fun n phack limit ->
let ps = OCamlFind.Packages.to_strings phack in
fold_left (fun st p ->
match st, match_name n p limit with
| None, None -> None
| (Some _ as x), None -> x
| (Some (limit, _) as x), Some (limit', _) when limit >= limit' -> x
| _, Some (limit, d) -> Some (limit, (phack, d))
) None ps
let rec match_path pat p limit : (int * Spath.t) option =
let open Spath in
match pat, p with
| SPdot(n1, "_*_"), SPdot(m1, m2) ->
max (match_path n1 m1 limit) (match_path pat m1 limit)
>>= fun (limit, d1) -> return (limit, nhc_dot d1 m2)
| SPident n, SPpack phack ->
match_package n phack limit
>>= fun (limit, d) -> return (limit, nhc_attr (`Pack (pat, d)) p)
| SPident n, SPdot(_m1, m2) ->
match_name n m2 limit
>>= fun (limit, d2) -> return (limit, nhc_attr (`AfterDot d2) p)
| SPident n, SPident m ->
match_name n m limit
>>= fun (limit, d) -> return (limit, nhc_attr (`Ident (pat, d)) p)
| SPdot(n1, n2), SPdot(m1, m2) ->
match_name n2 m2 limit
>>= fun (limit, d2) -> match_path n1 m1 limit
>>= fun (limit, d1) -> return (limit, nhc_attr (`AfterDot d2) (nhc_dot d1 m2))
| SPapply (li1, _li2), SPapply (p1, p2) ->
(* A(B) matches with A(C) *)
(* CR jfuruse: li2 is given but never used... *)
match_path li1 p1 limit
>>= fun (limit, d) -> return (limit, nhc_apply d p2)
| li, SPapply (p1, p2) ->
(* A matches with A(C) but with slight penalty *)
PathLimit.decr limit
>>= match_path li p1
>>= fun (limit, d) -> return (limit, nhc_apply d p2)
| _ -> fail
let dummy_pattern_type_var = Any
let dummy_type_expr_var = Var (-1)
let hist_type = ref []
let prof_type pat targ =
if Conf.prof_match_types then
try
let xs = assq pat !hist_type in
try
let r = assq targ !xs in
incr r
with
| Not_found ->
xs := (targ, ref 1) :: !xs
with
| Not_found ->
hist_type := (pat, ref [(targ, ref 1)]) :: !hist_type
let report_prof_type () =
if Conf.prof_match_types then
let distinct, total =
fold_left (fun st (_, xs) ->
fold_left (fun (d,t) (_, r) -> (d+1, t + !r)) st !xs) (0,0) !hist_type
in
!!% "distinct=%d total=%d@." distinct total
(* I never see good result with no_target_type_instantiate=false *)
let match_type (* ?(no_target_type_instantiate=true) *) pattern target limit
: (TypeLimit.t * _) option
=
prof_type pattern target;
let open TypeLimit in
(* We try to remember matches like 'a = 'var.
If we see a match 'a = 'var2 later, we lower the limit slightly
in order to give 'a = 'var higher score
CRv2 jfuruse: We can extend this to var to non var type case,
since stype is hcons-ed. Ah, but we have subtyping...
*)
let var_match_history = Hashtbl.create 107 in
let rec match_type pattern target limit =
(* pattern = tvar is treated specially since it always returns the maximum score w/o changing the pattern *)
match pattern, target with
| (Var _ | Univar _ | UnivarNamed _), _ -> assert false
| VarNamed (_, s), (Var _ | VarNamed _ | Univar _ | UnivarNamed _) ->
begin match Hashtbl.find_opt var_match_history s with
| None ->
Hashtbl.add var_match_history s (`Found target);
return (limit, Attr (`Ref pattern, target))
| Some (`Found target') when target == target' ->
return (limit, Attr (`Ref pattern, target))
| Some (`Found _) ->
(* A variable matches with different types
We mark this fact so that limit is lowered at most one time
for one variable.
*)
Hashtbl.replace var_match_history s `Unmatched;
decr limit >>= fun limit ->
return (limit, Attr (`Ref pattern, target))
| Some `Unmatched ->
return (limit, Attr (`Ref pattern, target))
end
| Any, (Var _ | VarNamed _ | Univar _ | UnivarNamed _) ->
(* return (limit, Attr (`Ref pattern, target)) *)
(* Any is used for dummy pattern vars in match_types, so it should not be highlighted *)
return (limit, target)
| Any, _ ->
(* Any matches anything but with a slight penalty. No real unification. *)
decr limit >>= fun limit ->
return (limit, Attr (`Ref pattern, target))
| _, Link { contents = `Linked ty } ->
decr limit >>= match_type pattern ty
| _, Link _ -> fail
| _ ->
maxs [
remove_target_type_option pattern target limit;
make_tuple_left pattern target limit;
make_tuple_right pattern target limit;
match_arrow_types pattern target limit;
match_alias pattern target limit;
(match pattern, target with
| Tuple ts1, Tuple ts2 ->
match_types ts1 ts2 limit
>>= fun (score, ds) -> return (score, Attr (`Ref pattern, Tuple ds))
| Constr ({dt_path=p1}, ts1), Constr (({dt_path=p2} as dt), ts2) ->
match_path p1 p2 limit.score >>= fun (score, pd) ->
match_types ts1 ts2 { limit with score } >>= fun (score, ds) ->
return (score, Attr (`Ref pattern, Constr ({dt with dt_path= pd}, ds)))
| _, (Var _ | VarNamed _ | Univar _ | UnivarNamed _) ->
decr ~by:(* (if no_target_type_instantiate then 1000
else size_type pattern) *) 1000 limit
>>= fun limit -> return (limit, Attr (`Ref pattern ,target))
| _ -> fail)
]
and match_alias pattern target limit =
if limit.expansions <= 0 then fail
else
match target with
| Constr ({dt_path= _p; dt_aliases= {contents = Some (Some (params, ty))}}, ts2) ->
let limit = { limit with expansions = limit.expansions - 1 } in
if length params <> length ts2 then begin
!!% "@[<2>ERROR: aliased type arity mismatch:@ %a@ (where alias = (%a).%a)@]@."
Stype.format target
Format.(list ", " Stype.format) params
Stype.format ty;
error := true;
fail
end else
match_type pattern (Stype.subst params ts2 ty) limit
>>= fun (limit, d) ->
return (limit, Attr(`Ref pattern, d))
| _ -> fail
and match_arrow_types pattern target limit =
let parrows, preturn = get_arrows pattern in
let tarrows, treturn = get_arrows target in
match parrows, tarrows with
| [], [] -> fail (* avoid inf loop *)
| _ ->
match_type preturn treturn limit >>= fun (limit, dret) ->
match_types (map snd parrows) (map snd tarrows) limit >>= fun (limit, dts) ->
return (limit,
(* CR jfuruse: put ref ? *)
fold_right
(fun (l,t) st -> Arrow (l, t, st))
(combine (map fst tarrows) dts)
dret)
and remove_target_type_option pattern target limit =
match target with
| Constr ( ({ dt_path=Spath.SPdot (Spath.SPpredef, "option") } as dt),
[t2]) ->
decr ~by:10 limit >>= (* CR jfuruse: must be configurable *)
match_type pattern t2 >>= fun (limit, d) ->
return (limit, Constr (dt, [d]))
| _ -> fail
and make_tuple_left pattern target limit =
match pattern, target with
| _, Tuple ts2 ->
match_types [pattern] ts2 limit >>= fun (limit, ds) ->
return (limit, Tuple ds)
| _ -> fail
and make_tuple_right pattern target limit =
match pattern, target with
| Tuple ts1, _ ->
match_types ts1 [target] limit
>>= (function
| (limit, [d]) -> return (limit, d)
| _ -> assert false)
| _ -> fail
and match_types pats targets limit =
(* matching of two list of types, with permutation and addition/removal *)
match pats, targets with
| [], [] -> return (limit, [])
| [pat], [target] -> match_type pat target limit >>= fun (limit, d) -> return (limit, [d])
| _ ->
let len_pats = length pats in
let len_targets = length targets in
let pats, targets, penalty =
(* Some component might be missing in pattern, we fill variables for them
but with a rather big price
At 8a9d320d4 :
pattern: int -> int -> int -> int
target: nat -> int -> int -> nat -> int -> int -> int
distance = 12 = 3 * (addition(3) + instance(1))
I think 2 arguments addition is enough
2 * (addition(x) + 1) < 30
how about 7? (2*(7+1)=16 3*(7+1)=24<30 4(7+1)=32>30
*)
if len_pats < len_targets then
map (fun _ -> dummy_pattern_type_var) (1 -- (len_targets - len_pats)) @ pats,
map (fun target -> target, true) targets,
(len_targets - len_pats) * 7
else if len_pats > len_targets then
(* The target can have less components than the pattern,
but with huge penalty
At 8a9d320d4 : x5
changed to x10
*)
pats,
map (fun _ -> dummy_type_expr_var, false) (1 -- (len_pats - len_targets))
@ map (fun target -> target, true) targets,
(len_pats - len_targets) * 10
else pats, map (fun x -> x, true) targets, 0
in
decr ~by:penalty limit >>= fun limit ->
(* O(n^2) *)
let targets_array = Array.of_list (map fst targets) in
let score_table =
(* I believe laziness does not help here *)
map (fun pat ->
Array.map (fun target ->
match match_type pat target limit with
| None -> None
| Some (limit', x) -> Some (limit.score - limit'.score, x)
) targets_array) pats
in
(* I've got [GtkEnums._get_tables : unit -> t1 * .. * t70].
Its permutation is ... huge: 1.19e+100.
*)
let rec perm_max target_pos xs limit = match xs with
| [] -> return (limit, [])
| xs ->
(* [choose [] xs = [ (x, xs - x) | x <- xs ] *)
let rec choose sx = function
| [] -> assert false
| [x] -> [x, rev sx]
| x::xs -> (x, rev_append sx xs) :: choose (x::sx) xs
in
let xss = choose [] xs in
let matches =
filter_map (fun (x,xs) ->
match Array.unsafe_get x target_pos with
| None -> (* too much cost *) fail
| Some (score,d) ->
(* CR jfuruse: bug: We ignore the changes of expansions here *)
PathLimit.decr ~by:score limit.score
>>= fun score -> perm_max (target_pos+1) xs { limit with score }
>>= fun (score,ds) -> return (score,d::ds)) xss
in
match matches with
| [] -> fail
| _ -> return & fold_left1 (fun (s1, d1) (s2, d2) -> if s1 >= s2 then (s1, d1) else (s2, d2)) matches
in
perm_max 0 score_table limit
>>= fun (limit, ds) ->
return (limit,
(combine ds targets
|> filter_map (function
| (_, (_, false)) -> None
| (d, (_, true)) -> Some d)))
in
match_type pattern target limit
(* Return distance, not score *)
let match_path_type (p1, ty1) (p2, ty2) limit_path limit_type =
let open TypeLimit in
match_path p1 p2 limit_path;
>>= fun (_, match_path) -> (* Once path test is done, we ignore its score. *)
match_type ty1 ty2 (create limit_type)
>>= fun (limit, match_xty) -> return (limit_type - limit.score, (match_path, match_xty))
(* Return distance, not score *)
let match_type (* ?no_target_type_instantiate *) t1 t2 limit_type =
let open TypeLimit in
match_type (* ?no_target_type_instantiate *) t1 t2 (create limit_type) >>= fun (limit, desc) ->
return (limit_type - limit.score, desc)
(* Return distance, not score *)
let match_path p1 p2 limit =
match_path p1 p2 limit >>= fun (score, desc) -> return (limit - score, desc)
end
module MakePooled( A: sig
val cache : Levenshtein.StringWithHashtbl.cache
val pooled_types : Stype.t array
end) = struct
module M = Make(A)
let error = M.error
(* Return distance, not score *)
let match_path = M.match_path
module WithType(T : sig
val pattern : Stype.t
val cache : [ `NotFoundWith of int | `Exact of int * Stype.t ] array
end) = struct
(* Return distance, not score *)
let match_type t2 limit_type =
let t1 = T.pattern in
match t2 with
| Item.Not_pooled t2 -> M.match_type t1 t2 limit_type
| Item.Pooled n ->
let t2 = Array.unsafe_get A.pooled_types n in
match Array.unsafe_get T.cache n with
| `NotFoundWith n when n >= limit_type -> None
| _ ->
match M.match_type t1 t2 limit_type with
| None ->
Array.unsafe_set T.cache n (`NotFoundWith limit_type);
None
| Some dtrace as res ->
Array.unsafe_set T.cache n (`Exact dtrace);
res
(* Return distance, not score *)
let match_path_type p1 (p2, ty2) limit_path limit_type =
match_path p1 p2 limit_path;
>>= fun (_, desc_path) -> (* Once path test is done, we ignore its dist. *)
match_type ty2 limit_type
>>= fun (dist, desc_type) -> return (dist, (desc_path, desc_type))
end
let report_prof_type = M.report_prof_type
end