-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.py
77 lines (68 loc) · 2.43 KB
/
regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import re
import sre_parse
from anytree import Node, RenderTree
def parse_regex_to_ast(regex):
    """Parse ``regex`` with ``sre_parse`` and hand back the parsed pattern.

    Returns the object produced by ``sre_parse.parse`` on success. When the
    parser raises ``re.error`` the exception is not propagated; a string of
    the form ``"Error parsing regex: <details>"`` is returned instead.
    """
    try:
        tree = sre_parse.parse(regex)
    except re.error as exc:
        # Report the failure as text rather than raising.
        message = f"Error parsing regex: {exc}"
        return message
    else:
        return tree
def build_tree(parsed, parent=None):
    """
    Recursively build an anytree ``Node`` tree from a parsed regex.

    Args:
        parsed: An iterable of parser tokens, e.g. the object returned by
            ``sre_parse.parse``. Each ``(opcode, argument)`` tuple becomes a
            node labelled ``"<opcode>: <argument>"``; a nested list is
            flattened into the current parent; anything else becomes a leaf
            labelled with its ``str()``.
        parent: Node to attach the new nodes to. When ``None`` a fresh root
            node named ``"Regex"`` is created.

    Returns:
        The ``parent`` node (the newly created root when none was given).
    """
    if parent is None:
        parent = Node("Regex")
    for token in parsed:
        if isinstance(token, tuple):
            token_type, token_value = token
            node = Node(f"{token_type}: {token_value}", parent=parent)
            # Parser opcodes are named integer constants, not strings, so
            # comparing them with a string literal is always False. Compare
            # by the opcode's name instead (falls back to str() for plain
            # string opcodes).
            type_name = getattr(token_type, "name", str(token_type))
            if type_name == "SUBPATTERN":
                # The SUBPATTERN argument is the 4-tuple
                # (group, add_flags, del_flags, subpattern); the label shows
                # the two flag fields.
                _group, add_flags, del_flags, subpattern = token_value
                sub_node = Node(
                    f"Subpattern {add_flags}-{del_flags}", parent=node
                )
                build_tree(subpattern, sub_node)
            elif type_name in ("MAX_REPEAT", "MIN_REPEAT"):
                # Repeat argument is (min, max, repeated subpattern).
                min_repeat, max_repeat, subpattern = token_value
                repeat_node = Node(
                    f"Repeat {min_repeat}-{max_repeat}", parent=node
                )
                build_tree(subpattern, repeat_node)
        elif isinstance(token, list):
            # A bare list of tokens contributes its items to the same parent.
            build_tree(token, parent)
        else:
            Node(str(token), parent=parent)
    return parent
def visualize_regex(regex):
    """
    Print a tree rendering of ``regex``.

    The pattern is parsed with ``sre_parse.parse``, converted to a node tree
    by ``build_tree`` and printed one line per node via ``RenderTree``. Any
    exception raised along the way is caught and reported on stdout as
    ``"Error parsing regex: <details>"``; nothing is returned.
    """
    try:
        tree_root = build_tree(sre_parse.parse(regex))
        for prefix, _fill, tree_node in RenderTree(tree_root):
            print(f"{prefix}{tree_node.name}")
    except Exception as exc:
        print(f"Error parsing regex: {exc}")
# Demo entry point: when run as a script, print the tree for a sample
# pattern made of two character classes.
if __name__ == "__main__":
    regex = r"[abc][def]"
    visualize_regex(regex)
# ast = parse_regex_to_ast(regex)
# print("Parsed AST:")
# for component in ast:
# print(component)
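# Illustrative sketch of a rendered tree for the pattern ^a(bc|de)*f$
# (abridged: the `de` alternative is not shown; not verbatim program output):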
# Regex
# ├── AT: AT_BEGINNING
# ├── LITERAL: 97
# ├── SUBPATTERN: (1, 0, 0, [(LITERAL, 98), (LITERAL, 99)])
# │ └── Subpattern 0-0
# │ ├── LITERAL: 98
# │ └── LITERAL: 99
# ├── MAX_REPEAT: (0, MAXREPEAT, [(SUBPATTERN, (1, 0, 0, [(LITERAL, 98), (LITERAL, 99)]))])
# │ └── Repeat 0-MAXREPEAT
# │ └── SUBPATTERN: (1, 0, 0, [(LITERAL, 98), (LITERAL, 99)])
# │ └── Subpattern 0-0
# │ ├── LITERAL: 98
# │ └── LITERAL: 99
# ├── LITERAL: 102
# └── AT: AT_END