Skip to content

Commit

Permalink
Extend regex consume rewrite to incorporate star (cvc5#11480)
Browse files Browse the repository at this point in the history
This extends the regular expression consume rule to handle unfoldings of
top-level `re.*`, as done by the rewriter. This allows the rewriter to
be more uniform and simpler, and allows proof holes of this form to be
filled by this rule.

Will need to performance test this.
  • Loading branch information
ajreynol authored Jan 3, 2025
1 parent 00c276d commit bb815cf
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 71 deletions.
30 changes: 28 additions & 2 deletions proofs/eo/cpc/programs/Strings.eo
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@
)
)

; define: $str_re_consume
; define: $str_re_consume_process
; args:
; - s String: The string argument of the membership to rewrite.
; - r RegLan: The regular expression argument of the membership to rewrite.
Expand All @@ -1505,7 +1505,7 @@
; reversing them initially and converting them to flat forms. We then reverse
; them again and consume from the beginning of the remainder. Finally, we convert
; back from flat form when applicable.
(define $str_re_consume ((s String) (r RegLan))
(define $str_re_consume_process ((s String) (r RegLan))
(eo::define ((ss ($str_to_flat_form s true)))
(eo::define ((rr ($re_to_flat_form r true)))
(eo::match ((s1 String) (r1 RegLan))
Expand All @@ -1524,3 +1524,29 @@
)
)))
)

; define: $str_re_consume
; args:
; - s String: The string argument of the membership to rewrite.
; - r RegLan: The regular expression argument of the membership to rewrite.
; returns: >
; false if `(str.in_re s r)` can be shown to be equivalent to false, or
; otherwise `(str.in_re sr rr)` where sr and rr are the result of "consuming"
; prefixes/suffixes from s and r. In addition to the above method
; $str_re_consume_process, we additionally handle the case where we reason
; about the body of re.*, succeeding if a conflict is found or we full
; consume one copy.
(program $str_re_consume ((s String) (r RegLan))
(String RegLan) Bool
(
(($str_re_consume s (re.* r))
(eo::match ((s1 String))
($str_re_consume_process s r)
(
(false false) ; conflict
((str.in_re s1 @re.empty) (str.in_re s1 (re.* r))) ; one full copy
)
))
(($str_re_consume s r) ($str_re_consume_process s r))
)
)
4 changes: 0 additions & 4 deletions src/theory/strings/rewrites.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,6 @@ const char* toString(Rewrite r)
case Rewrite::CONCAT_NORM: return "CONCAT_NORM";
case Rewrite::IS_DIGIT_ELIM: return "IS_DIGIT_ELIM";
case Rewrite::RE_CONCAT_EMPTY: return "RE_CONCAT_EMPTY";
case Rewrite::RE_CONSUME_CCONF: return "RE_CONSUME_CCONF";
case Rewrite::RE_CONSUME_S: return "RE_CONSUME_S";
case Rewrite::RE_CONSUME_S_CCONF: return "RE_CONSUME_S_CCONF";
case Rewrite::RE_CONSUME_S_FULL: return "RE_CONSUME_S_FULL";
case Rewrite::RE_IN_EMPTY: return "RE_IN_EMPTY";
case Rewrite::RE_IN_SIGMA: return "RE_IN_SIGMA";
case Rewrite::RE_IN_EVAL: return "RE_IN_EVAL";
Expand Down
4 changes: 0 additions & 4 deletions src/theory/strings/rewrites.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,6 @@ enum class Rewrite : uint32_t
CONCAT_NORM,
IS_DIGIT_ELIM,
RE_CONCAT_EMPTY,
RE_CONSUME_CCONF,
RE_CONSUME_S,
RE_CONSUME_S_CCONF,
RE_CONSUME_S_FULL,
RE_IN_EMPTY,
RE_IN_SIGMA,
RE_IN_EVAL,
Expand Down
75 changes: 14 additions & 61 deletions src/theory/strings/sequences_rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1553,32 +1553,38 @@ Node SequencesRewriter::rewriteRangeRegExp(TNode node)
return node;
}


Node SequencesRewriter::rewriteViaStrInReConsume(const Node& node)
{
if (node.getKind() != Kind::STRING_IN_REGEXP)
{
return Node::null();
}
std::vector<Node> children;
utils::getConcat(node[1], children);
// if star, we consider the body of the star
bool isStar = (node[1].getKind()==Kind::REGEXP_STAR);
Node r = isStar ? node[1][0] : node[1];
utils::getConcat(r, children);
std::vector<Node> mchildren;
utils::getConcat(node[0], mchildren);
Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
if (!scn.isNull())
{
return scn;
}
else
else if (!isStar || children.empty())
{
// Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
// above, we strip components to construct an equivalent membership:
// (str.++ xi .. xj) in (re.++ rk ... rl).
Node xn = utils::mkConcat(mchildren, node[0].getType());
// if we considered the body of the star, we revert to the original RE
Node rn = isStar ? node[1] : utils::mkConcat(children, node[1].getType());
// construct the updated regular expression
Node newMem =
nodeManager()->mkNode(Kind::STRING_IN_REGEXP,
xn,
utils::mkConcat(children, node[1].getType()));
rn);
if (newMem != node)
{
return newMem;
Expand Down Expand Up @@ -1782,65 +1788,12 @@ Node SequencesRewriter::rewriteMembership(TNode node)
}

// do simple consumes
Node retNode = node;
if (r.getKind() == Kind::REGEXP_STAR)
{
for (unsigned dir = 0; dir <= 1; dir++)
{
std::vector<Node> mchildren;
utils::getConcat(x, mchildren);
bool success = true;
while (success)
{
success = false;
std::vector<Node> children;
utils::getConcat(r[0], children);
Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
if (!scn.isNull())
{
Trace("regexp-ext-rewrite")
<< "Regexp star : const conflict : " << node << std::endl;
return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
}
else if (children.empty())
{
// fully consumed one copy of the STAR
if (mchildren.empty())
{
Trace("regexp-ext-rewrite")
<< "Regexp star : full consume : " << node << std::endl;
Node ret = nodeManager()->mkConst(true);
return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
}
else
{
Node prev = retNode;
retNode = nm->mkNode(
Kind::STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
// Iterate again if the node changed. It may not have changed if
// nothing was consumed from mchildren (e.g. if the body of the
// re.* accepts the empty string.
success = (retNode != prev);
}
}
}
if (retNode != node)
{
Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
<< " -> " << retNode << std::endl;
return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
}
}
}
else
Node retNode = rewriteViaStrInReConsume(node);
if (!retNode.isNull())
{
retNode = rewriteViaStrInReConsume(node);
if (!retNode.isNull())
{
Trace("regexp-ext-rewrite")
<< "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
}
Trace("regexp-ext-rewrite")
<< "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
}
// check regular expression inclusion
// This makes a regular expression that contains all possible model values
Expand Down

0 comments on commit bb815cf

Please sign in to comment.