From 5a85c4c3f4af27a41f16416a993b8f1cb99932c4 Mon Sep 17 00:00:00 2001 From: Andrew Reynolds Date: Wed, 22 Jan 2025 08:23:14 -0600 Subject: [PATCH] Simplify the behavior of the arithmetic entail utility in strings (#11538) By default, we no longer use approximation reasoning to recursively prove that an approximation holds. The motivation for this is proof holes that are very difficult to reason about. The old behavior can be reenabled with expert option `--strings-rec-arith-approx`. I tested this change on SMT-LIB and an industrial set with very little performance differences -5+0 (all 5 benchmarks close to timeout). Also eliminates another recursive call to the rewriter in the strings rewriter. This reduces the set of rewrite steps for strings in our "safe" configuration from 35 -> 29. --- src/options/strings_options.toml | 8 ++++++++ src/theory/quantifiers/extended_rewrite.cpp | 4 +++- src/theory/strings/arith_entail.cpp | 14 +++++++++++--- src/theory/strings/arith_entail.h | 6 +++++- src/theory/strings/sequences_rewriter.cpp | 5 +++-- src/theory/strings/sequences_rewriter.h | 1 + src/theory/strings/strings_rewriter.cpp | 3 ++- src/theory/strings/strings_rewriter.h | 1 + src/theory/strings/theory_strings.cpp | 9 ++++++--- src/theory/strings/theory_strings.h | 2 ++ test/regress/cli/CMakeLists.txt | 1 + .../cli/regress0/strings/dd_1302_str_rewrite.smt2 | 7 +++++++ .../regress2/strings/proj-439-cyclic-str-ipc.smt2 | 2 ++ test/unit/theory/sequences_rewriter_white.cpp | 11 +++++++---- 14 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 test/regress/cli/regress0/strings/dd_1302_str_rewrite.smt2 diff --git a/src/options/strings_options.toml b/src/options/strings_options.toml index 79e17bfd1aa..5568888d4e8 100644 --- a/src/options/strings_options.toml +++ b/src/options/strings_options.toml @@ -243,3 +243,11 @@ name = "Strings Theory" type = "bool" default = "false" help = "use regular expression derive for conflict finding" + +[[option]] + name = "stringRecArithApprox" + category = "expert" + long = "strings-rec-arith-approx" + type = "bool" + default = "false" + help = "use possibly recursive reasoning when finding approximations for arithmetic string terms" diff --git a/src/theory/quantifiers/extended_rewrite.cpp b/src/theory/quantifiers/extended_rewrite.cpp index b9e44bde5f4..be59fb9908f 100644 --- a/src/theory/quantifiers/extended_rewrite.cpp +++ b/src/theory/quantifiers/extended_rewrite.cpp @@ -1747,7 +1747,9 @@ Node ExtendedRewriter::extendedRewriteStrings(const Node& node) const Kind k = node.getKind(); if (k == Kind::EQUAL) { - strings::SequencesRewriter sr(d_nm, &d_rew, nullptr); + // allow recursive approximations + strings::ArithEntail ae(&d_rew, true); + strings::SequencesRewriter sr(d_nm, &d_rew, ae, nullptr); return sr.rewriteEqualityExt(node); } else if (k == Kind::STRING_SUBSTR) diff --git a/src/theory/strings/arith_entail.cpp b/src/theory/strings/arith_entail.cpp index 062ce95ba15..a6a374c47fd 100644 --- a/src/theory/strings/arith_entail.cpp +++ b/src/theory/strings/arith_entail.cpp @@ -33,7 +33,8 @@ namespace cvc5::internal { namespace theory { namespace strings { -ArithEntail::ArithEntail(Rewriter* r) : d_rr(r) +ArithEntail::ArithEntail(Rewriter* r, bool recApprox) + : d_rr(r), d_recApprox(recApprox) { d_one = NodeManager::currentNM()->mkConstInt(Rational(1)); d_zero = NodeManager::currentNM()->mkConstInt(Rational(0)); @@ -273,6 +274,11 @@ Node ArithEntail::findApprox(Node ar, bool isSimple) Node ArithEntail::findApproxInternal(Node ar, bool isSimple) { + // if not using recursive approximations, we always set isSimple to true + if (!d_recApprox) + { + isSimple = true; + } Assert(rewriteArith(ar) == ar) << "Not rewritten " << ar << ", got " << rewriteArith(ar); NodeManager* nm = NodeManager::currentNM(); @@ -371,9 +377,11 @@ Node ArithEntail::findApproxInternal(Node ar, bool isSimple) { if (approxMsums.find(aa) == approxMsums.end()) { + // ensure rewritten, which makes a difference if isSimple is true + Node aar = rewriteArith(aa); CVC5_UNUSED bool ret = - ArithMSum::getMonomialSum(aa, approxMsums[aa]); - Assert(ret); + ArithMSum::getMonomialSum(aar, approxMsums[aa]); + Assert(ret) << "Could not find sum " << aa; } } changed = true; diff --git a/src/theory/strings/arith_entail.h b/src/theory/strings/arith_entail.h index 337f1cde8e8..b9de78b1256 100644 --- a/src/theory/strings/arith_entail.h +++ b/src/theory/strings/arith_entail.h @@ -43,8 +43,10 @@ class ArithEntail /** * @param r The rewriter, used for rewriting arithmetic terms. If none * is provided, we rely on the ArithPolyNorm utility. + * @param recApprox Whether to use recursive arithmetic approxiations in this + * class. */ - ArithEntail(Rewriter* r); + ArithEntail(Rewriter* r, bool recApprox = false); /** * Returns the rewritten form of a term, which must be an integer term. * This method invokes the rewriter, if one is provided, and uses the @@ -287,6 +289,8 @@ class ArithEntail static bool getConstantBoundCache(TNode n, bool isLower, Node& c); /** The underlying rewriter, if one exists */ Rewriter* d_rr; + /** Whether we are using recursive arithmetic approximations */ + bool d_recApprox; /** Constant zero */ Node d_zero; Node d_one; diff --git a/src/theory/strings/sequences_rewriter.cpp b/src/theory/strings/sequences_rewriter.cpp index 59004795c23..040609b3a1d 100644 --- a/src/theory/strings/sequences_rewriter.cpp +++ b/src/theory/strings/sequences_rewriter.cpp @@ -39,12 +39,13 @@ namespace strings { SequencesRewriter::SequencesRewriter(NodeManager* nm, Rewriter* r, + ArithEntail& ae, HistogramStat* statistics) : TheoryRewriter(nm), d_statistics(statistics), d_rr(r), - d_arithEntail(r), - d_stringsEntail(r, d_arithEntail, this) + d_arithEntail(ae), + d_stringsEntail(r, ae, this) { d_sigmaStar = nm->mkNode(Kind::REGEXP_STAR, nm->mkNode(Kind::REGEXP_ALLCHAR)); d_true = nm->mkConst(true); diff --git a/src/theory/strings/sequences_rewriter.h b/src/theory/strings/sequences_rewriter.h index e465ee2a6a3..72a4fba84fd 100644 --- a/src/theory/strings/sequences_rewriter.h +++ b/src/theory/strings/sequences_rewriter.h @@ -36,6 +36,7 @@ class SequencesRewriter : public TheoryRewriter public: SequencesRewriter(NodeManager* nm, Rewriter* r, + ArithEntail& ae, HistogramStat* statistics); /** The underlying entailment utilities */ ArithEntail& getArithEntail(); diff --git a/src/theory/strings/strings_rewriter.cpp b/src/theory/strings/strings_rewriter.cpp index 5e9a78f94cf..43dfb9bd4e7 100644 --- a/src/theory/strings/strings_rewriter.cpp +++ b/src/theory/strings/strings_rewriter.cpp @@ -29,9 +29,10 @@ namespace strings { StringsRewriter::StringsRewriter(NodeManager* nm, Rewriter* r, + ArithEntail& ae, HistogramStat* statistics, uint32_t alphaCard) - : SequencesRewriter(nm, r, statistics), d_alphaCard(alphaCard) + : SequencesRewriter(nm, r, ae, statistics), d_alphaCard(alphaCard) { } diff --git a/src/theory/strings/strings_rewriter.h b/src/theory/strings/strings_rewriter.h index 7c1b382061b..ed9999b678d 100644 --- a/src/theory/strings/strings_rewriter.h +++ b/src/theory/strings/strings_rewriter.h @@ -34,6 +34,7 @@ class StringsRewriter : public SequencesRewriter public: StringsRewriter(NodeManager* nm, Rewriter* r, + ArithEntail& ae, HistogramStat* statistics, uint32_t alphaCard = 196608); diff --git a/src/theory/strings/theory_strings.cpp b/src/theory/strings/theory_strings.cpp index 4a6c341b458..faab36c6f15 100644 --- a/src/theory/strings/theory_strings.cpp +++ b/src/theory/strings/theory_strings.cpp @@ -57,8 +57,10 @@ TheoryStrings::TheoryStrings(Env& env, OutputChannel& out, Valuation valuation) d_statistics(statisticsRegistry()), d_state(env, d_valuation), d_termReg(env, *this, d_state, d_statistics), + d_arithEntail(env.getRewriter(), options().strings.stringRecArithApprox), d_rewriter(env.getNodeManager(), env.getRewriter(), + d_arithEntail, &d_statistics.d_rewrites, d_termReg.getAlphabetCardinality()), d_eagerSolver(options().strings.stringEagerSolver @@ -101,9 +103,10 @@ TheoryStrings::TheoryStrings(Env& env, OutputChannel& out, Valuation valuation) d_absModelCounter(0), d_strGapModelCounter(0), d_cpacb(*this), - d_psrewPg(env.isTheoryProofProducing() ? new TrustProofGenerator( - env, TrustId::STRINGS_PP_STATIC_REWRITE, {}) - : nullptr) + d_psrewPg(env.isTheoryProofProducing() + ? new TrustProofGenerator( + env, TrustId::STRINGS_PP_STATIC_REWRITE, {}) + : nullptr) { d_termReg.finishInit(&d_im); diff --git a/src/theory/strings/theory_strings.h b/src/theory/strings/theory_strings.h index 5ab42758ecc..fe361ae3c49 100644 --- a/src/theory/strings/theory_strings.h +++ b/src/theory/strings/theory_strings.h @@ -250,6 +250,8 @@ class TheoryStrings : public Theory { SolverState d_state; /** The term registry for this theory */ TermRegistry d_termReg; + /** An arithmetic entailment utility */ + ArithEntail d_arithEntail; /** The theory rewriter for this theory. */ StringsRewriter d_rewriter; /** The eager solver */ diff --git a/test/regress/cli/CMakeLists.txt b/test/regress/cli/CMakeLists.txt index 3fc04f700eb..cada19ca2a7 100644 --- a/test/regress/cli/CMakeLists.txt +++ b/test/regress/cli/CMakeLists.txt @@ -1843,6 +1843,7 @@ set(regress_0_tests regress0/strings/complement-simple.smt2 regress0/strings/ctn-decompose-3-sym.smt2 regress0/strings/ctn-decompose-3-sym-nf.smt2 + regress0/strings/dd_1302_str_rewrite.smt2 regress0/strings/dd_issue6913-str-inf.smt2 regress0/strings/dd_norn_235_extf_d.smt2 regress0/strings/dd_norn_675.smt2 diff --git a/test/regress/cli/regress0/strings/dd_1302_str_rewrite.smt2 b/test/regress/cli/regress0/strings/dd_1302_str_rewrite.smt2 new file mode 100644 index 00000000000..76a97d7d2f5 --- /dev/null +++ b/test/regress/cli/regress0/strings/dd_1302_str_rewrite.smt2 @@ -0,0 +1,7 @@ +; COMMAND-LINE: +; COMMAND-LINE: --strings-rec-arith-approx +; EXPECT: unsat +(set-logic ALL) +(declare-fun u () String) +(assert (= 0 (ite (not (= "o" (str.substr (str.substr u 0 0) 0 (str.len (str.substr (str.substr u 1 1) 0 (str.indexof (str.substr u 0 1) "/" 0)))))) 1 0))) +(check-sat) diff --git a/test/regress/cli/regress2/strings/proj-439-cyclic-str-ipc.smt2 b/test/regress/cli/regress2/strings/proj-439-cyclic-str-ipc.smt2 index 5a6927e1b78..294f311ad2a 100644 --- a/test/regress/cli/regress2/strings/proj-439-cyclic-str-ipc.smt2 +++ b/test/regress/cli/regress2/strings/proj-439-cyclic-str-ipc.smt2 @@ -1,3 +1,5 @@ +; COMMAND-LINE: +; COMMAND-LINE: --strings-rec-arith-approx (set-logic QF_SLIA) (set-info :status unsat) (declare-const x Int) diff --git a/test/unit/theory/sequences_rewriter_white.cpp b/test/unit/theory/sequences_rewriter_white.cpp index 5132550a0fd..1a595bb867d 100644 --- a/test/unit/theory/sequences_rewriter_white.cpp +++ b/test/unit/theory/sequences_rewriter_white.cpp @@ -44,11 +44,14 @@ class TestTheoryWhiteSequencesRewriter : public TestSmt TestSmt::SetUp(); Options opts; d_rewriter = d_slvEngine->getEnv().getRewriter(); - d_seqRewriter.reset( - new SequencesRewriter(d_nodeManager, d_rewriter, nullptr)); + // allow recursive approximations + d_arithEntail.reset(new ArithEntail(d_rewriter, true)); + d_seqRewriter.reset(new SequencesRewriter( + d_nodeManager, d_rewriter, *d_arithEntail.get(), nullptr)); } Rewriter* d_rewriter; + std::unique_ptr d_arithEntail; std::unique_ptr d_seqRewriter; void inNormalForm(Node t) @@ -285,7 +288,7 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_nth) TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_substr) { - StringsRewriter sr(d_nodeManager, d_rewriter, nullptr); + StringsRewriter sr(d_nodeManager, d_rewriter, *d_arithEntail.get(), nullptr); TypeNode intType = d_nodeManager->integerType(); TypeNode strType = d_nodeManager->stringType(); @@ -595,7 +598,7 @@ TEST_F(TestTheoryWhiteSequencesRewriter, rewrite_concat) TEST_F(TestTheoryWhiteSequencesRewriter, length_preserve_rewrite) { - StringsRewriter sr(d_nodeManager, d_rewriter, nullptr); + StringsRewriter sr(d_nodeManager, d_rewriter, *d_arithEntail.get(), nullptr); TypeNode intType = d_nodeManager->integerType(); TypeNode strType = d_nodeManager->stringType();