diff --git a/src/smt/theory_str_noodler/aut_assignment.h b/src/smt/theory_str_noodler/aut_assignment.h index d3fa7a2c1ce..ed6201f0f41 100644 --- a/src/smt/theory_str_noodler/aut_assignment.h +++ b/src/smt/theory_str_noodler/aut_assignment.h @@ -77,6 +77,23 @@ namespace smt::noodler { return nfa; } + // represents code point of digit 0 + static const mata::Symbol DIGIT_SYMBOL_START = 48; + // represents code point of digit 9 + static const mata::Symbol DIGIT_SYMBOL_END = 57; + + /** + * @brief Returns automaton that accept non-empty words containing only symbols encoding digits (symbols from 48 to 57) + */ + static mata::nfa::Nfa digit_automaton() { + mata::nfa::Nfa only_digits_aut(2, {0}, {1}); + for (mata::Symbol digit = DIGIT_SYMBOL_START; digit <= DIGIT_SYMBOL_END; ++digit) { + only_digits_aut.delta.add(0, digit, 1); + only_digits_aut.delta.add(1, digit, 1); + } + return only_digits_aut; + } + mata::nfa::Nfa get_automaton_concat(const std::vector& concat) const { mata::nfa::Nfa ret = mata::nfa::builder::create_empty_string_nfa(); for(const BasicTerm& t : concat) { diff --git a/src/smt/theory_str_noodler/decision_procedure.cpp b/src/smt/theory_str_noodler/decision_procedure.cpp index 4be044d1e65..ed6dcce8cb0 100644 --- a/src/smt/theory_str_noodler/decision_procedure.cpp +++ b/src/smt/theory_str_noodler/decision_procedure.cpp @@ -592,10 +592,12 @@ namespace smt::noodler { return LenNode(LenFormulaType::AND, conjuncts); } - LenNode DecisionProcedure::get_lengths() { + std::pair DecisionProcedure::get_lengths() { + LenNodePrecision precision = LenNodePrecision::PRECISE; // start with precise and possibly change it later + if (solution.length_sensitive_vars.empty()) { // There are no length vars (which also means no disequations nor conversions), it is not needed to create the lengths formula. - return LenNode(LenFormulaType::TRUE); + return {LenNode(LenFormulaType::TRUE), precision}; } // start with formula for disequations @@ -612,9 +614,11 @@ namespace smt::noodler { solution.flatten_substition_map(); // add formula for conversions - conjuncts.push_back(get_formula_for_conversions()); + auto conv_form_with_precision = get_formula_for_conversions(); + conjuncts.push_back(conv_form_with_precision.first); + precision = conv_form_with_precision.second; - return LenNode(LenFormulaType::AND, conjuncts); + return {LenNode(LenFormulaType::AND, conjuncts), precision}; } std::set DecisionProcedure::get_vars_substituted_in_code_conversions() { @@ -701,8 +705,8 @@ namespace smt::noodler { for (mata::Symbol s : word) { is_invalid = false; // word is not empty, it might not be invalid - if (48 <= s && s <= 57) { // s is a code point of digit - rational real_digit(s - 48); + if (AutAssignment::DIGIT_SYMBOL_START <= s && s <= AutAssignment::DIGIT_SYMBOL_END) { // s is a code point of digit + rational real_digit(s - AutAssignment::DIGIT_SYMBOL_START); resulting_int = resulting_int*10 + real_digit; } else { // it is possible that s is a dummy symbol, but we assume that all digits are explicitly in the alphabet, see the assumptions @@ -761,11 +765,12 @@ namespace smt::noodler { } // see the comment of get_formula_for_conversions for explanation - LenNode DecisionProcedure::get_formula_for_int_conversion(const TermConversion& conv, const std::set& code_subst_vars) { + std::pair DecisionProcedure::get_formula_for_int_conversion(const TermConversion& conv, const std::set& code_subst_vars, const unsigned underapproximating_length) { const BasicTerm& s = conv.string_var; const BasicTerm& i = conv.int_var; LenNode result(LenFormulaType::OR); + LenNodePrecision res_precision = LenNodePrecision::PRECISE; // s = s_1 ... s_n, subst_vars = const std::vector& subst_vars = solution.get_substituted_vars(s); @@ -773,7 +778,7 @@ namespace smt::noodler { // automaton representing all valid inputs (only digits) // - we also keep empty word, because we will use it for substituted vars, and one of them can be empty, while other has only digits (for example s1="12", s2="" but s="12" is valid) mata::nfa::Nfa only_digits(1, {0}, {0}); - for (mata::Symbol digit = 48; digit <= 57; ++digit) { + for (mata::Symbol digit = AutAssignment::DIGIT_SYMBOL_START; digit <= AutAssignment::DIGIT_SYMBOL_END; ++digit) { only_digits.delta.add(0, digit, 0); } STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << only_digits << std::endl;); @@ -784,40 +789,56 @@ namespace smt::noodler { // cases should be the collection of all words w = w_1 ... w_n, where w_i is the word of the language L_i of the automaton for s_i std::vector> cases = {{}}; for (const BasicTerm& subst_var : solution.get_substituted_vars(s)) { // s_i = subst_var - auto aut = solution.aut_ass.at(subst_var); + std::shared_ptr aut = solution.aut_ass.at(subst_var); STRACE("str-conversion-int", tout << "NFA for " << subst_var << ":" << std::endl << *aut << std::endl;); // part containing only digits - mata::nfa::Nfa aut_valid_part = mata::nfa::reduce(mata::nfa::intersection(*aut, only_digits).trim()); - STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << aut_valid_part << std::endl;); + std::shared_ptr aut_valid_part; // part containing some non-digit - mata::nfa::Nfa aut_non_valid_part = mata::nfa::reduce(mata::nfa::intersection(*aut, contain_non_digit).trim()); - STRACE("str-conversion-int", tout << "contains-non-digit NFA:" << std::endl << aut_non_valid_part << std::endl;); - - if (!aut_non_valid_part.is_lang_empty()) { - // aut_non_valid_part is language of words that contain at least one non-digit - // - we can get here only for the case that conv.type is to_int (for from_int, by assumptions, s should be restricted to language of "valid numbers + empty string") - // - if s_i = one of these words, then s must also contain non-digit => result i = -1 - // we therefore create following formula: - // |s_i| is length of some word from aut_non_valid_part && int_version_of(s_i) = -1 && i = -1 - result.succ.emplace_back(LenFormulaType::AND, std::vector{ solution.aut_ass.get_lengths(aut_non_valid_part, subst_var), LenNode(LenFormulaType::EQ, { int_version_of(subst_var), -1 }), LenNode(LenFormulaType::EQ, {i, -1}) }); - if (code_subst_vars.contains(subst_var)) { - // s_i is used in some to_code/from_code - // => we need to add to the previous formula also the fact, that s_i cannot encode code point of a digit - // .. && !(48 <= code_version_of(s_i) <= 57) - result.succ.back().succ.emplace_back(LenFormulaType::LT, std::vector{ code_version_of(subst_var), 48 }); - result.succ.back().succ.emplace_back(LenFormulaType::LT, std::vector{ 57, code_version_of(subst_var) }); + std::shared_ptr aut_non_valid_part; + + if (conv.type == ConversionType::TO_INT) { + aut_valid_part = std::make_shared(mata::nfa::reduce(mata::nfa::intersection(*aut, only_digits).trim())); + STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << *aut_valid_part << std::endl;); + aut_non_valid_part = std::make_shared(mata::nfa::reduce(mata::nfa::intersection(*aut, contain_non_digit).trim())); + STRACE("str-conversion-int", tout << "contains-non-digit NFA:" << std::endl << *aut_non_valid_part << std::endl;); + if (!aut_non_valid_part->is_lang_empty()) { + // aut_non_valid_part is language of words that contain at least one non-digit + // - we can get here only for the case that conv.type is to_int (for from_int, by assumptions, s should be restricted to language of "valid numbers + empty string") + // - if s_i = one of these words, then s must also contain non-digit => result i = -1 + // we therefore create following formula: + // |s_i| is length of some word from aut_non_valid_part && int_version_of(s_i) = -1 && i = -1 + result.succ.emplace_back(LenFormulaType::AND, std::vector{ solution.aut_ass.get_lengths(*aut_non_valid_part, subst_var), LenNode(LenFormulaType::EQ, { int_version_of(subst_var), -1 }), LenNode(LenFormulaType::EQ, {i, -1}) }); + if (code_subst_vars.contains(subst_var)) { + // s_i is used in some to_code/from_code + // => we need to add to the previous formula also the fact, that s_i cannot encode code point of a digit + // .. && !(AutAssignment::DIGIT_SYMBOL_START <= code_version_of(s_i) <= AutAssignment::DIGIT_SYMBOL_END) + result.succ.back().succ.emplace_back(LenFormulaType::LT, std::vector{ code_version_of(subst_var), AutAssignment::DIGIT_SYMBOL_START }); + result.succ.back().succ.emplace_back(LenFormulaType::LT, std::vector{ AutAssignment::DIGIT_SYMBOL_END, code_version_of(subst_var) }); + } } + } else { + // for from_int, we assume that s is restricted to language of "valid numbers + empty string", which means that + // s_i should also be restricted to this language => 'aut intersected with only_digits == aut' + aut_valid_part = aut; + STRACE("str-conversion-int", tout << "only-digit NFA:" << std::endl << *aut_valid_part << std::endl;); } + unsigned max_length_of_words = aut_valid_part->num_of_states(); + // we want to enumerate all words containing digits -> cannot be infinite language - if (!aut_valid_part.is_acyclic()) { - STRACE("str-conversion", tout << "failing NFA:" << std::endl << aut_valid_part << std::endl;); - util::throw_error("cannot process to_int/from_int for automaton with infinite language"); + if (!aut_valid_part->is_acyclic()) { + STRACE("str-conversion", tout << "failing NFA:" << std::endl << *aut_valid_part << std::endl;); + res_precision = LenNodePrecision::UNDERAPPROX; + if (max_length_of_words > underapproximating_length) { + // there are 10^max_length_of_words possible cases, we put limit so there is not MEMOUT + // but (experimentally) it seems to be better to reduce it even more if the automaton has less states + max_length_of_words = underapproximating_length; + } } std::vector> new_cases; - for (auto word : aut_valid_part.get_words(aut_valid_part.num_of_states())) { + for (auto word : aut_valid_part->get_words(max_length_of_words)) { for (const auto& old_case : cases) { std::vector new_case = old_case; new_case.push_back(word); @@ -853,7 +874,7 @@ namespace smt::noodler { if (word_of_subst_var.size() == 1) { // |w_i| = 1 mata::Symbol code_point = word_of_subst_var[0]; // this must be a code point of a digit, as w_i can only contain digits formula_for_case.succ.emplace_back(LenFormulaType::EQ, std::vector{ code_version_of(subst_var), code_point }); - } // "else" part is not needed, that should be solved by setting "|s_i| = |w_i|" and by using the forula from function get_formula_for_code_subst_vars() + } // "else" part is not needed, that should be solved by setting "|s_i| = |w_i|" and by using the formula from function get_formula_for_code_subst_vars() } // add w_i to the end of w @@ -867,7 +888,7 @@ namespace smt::noodler { } - return result; + return {result, res_precision}; } /** @@ -920,13 +941,14 @@ namespace smt::noodler { * - we do the same thing as to_int, but instead of 'i == to_int(w)' in (2) for non-valid w, we put 'i < 0' * - we assume (as in from_code) that s is restricted to only possible outputs of from_int, mainly that w cannot start with 0 and the only non-valid w is empty string */ - LenNode DecisionProcedure::get_formula_for_conversions() { + std::pair DecisionProcedure::get_formula_for_conversions() { STRACE("str-conversion", tout << "Creating formula for conversions" << std::endl; ); // the resulting formula LenNode result(LenFormulaType::AND); + LenNodePrecision res_precision = LenNodePrecision::PRECISE; // collect all code variables, i.e. those that substitute string variables used in to_code/from_code predicates std::set code_subst_vars = get_vars_substituted_in_code_conversions(); @@ -949,7 +971,11 @@ namespace smt::noodler { case ConversionType::TO_INT: case ConversionType::FROM_INT: { - result.succ.push_back(get_formula_for_int_conversion(conv, code_subst_vars)); + auto int_conv_formula_with_precision = get_formula_for_int_conversion(conv, code_subst_vars); + result.succ.push_back(int_conv_formula_with_precision.first); + if (int_conv_formula_with_precision.second != LenNodePrecision::PRECISE) { + res_precision = int_conv_formula_with_precision.second; + } break; } default: @@ -960,7 +986,7 @@ namespace smt::noodler { STRACE("str-conversion", tout << "Formula for conversions: " << result << std::endl; ); - return result; + return {result, res_precision}; } /** @@ -1018,6 +1044,12 @@ namespace smt::noodler { lbool DecisionProcedure::preprocess(PreprocessType opt, const BasicTermEqiv &len_eq_vars) { FormulaPreprocessor prep_handler{std::move(this->formula), std::move(this->init_aut_ass), std::move(this->init_length_sensitive_vars), m_params}; + // we collect variables used in conversions, some preprocessing rules cannot be applied for them + std::unordered_set conv_vars; + for (const auto &conv : conversions) { + conv_vars.insert(conv.string_var); + } + // So-far just lightweight preprocessing prep_handler.remove_trivial(); prep_handler.reduce_diseqalities(); @@ -1038,7 +1070,7 @@ namespace smt::noodler { prep_handler.propagate_variables(); prep_handler.propagate_eps(); prep_handler.infer_alignment(); - prep_handler.remove_regular(); + prep_handler.remove_regular(conv_vars); // Skip_len_sat is not compatible with not(contains) and conversions as the preprocessing may skip equations with variables // inside not(contains)/conversion. (Note that if opt == PreprocessType::UNDERAPPROX, there is no not(contains)). if(this->not_contains.get_predicates().empty() && this->conversions.empty()) { @@ -1050,7 +1082,7 @@ namespace smt::noodler { prep_handler.reduce_diseqalities(); prep_handler.remove_trivial(); prep_handler.reduce_regular_sequence(3); - prep_handler.remove_regular(); + prep_handler.remove_regular(conv_vars); // the following should help with Leetcode /// TODO: should be simplyfied? So many preprocessing steps now @@ -1067,18 +1099,20 @@ namespace smt::noodler { prep_handler.common_prefix_propagation(); prep_handler.propagate_variables(); prep_handler.generate_identities(); - prep_handler.remove_regular(); + prep_handler.remove_regular(conv_vars); prep_handler.propagate_variables(); // underapproximation if(opt == PreprocessType::UNDERAPPROX) { prep_handler.underapprox_languages(); prep_handler.skip_len_sat(); prep_handler.reduce_regular_sequence(3); - prep_handler.remove_regular(); + prep_handler.remove_regular(conv_vars); prep_handler.skip_len_sat(); } prep_handler.reduce_regular_sequence(1); - prep_handler.remove_regular(); + prep_handler.remove_regular(conv_vars); + + prep_handler.conversions_validity(conversions); // Refresh the instance this->formula = prep_handler.get_modified_formula(); diff --git a/src/smt/theory_str_noodler/decision_procedure.h b/src/smt/theory_str_noodler/decision_procedure.h index 2cd89f82465..0d71c66c8ac 100644 --- a/src/smt/theory_str_noodler/decision_procedure.h +++ b/src/smt/theory_str_noodler/decision_procedure.h @@ -22,6 +22,15 @@ namespace smt::noodler { UNDERAPPROX }; + /** + * @brief Length formula precision + */ + enum struct LenNodePrecision { + PRECISE, + UNDERAPPROX, + OVERAPPROX, + }; + /** * @brief Get the value of the symbol representing all symbols not ocurring in the formula (i.e. a minterm) * @@ -81,8 +90,11 @@ namespace smt::noodler { * Get length constraints for the solution. Assumes that we have some solution from * running compute_next_solution(), the solution is actually solution if the length * constraints hold. + * + * The second element of the resulting pair marks whether the lennode is precise or + * over/underapproximation. */ - virtual LenNode get_lengths() { + virtual std::pair get_lengths() { throw std::runtime_error("Unimplemented"); } @@ -301,7 +313,7 @@ namespace smt::noodler { /** * @brief Gets the formula encoding to_code/from_code/to_int/from_int conversions */ - LenNode get_formula_for_conversions(); + std::pair get_formula_for_conversions(); /** * Returns the code var version of @p var used to encode to_code/from_code in get_formula_for_conversions @@ -348,8 +360,11 @@ namespace smt::noodler { /** * @brief Get the formula encoding to_int/from_int conversion + * + * @param underapproximating_length For the case that we need to underapproximate, this variable sets + * the length up to which we underapproximate */ - LenNode get_formula_for_int_conversion(const TermConversion& conv, const std::set& code_subst_vars); + std::pair get_formula_for_int_conversion(const TermConversion& conv, const std::set& code_subst_vars, const unsigned underapproximating_length = 3); /** * Formula containing all not_contains predicate (nothing else) @@ -372,7 +387,7 @@ namespace smt::noodler { lbool can_unify_not_contains(const FormulaPreprocessor& prep); public: - + /** * Initialize a new decision procedure that can solve word equations * (equalities of concatenations of string variables) with regular constraints @@ -417,7 +432,7 @@ namespace smt::noodler { LenNode get_initial_lengths() override; - LenNode get_lengths() override; + std::pair get_lengths() override; }; } diff --git a/src/smt/theory_str_noodler/formula_preprocess.cpp b/src/smt/theory_str_noodler/formula_preprocess.cpp index c326caf99f3..1098c010db9 100644 --- a/src/smt/theory_str_noodler/formula_preprocess.cpp +++ b/src/smt/theory_str_noodler/formula_preprocess.cpp @@ -298,12 +298,14 @@ namespace smt::noodler { } /** - * @brief Iteratively remove regular predicates. A regular predicate is of the form X = X_1 X_2 ... X_n where + * Iteratively remove regular predicates. A regular predicate is of the form X = X_1 X_2 ... X_n where * X_1 ... X_n does not occurr elsewhere in the system. Formally, L = R is regular if |L| = 1 and each variable * from Vars(R) has a single occurrence in the system only. Regular predicates can be removed from the system * provided A(X) = A(X) \cap A(X_1).A(X_2)...A(X_n) where A(X) is the automaton assigned to variable X. + * + * @param disallowed_vars - if any of these var occurs in equation, it cannot be removed */ - void FormulaPreprocessor::remove_regular() { + void FormulaPreprocessor::remove_regular(const std::unordered_set& disallowed_vars) { std::vector> regs; this->formula.get_side_regulars(regs); std::deque> worklist(regs.begin(), regs.end()); @@ -312,8 +314,15 @@ namespace smt::noodler { std::pair pr = worklist.front(); worklist.pop_front(); + STRACE("str-prep-remove_regular", tout << "Remove regular:" << pr.second << std::endl;); + assert(pr.second.get_left_side().size() == 1); + bool contains_disallowed = !set_disjoint(disallowed_vars, pr.second.get_vars()); + if (contains_disallowed) { + continue; + } + // if right side contains len vars (except when we have X = Y), we must do splitting => cannot remove bool is_right_side_len = !set_disjoint(this->len_variables, pr.second.get_side_vars(Predicate::EquationSideType::Right)); if(pr.second.get_side_vars(Predicate::EquationSideType::Right).size() > 1 && is_right_side_len) { @@ -332,6 +341,7 @@ namespace smt::noodler { } this->formula.remove_predicate(pr.first); + STRACE("str-prep-remove_regular", tout << "removed" << std::endl;); // check if by removing the regular equation, some other equations did not become regular // we only need to check this for left_var, as the variables from the right side do not occur @@ -1520,4 +1530,26 @@ namespace smt::noodler { return can_unify(left, right, check); } + + /** + * @brief Adds restrictions from conversions to the len_formula, so that (underapproximating) unsat check can be better + * + * Specifically, it checks if for to_code(x)/to_int(x) there is any valid word in the language of automaton for x, i.e, + * some one-symbol word for to_code(x) or some word containing only digits for to_int(x). + * + * @param conversions + */ + void FormulaPreprocessor::conversions_validity(std::vector& conversions) { + mata::nfa::Nfa sigma_aut = aut_ass.sigma_automaton(); + mata::nfa::Nfa only_digits_aut = AutAssignment::digit_automaton(); + + for (const auto& conv : conversions) { + if ((conv.type == ConversionType::TO_CODE && mata::nfa::reduce(mata::nfa::intersection(sigma_aut, *aut_ass.at(conv.string_var))).is_lang_empty()) || + (conv.type == ConversionType::TO_INT && mata::nfa::reduce(mata::nfa::intersection(only_digits_aut, *aut_ass.at(conv.string_var))).is_lang_empty())) + { + len_formula.succ.emplace_back(LenFormulaType::EQ, std::vector{conv.int_var, -1}); + } + } + } + } // Namespace smt::noodler. diff --git a/src/smt/theory_str_noodler/formula_preprocess.h b/src/smt/theory_str_noodler/formula_preprocess.h index dd8b7ac3c50..c40f65a6f4f 100644 --- a/src/smt/theory_str_noodler/formula_preprocess.h +++ b/src/smt/theory_str_noodler/formula_preprocess.h @@ -58,10 +58,16 @@ namespace smt::noodler { template bool set_disjoint(const std::unordered_set& t1, const std::set& t2) { - std::set inter; - for(const auto& t : t2) { - if(t1.find(t) != t1.end()) - return false; + if (t1.size() < t2.size()) { + for(const auto& t : t1) { + if(t2.contains(t)) + return false; + } + } else { + for(const auto& t : t2) { + if(t1.contains(t)) + return false; + } } return true; } @@ -357,7 +363,7 @@ namespace smt::noodler { Formula get_modified_formula() const; - void remove_regular(); + void remove_regular(const std::unordered_set& disallowed_vars); void propagate_variables(); void propagate_eps(); void generate_identities(); @@ -377,6 +383,8 @@ namespace smt::noodler { bool contains_unsat_eqs_or_diseqs(); bool can_unify_contain(const Concat& left, const Concat& right) const; + void conversions_validity(std::vector& conversions); + /** * @brief Replace all occurrences of find with replace. Warning: do not modify the automata assignment. * diff --git a/src/smt/theory_str_noodler/nielsen_decision_procedure.cpp b/src/smt/theory_str_noodler/nielsen_decision_procedure.cpp index 5321adbb8b7..14f9e1c719e 100644 --- a/src/smt/theory_str_noodler/nielsen_decision_procedure.cpp +++ b/src/smt/theory_str_noodler/nielsen_decision_procedure.cpp @@ -10,9 +10,9 @@ namespace smt::noodler { - LenNode NielsenDecisionProcedure::get_lengths() { + std::pair NielsenDecisionProcedure::get_lengths() { STRACE("str", tout << "Nielsen lengths: " << length_formula_for_solution << std::endl;); - return std::move(length_formula_for_solution); + return {std::move(length_formula_for_solution), LenNodePrecision::PRECISE}; } /** diff --git a/src/smt/theory_str_noodler/nielsen_decision_procedure.h b/src/smt/theory_str_noodler/nielsen_decision_procedure.h index 8ada9b6102f..7b2712c199a 100644 --- a/src/smt/theory_str_noodler/nielsen_decision_procedure.h +++ b/src/smt/theory_str_noodler/nielsen_decision_procedure.h @@ -344,7 +344,7 @@ namespace smt::noodler { LenNode get_initial_lengths() override { return LenNode(LenFormulaType::TRUE); } - LenNode get_lengths() override; + std::pair get_lengths() override; void init_computation() override; lbool preprocess(PreprocessType opt = PreprocessType::PLAIN, const BasicTermEqiv &len_eq_vars = {}) override; diff --git a/src/smt/theory_str_noodler/theory_str_noodler.cpp b/src/smt/theory_str_noodler/theory_str_noodler.cpp index 883497fece0..66a832ed230 100644 --- a/src/smt/theory_str_noodler/theory_str_noodler.cpp +++ b/src/smt/theory_str_noodler/theory_str_noodler.cpp @@ -492,6 +492,8 @@ namespace smt::noodler { expr_ref l{get_enode(x)->get_expr(), m}; expr_ref r{get_enode(y)->get_expr(), m}; + STRACE("str", tout << "new_eq: " << l << " = " << r << std::endl;); + app* equation = m.mk_eq(l, r); // TODO explain what is happening here @@ -526,8 +528,6 @@ namespace smt::noodler { } } } - - STRACE("str", tout << "new_eq: " << l << " = " << r << std::endl;); } void theory_str_noodler::new_diseq_eh(theory_var x, theory_var y) { @@ -590,6 +590,7 @@ namespace smt::noodler { m_word_diseq_todo.push_scope(); m_membership_todo.push_scope(); m_not_contains_todo.push_scope(); + m_conversion_todo.push_scope(); STRACE("str", tout << "push_scope: " << m_scope_level << '\n';); } @@ -603,6 +604,7 @@ namespace smt::noodler { m_word_diseq_todo.pop_scope(num_scopes); m_membership_todo.pop_scope(num_scopes); m_not_contains_todo.pop_scope(num_scopes); + m_conversion_todo.pop_scope(num_scopes); m_rewrite.reset(); STRACE("str", tout << "pop_scope: " << num_scopes << " (back to level " << m_scope_level << ")\n";); @@ -876,23 +878,37 @@ namespace smt::noodler { dec_proc.init_computation(); expr_ref block_len(m.mk_false(), m); + bool was_something_approximated = false; while (true) { result = dec_proc.compute_next_solution(); if (result == l_true) { - lengths = len_node_to_z3_formula(dec_proc.get_lengths()); - if (check_len_sat(lengths) == l_true) { + auto [noodler_lengths, precision] = dec_proc.get_lengths(); + lengths = len_node_to_z3_formula(noodler_lengths); + lbool is_lengths_sat = check_len_sat(lengths); + + if (is_lengths_sat == l_true && precision != LenNodePrecision::OVERAPPROX) { STRACE("str", tout << "len sat " << mk_pp(lengths, m) << std::endl;); // save the current assignment to catch it during the loop protection block_curr_len(lengths, true, false); return FC_DONE; - } else { + } else if (is_lengths_sat == l_false && precision != LenNodePrecision::UNDERAPPROX) { + // TODO is handling underapprox correct here? is it even safe to underapproximate? we do not have a case where we underapproximate, but for the future STRACE("str", tout << "len unsat " << mk_pp(lengths, m) << std::endl;); block_len = m.mk_or(block_len, lengths); + } else { + was_something_approximated = true; } } else if (result == l_false) { // we did not find a solution (with satisfiable length constraints) // we need to block current assignment STRACE("str", tout << "assignment unsat " << mk_pp(block_len, m) << std::endl;); + + if (was_something_approximated) { + // if some length formula was an approximation and it did not lead to solution, we have to give up + STRACE("str", tout << "there was approximating - giving up" << std::endl); + return FC_GIVEUP; + } + if(m.is_false(block_len)) { block_curr_len(block_len, false, true); } else { diff --git a/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp b/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp index f9e9cf25582..2b4e26f772b 100644 --- a/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp +++ b/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp @@ -235,7 +235,7 @@ namespace smt::noodler { dec_proc.init_computation(); while(dec_proc.compute_next_solution() == l_true) { - expr_ref lengths = len_node_to_z3_formula(dec_proc.get_lengths()); + expr_ref lengths = len_node_to_z3_formula(dec_proc.get_lengths().first); if(check_len_sat(lengths) == l_true) { return l_true; } @@ -475,7 +475,7 @@ namespace smt::noodler { while (true) { lbool result = nproc.compute_next_solution(); if (result == l_true) { - expr_ref lengths = len_node_to_z3_formula(nproc.get_lengths()); + expr_ref lengths = len_node_to_z3_formula(nproc.get_lengths().first); if (check_len_sat(lengths) == l_true) { return l_true; } else {