Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple regex membership heuristic #139

Merged
merged 11 commits into from
May 1, 2024
29 changes: 29 additions & 0 deletions src/smt/theory_str_noodler/regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,17 @@ namespace smt::noodler::regex {
// intermediate automata reduction
// if the automaton is too big --> skip it. The computation of the simulation would be too expensive.
if(nfa.num_of_states() < RED_BOUND) {
STRACE("str-create_nfa-reduce",
tout << "--------------" << "NFA for: " << mk_pp(const_cast<app*>(expression), const_cast<ast_manager&>(m)) << " that is going to be reduced" << "---------------" << std::endl;
nfa.print_to_DOT(tout);
);
nfa = mata::nfa::reduce(nfa);
}
if(determinize) {
STRACE("str-create_nfa-reduce",
tout << "--------------" << "NFA for: " << mk_pp(const_cast<app*>(expression), const_cast<ast_manager&>(m)) << " that is going to be minimized" << "---------------" << std::endl;
nfa.print_to_DOT(tout);
);
nfa = mata::nfa::minimize(nfa);
}

Expand Down Expand Up @@ -472,4 +480,25 @@ namespace smt::noodler::regex {

return nfa;
}

unsigned get_loop_sum(const app* reg, const seq_util& m_util_s) {
expr* body;
unsigned lo, hi;
if (m_util_s.re.is_loop(reg, body, lo, hi)) {
unsigned body_loop = get_loop_sum(to_app(body), m_util_s);
if (body_loop == 0) {
return hi;
} else {
return hi*body_loop;
}
} else if (m_util_s.str.is_string(reg)) {
return 0;
} else {
unsigned sum = 0;
for (unsigned arg_num = 0; arg_num < reg->get_num_args(); ++arg_num) {
sum += get_loop_sum(to_app(reg->get_arg(arg_num)), m_util_s);
}
return sum;
}
}
}
9 changes: 9 additions & 0 deletions src/smt/theory_str_noodler/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ namespace smt::noodler::regex {
* @return mata::nfa::Nfa NFA
*/
mata::nfa::Nfa create_large_concat(const mata::nfa::Nfa& body_nfa, unsigned count);

/**
* @brief Get the sum of loops of a regex (loop inside a loop is multiplied)
*
* @param reg some regular expression predicate (can be also string literal/var)
* @param m_util_s string ast util
* @return sum of loops inside @p regex, with nested loops multiplied
*/
unsigned get_loop_sum(const app* reg, const seq_util& m_util_s);
}

#endif
17 changes: 13 additions & 4 deletions src/smt/theory_str_noodler/theory_str_noodler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,10 +775,6 @@ namespace smt::noodler {
}
);

bool contains_word_equations = !this->m_word_eq_todo_rel.empty();
bool contains_word_disequations = !this->m_word_diseq_todo_rel.empty();
bool contains_conversions = !this->m_conversion_todo.empty();

// Solve Language (dis)equations
if (!solve_lang_eqs_diseqs()) {
// one of the (dis)equations is unsat
Expand All @@ -799,6 +795,10 @@ namespace smt::noodler {
}
}

bool contains_word_equations = !this->m_word_eq_todo_rel.empty();
bool contains_word_disequations = !this->m_word_diseq_todo_rel.empty();
bool contains_conversions = !this->m_conversion_todo.empty();

// As a heuristic, for the case we have exactly one constraint, which is of type 'x notin RE', we use universality
// checking instead of constructing the automaton for complement of RE. The complement can sometimes blow up, so
// universality checking should be faster.
Expand All @@ -811,6 +811,15 @@ namespace smt::noodler {
}
}

if (is_mult_membership_suitable()) {
lbool result = run_mult_membership_heur();
if(result == l_true) {
return FC_DONE;
} else if(result == l_false) {
return FC_CONTINUE;
}
}

// Gather relevant word (dis)equations to noodler formula
Formula instance = get_word_formula_from_relevant();
STRACE("str",
Expand Down
19 changes: 19 additions & 0 deletions src/smt/theory_str_noodler/theory_str_noodler.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,16 @@ namespace smt::noodler {
*/
bool is_underapprox_suitable(const Formula& instance, const AutAssignment& aut_ass, const std::vector<TermConversion>& convs) const;

/**
* @brief Checks if the relevant predicates are suitable for multiple membership heuristic
*
* Multiple membership heuristic is used when we have formula containing only memberships.
* We then only need to compute intersection from the regular languages and check if it is not empty.
* The heuristics sorts the regexes by expected complexity of computing nfa, and iteratively computes
* the intersection, so that if the formula is unsat, we do not need to build all automata.
*/
bool is_mult_membership_suitable();

/**
* @brief Wrapper for running the Nielsen transformation.
*
Expand All @@ -385,6 +395,15 @@ namespace smt::noodler {
*/
lbool run_membership_heur();

/**
* @brief Wrapper for running the mulitple membership query heuristics.
*
* Check is_mult_membership_suitable() for explanation.
*
* @return lbool Outcome of the heuristic procedure.
*/
lbool run_mult_membership_heur();

/**
* @brief Wrapper for running the loop protection.
*
Expand Down
106 changes: 106 additions & 0 deletions src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,112 @@ namespace smt::noodler {
return l_undef;
}

bool theory_str_noodler::is_mult_membership_suitable() {
if (!this->m_conversion_todo.empty() || !this->m_not_contains_todo_rel.empty()) {
return false;
}

// TODO handle (string_literal in RE) and also length vars, instead of giving up
for (const auto& membership: m_membership_todo_rel) {
if(m_util_s.str.is_string(to_app(std::get<0>(membership))) || len_vars.contains(std::get<0>(membership))) {
return false;
}
}

// we now check if we only have "trivial" (dis)equations, i.e. they can only be of form
// x == str_literal or x != str_literal
// i.e., one variable on the left, a string literal on the right

for (const auto& diseq : m_word_diseq_todo_rel) {
if (!(util::is_str_variable(diseq.first, m_util_s) && m_util_s.str.is_string(diseq.second))
|| len_vars.contains(diseq.first)) // TODO handle length vars in run_mult_membership_heur() by using the lengths from the final intersection automaton
{
return false;
}
}

for (const auto& eq : m_word_eq_todo_rel) {
if (!(util::is_str_variable(eq.first, m_util_s) && m_util_s.str.is_string(eq.second))
|| len_vars.contains(eq.first)) // TODO handle length vars in run_mult_membership_heur() by using the lengths from the final intersection automaton
{
return false;
}
}

return true;
}

lbool theory_str_noodler::run_mult_membership_heur() {
STRACE("str", tout << "trying multiple regex membership heuristic" << std::endl;);

regex::Alphabet alph(get_symbols_from_relevant());
// to each var x we map all the regexes RE where we have (x in RE) + bool that is true if it is (x not in RE)
std::map<BasicTerm, std::vector<std::pair<bool,app*>>> var_to_list_of_regexes_and_complement_flag;

// collect from relevant memberships
for (const auto &membership: m_membership_todo_rel) {
BasicTerm var(BasicTermType::Variable, to_app(std::get<0>(membership))->get_decl()->get_name().str());
app* reg = to_app(std::get<1>(membership));
var_to_list_of_regexes_and_complement_flag[var].push_back(std::make_pair(!std::get<2>(membership), reg));
}

// we assume that is_mult_membership_heur was run before, therefore we have only disequations
// x != str_literal
// i.e., one var on left and some string literal on right, we can replace this with (x not in {str_literal})
for (const auto& diseq : m_word_diseq_todo_rel) {
BasicTerm var(BasicTermType::Variable, to_app(diseq.first)->get_decl()->get_name().str());
app* reg = to_app(diseq.second);
var_to_list_of_regexes_and_complement_flag[var].push_back(std::make_pair(true, reg));
}

// we assume that is_mult_membership_heur was run before, therefore we have only equations
// x == str_literal
// i.e., one var on left and some string literal on right, we can replace this with (x in {str_literal})
for (const auto& eq : m_word_eq_todo_rel) {
BasicTerm var(BasicTermType::Variable, to_app(eq.first)->get_decl()->get_name().str());
app* reg = to_app(eq.second);
var_to_list_of_regexes_and_complement_flag[var].push_back(std::make_pair(false, reg));
}

for (auto& [var, list_of_regexes] : var_to_list_of_regexes_and_complement_flag) {
// sort the regexes using get_loop_sum, where those regexes that needs to be complemented should all be at the end
std::sort(list_of_regexes.begin(), list_of_regexes.end(), [this](const std::pair<bool,app*>& l, const std::pair<bool,app*>& r) {
return ((!l.first && r.first) | (regex::get_loop_sum(l.second, m_util_s) < regex::get_loop_sum(r.second, m_util_s)));
});
STRACE("str-mult-memb-heur",
tout << "Sorted NFAs for var " << var << std::endl;
unsigned i = 0;
for (const auto & [is_complement, nfa] : list_of_regexes) {
tout << i << " (" << (is_complement ? "" : "not ") <<"complemented):" << std::endl;
tout << nfa << std::endl;
}
);

std::shared_ptr<mata::nfa::Nfa> intersection = nullptr; // we save the intersected automata here
for (auto& [is_complement, reg] : list_of_regexes) {
STRACE("str", tout << "building intersection for var " << var << " and regex " << mk_pp(reg, m) << (is_complement ? " that needs to be first complemented" : " that does not need to be first complemented") << std::endl;);

std::shared_ptr<mata::nfa::Nfa> nfa = std::make_shared<mata::nfa::Nfa>(regex::conv_to_nfa(reg, m_util_s, m, alph, is_complement, is_complement));

if (intersection == nullptr) {
intersection = nfa; // this is first nfa
} else {
intersection = std::make_shared<mata::nfa::Nfa>(mata::nfa::reduce(mata::nfa::intersection(*nfa, *intersection)));
}
nfa = nullptr;

if (intersection->is_lang_empty()) {
STRACE("str", tout << "intersection is empty => UNSAT" << std::endl;);
block_curr_len(expr_ref(this->m.mk_false(), this->m));
return l_false;
}
}
}

STRACE("str", tout << "intersection is not empty => SAT" << std::endl;);
return l_true;
}

lbool theory_str_noodler::run_loop_protection() {
expr_ref refine = construct_refinement();
if (refine != nullptr) {
Expand Down