Skip to content
Snippets Groups Projects
Commit 12bd92ab authored by Tom Kranz's avatar Tom Kranz
Browse files

Add language intersection to NFA builder

parent d0abf52c
No related branches found
No related tags found
No related merge requests found
......@@ -282,6 +282,20 @@ bool nfa::isAccepting(size_t q) const {
return p->accepting[q];
}
/// Tests whether a set of states contains an accept state within this NFA.
/**
 * @param qs valarray with `true` at indices of states in question
 *           (expected to have `size` == @ref getNumberOfStates; indices beyond either bound are ignored)
 * @return `true` if the set of states contains an accept state, `false` else
 */
bool nfa::hasAccepting(valarray<bool> const& qs) const {
  // Indexing a valarray out of range is undefined, so never walk past the end
  // of `qs`, even if the caller handed in a set shorter than the state count.
  size_t const numStates = getNumberOfStates();
  size_t const limit = qs.size() < numStates ? qs.size() : numStates;
  for (size_t q = 0; q < limit; q++) {
    if (qs[q] && p->accepting[q]) {
      return true;
    }
  }
  return false;
}
/// Copy constructor: the new NFA receives its own duplicate of the source's [private implementation object](@ref pImpl).
nfa::nfa(nfa const& n)
    : p(new pImpl(*n.p)) {}
......@@ -315,12 +329,14 @@ nfa& nfa::operator=(nfa&& n) {
nfa::~nfa() = default;
/// Shorthand for the NFA builder's transition table: state name &rarr; symbol &rarr; set of successor state names.
using Ntransitionmap = unordered_map<string, unordered_map<char32_t, unordered_set<string>>>;
/// Private implementation details of NFA builders.
struct nfa::builder::pImpl {
string initial; ///< Name of the prospective NFA's initial state.
unordered_set<string> acceptingStates; ///< Set of names of the prospective NFA's accept states.
unordered_set<char32_t> alphabet; ///< Set of symbols processable by the prospective NFA.
unordered_map<string,unordered_map<char32_t,unordered_set<string>>> transitions;
Ntransitionmap transitions;
///< Transition function (_state &times; symbol &rarr; set of states_) of the prospective NFA.
/**< Also encodes the set of states in its set of keys. */
......@@ -332,7 +348,7 @@ struct nfa::builder::pImpl {
string& initial,
unordered_set<string>& acceptingStates,
unordered_set<char32_t>& alphabet,
unordered_map<string,unordered_map<char32_t,unordered_set<string>>>& transitions
Ntransitionmap& transitions
) :
initial(move(initial)),
acceptingStates(move(acceptingStates)),
......@@ -373,7 +389,7 @@ nfa::builder::builder(dfa const& dfa) {
string initial(dfa.getLabelOf(0));
unordered_set<string> acceptingStates;
unordered_set<char32_t> alphabet(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
unordered_map<string,unordered_map<char32_t,unordered_set<string>>> transitions;
Ntransitionmap transitions;
for (size_t q(0); q < dfa.getNumberOfStates(); q++) {
for (char32_t symbol : alphabet) {
transitions[dfa.getLabelOf(q)][symbol].insert(dfa.getLabelOf(dfa.delta(q, symbol)));
......@@ -488,7 +504,7 @@ nfa::builder& nfa::builder::normalizeStateNames(string const& prefix) {
stateNames.push_back(fromTo.first);
}
}
unordered_map<string,unordered_map<char32_t,unordered_set<string>>> newTr(p->transitions.size());
Ntransitionmap newTr(p->transitions.size());
unordered_set<string> newAcc(p->acceptingStates.size());
for (size_t q = 0; q < stateNames.size(); q++) {
auto const& vias = p->transitions[stateNames[q]];
......@@ -525,7 +541,7 @@ nfa::builder& nfa::builder::normalizeStateNames(string const& prefix) {
nfa::builder& nfa::builder::unite(nfa const& other) {
string newInitial("q0");
if (!p->transitions.empty()) {
unordered_map<string,unordered_map<char32_t,unordered_set<string>>> tempTr(p->transitions.size());
Ntransitionmap tempTr(p->transitions.size());
for (auto const& fromVia : p->transitions) {
unordered_map<char32_t,unordered_set<string>> tempVia(fromVia.second.size());
for (auto const& viaTo : fromVia.second) {
......@@ -573,6 +589,86 @@ nfa::builder& nfa::builder::unite(nfa const& other) {
return *this;
}
/// Makes the prospective NFA accept only words accepted also by another NFA.
/**
 * Builds the product of the states defined so far with the states of `other`:
 * every pair of states becomes one new state whose name is the concatenation of
 * both names (conflicts are resolved by appending `_`). A pair is an accept state
 * iff both of its components are. Transitions of this builder on symbols that
 * `other` does not know are dropped; afterwards the alphabet is extended by
 * `other`'s symbols.
 *
 * Does nothing if no states have been defined so far.
 * @param other the NFA whose language should also be accepted
 * @return reference to this object, for chaining operations
 */
nfa::builder& nfa::builder::intersect(nfa const& other) {
  if (p->transitions.empty()) {
    return *this;
  }

  // Fix an enumeration of the current states; the initial state gets index 0.
  vector<string> stateNames;
  stateNames.reserve(p->transitions.size());
  stateNames.push_back(p->initial);
  for (auto const& fromVia : p->transitions) {
    if (fromVia.first != p->initial) {
      stateNames.push_back(fromVia.first);
    }
  }

  // Count the shared symbols and drop transitions on symbols unknown to `other`.
  auto const& oAlph = other.getAlphabet();
  size_t commonSymbols = 0;
  for (char32_t symbol : p->alphabet) {
    if (index_of(oAlph, symbol) < oAlph.size()) {
      commonSymbols++;
    } else {
      for (auto& fromVia : p->transitions) {
        fromVia.second.erase(symbol);
      }
    }
  }
  p->alphabet.insert(oAlph.begin(), oAlph.end());

  // NOTE(review): the code below assumes `other` has at least one state; otherwise
  // there is no product state (0, 0) to become the new initial state — confirm.
  size_t const ownCount = stateNames.size();
  size_t const otherCount = other.getNumberOfStates();
  Ntransitionmap newTr(ownCount * otherCount);
  unordered_set<string> newAcc(ownCount * otherCount);
  // pairToName[q][qq] points at the name of product state (q, qq). The names are
  // keys of newTr; an unordered_map keeps its elements' addresses stable on rehash.
  vector<vector<string const*>> pairToName(ownCount, vector<string const*>(otherCount, nullptr));
  for (size_t q = 0; q < ownCount; q++) {
    bool const qAccepting = p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end();
    for (size_t qq = 0; qq < otherCount; qq++) {
      string potentialName = stateNames[q] + other.getLabelOf(qq);
      while (newTr.find(potentialName) != newTr.end()) {
        potentialName.append("_");
      }
      auto const inserted = newTr.emplace(potentialName, commonSymbols);
      pairToName[q][qq] = &(inserted.first->first);
      if (qAccepting && other.isAccepting(qq)) {
        newAcc.insert(potentialName);
      }
    }
    p->transitions[stateNames[q]][U'\0'].insert(stateNames[q]);
    // Needed due to the equivalence of standing still to “taking” an ε-transition.
  }

  // Combine the transitions of both automata symbol by symbol.
  vector<size_t> toIndices;
  for (size_t q = 0; q < ownCount; q++) {
    auto const& viaTos = p->transitions[stateNames[q]];
    for (auto const& viaTo : viaTos) {
      char32_t const symbol = viaTo.first;
      bool const isEpsilon = (symbol == U'\0');
      // Resolve the successor names to indices once per symbol instead of once
      // per pairing with a state of `other`; index_of is a linear search.
      // Assumes every successor name is itself a key of the transition map.
      toIndices.clear();
      for (auto const& to : viaTo.second) {
        toIndices.push_back(index_of(stateNames, to));
      }
      for (size_t qq = 0; qq < otherCount; qq++) {
        valarray<bool> const& reached = other.delta(qq, symbol);
        for (size_t toIndex : toIndices) {
          for (size_t pp = 0; pp < reached.size(); pp++) {
            if (reached[pp] || (pp == qq && isEpsilon)) {
              // Needed due to the equivalence of standing still to “taking” an ε-transition.
              newTr[*(pairToName[q][qq])][symbol].insert(*(pairToName[toIndex][pp]));
            }
          }
        }
      }
    }
  }

  // Remove the auxiliary ε-self-loops again.
  for (auto& fromVia : newTr) {
    auto epsilonTo = fromVia.second.find(U'\0');
    if (epsilonTo == fromVia.second.end()) {
      continue;
    }
    epsilonTo->second.erase(fromVia.first);
    if (epsilonTo->second.empty()) {
      fromVia.second.erase(epsilonTo);
    }
  }

  // Copy the new initial state's name before newTr hands over its contents.
  string newInitial = *(pairToName[0][0]);
  p->transitions = std::move(newTr);
  p->acceptingStates = std::move(newAcc);
  p->initial = std::move(newInitial);
  return *this;
}
/// Builds the NFA, as defined by previous operations.
/**
* @return an NFA object with exactly the states, alphabet and transitions that were defined
......
......@@ -47,6 +47,7 @@ public:
std::vector<std::string> const& getUtf8Alphabet() const;
size_t getNumberOfStates() const;
bool isAccepting(size_t q) const;
bool hasAccepting(std::valarray<bool> const& qs) const;
/// Constructs NFAs step by step.
/**
* Any mention of a symbol or state will add them to the alphabet/set of states.
......@@ -70,6 +71,7 @@ public:
builder& addTransition(std::string const& from, std::string const& to, char32_t symbol);
builder& addTransition(std::string const& from, std::string const& to, std::string const& utf8Symbol);
builder& unite(nfa const& other);
builder& intersect(nfa const& other);
builder& normalizeStateNames(std::string const& prefix);
nfa build();
private:
......
......@@ -162,6 +162,91 @@ TEST(NfaTest, UnionTest) {
}
}
// Exercises nfa::builder::intersect with two NFAs over {0,1}: the first accepts
// the empty word and every word containing "00", the second the empty word and
// every word containing "11". Their intersection must accept exactly the words
// accepted by both.
TEST(NfaTest, IntersectionTest) {
  using namespace reg;

  // Left operand: empty word, or any word with "00" somewhere inside.
  nfa::builder b;
  b.addTransition("0", "00", "")
      .addTransition("0", "00000", "")
      .addTransition("00", "00", "1")
      .addTransition("00", "00", "0")
      .addTransition("00", "000", "0")
      .addTransition("000", "0000", "0")
      .addTransition("0000", "0000", "0")
      .addTransition("0000", "0000", "1")
      .addTransition("0000", "00000", "");
  b.setAccepting("00000", true);

  // Right operand: empty word, or any word with "11" somewhere inside.
  nfa::builder c;
  c.addTransition("0", "00", "")
      .addTransition("0", "00000", "")
      .addTransition("00", "00", "1")
      .addTransition("00", "00", "0")
      .addTransition("00", "000", "1")
      .addTransition("000", "0000", "1")
      .addTransition("0000", "0000", "0")
      .addTransition("0000", "0000", "1")
      .addTransition("0000", "00000", "");
  c.setAccepting("00000", true);

  // Sanity-check the right operand on its own before intersecting.
  nfa m(c);
  size_t acc = 0;
  for (size_t q = 0; q < m.getNumberOfStates(); ++q) {
    if (!m.isAccepting(q)) {
      continue;
    }
    acc = q;
    break;
  }
  ASSERT_EQ(m.getLabelOf(0), "0");
  ASSERT_EQ(m.getLabelOf(acc), "00000");

  // Without reading input, only the initial state and its two ε-successors are reachable.
  std::valarray<bool> reached = m.deltaHat(0, "");
  for (size_t q = 0; q < m.getNumberOfStates(); ++q) {
    ASSERT_EQ(reached[q], q == 0 || q == acc || m.getLabelOf(q) == "00");
  }

  char const* const acceptedByRight[] = {"1100", "0011", "11100", "00111", "11"};
  for (char const* word : acceptedByRight) {
    reached = m.deltaHat(0, word);
    ASSERT_TRUE(m.hasAccepting(reached)) << m.getLabelOf(reached);
  }
  char const* const rejectedByRight[] = {"00", "1", "0", "10", "01"};
  for (char const* word : rejectedByRight) {
    reached = m.deltaHat(0, word);
    ASSERT_FALSE(m.hasAccepting(reached)) << m.getLabelOf(reached);
  }

  // The product of two five-state automata has 25 states.
  b.intersect(c.build());
  nfa n(b);
  EXPECT_EQ(n.getNumberOfStates(), 25);

  char const* const acceptedByBoth[] = {"", "1100", "0011", "11100", "00111"};
  for (char const* word : acceptedByBoth) {
    reached = n.deltaHat(0, word);
    EXPECT_TRUE(n.hasAccepting(reached)) << n.getLabelOf(reached);
  }
  char const* const rejectedByEither[] = {"11", "00", "1", "0", "10", "01"};
  for (char const* word : rejectedByEither) {
    reached = n.deltaHat(0, word);
    EXPECT_FALSE(n.hasAccepting(reached)) << n.getLabelOf(reached);
  }
}
TEST(DfaTest, SimplestBuilderTest) {
using namespace reg;
dfa::builder b;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment