6 using std::make_unique;
12 #include <unordered_set> 13 using std::unordered_set;
15 #include <unordered_map> 16 using std::unordered_map;
50 vector<string>&
labels, valarray<bool>& acceptingStates) :
56 for (char32_t symbol : this->alphabet) {
57 if (symbol == U
'\0') {
82 nfa::nfa(vector<char32_t>& alphabet, vector<vector<valarray<bool>>>& transitions,
83 vector<string>& labels, valarray<bool>& acceptingStates) :
84 p(new pImpl(alphabet, transitions, labels, acceptingStates)) {}
88 return p->getEquivalentDfa(
this) == n.p->getEquivalentDfa(&n);
93 return p->getEquivalentDfa(
this) != n.p->getEquivalentDfa(&n);
102 valarray<bool>
const&
nfa::delta(
size_t q, char32_t symbol)
const {
103 size_t i =
index_of(p->alphabet, symbol);
104 if (i >= p->alphabet.size()) {
106 throw std::domain_error(
107 string(u8
"δ is not defined for symbol ") +
converter.to_bytes(symbol)
111 if (q >= p->labels.size()) {
113 throw std::out_of_range(
114 string(
"There is no state with index ").append(std::to_string(q))
118 return p->transitions[q][i];
122 valarray<bool>
const&
nfa::delta(
size_t q,
string const& utf8Symbol)
const {
133 valarray<bool> qs(p->labels.size());
149 valarray<bool>
nfa::deltaHat(valarray<bool>
const& qs, std::u32string
const& word)
const {
151 for (char32_t symbol : word) {
152 valarray<bool> reached(p->labels.size());
166 valarray<bool>
nfa::deltaHat(valarray<bool>
const& qs,
string const& utf8Word)
const {
184 if (p->accepting[0]) {
187 unordered_map<size_t, u32string> shortestWords(p->labels.size());
188 shortestWords.emplace(0, U
"");
190 for (
auto const& stateWord : shortestWords) {
191 for (
auto symbol : p->alphabet) {
192 valarray<bool> reached =
deltaHat(stateWord.first, u32string(!!symbol, symbol));
193 u32string shWord = stateWord.second + u32string(!!symbol, symbol);
194 for (
size_t q = 0; q < reached.size(); q++) {
if (reached[q]) {
195 if (p->accepting[q]) {
198 if (shortestWords.find(q) == shortestWords.end()) {
199 shortestWords.emplace(q, shWord);
218 if (p->epsClosures[q].size() == 0) {
219 p->epsClosures[q].resize(p->labels.size());
220 p->epsClosures[q][q] =
true;
224 valarray<bool> old(p->epsClosures[q]);
226 for (
bool qb : old) {
229 for (
bool pb : p->transitions[qi][0]) {
231 if (!p->epsClosures[q][pi]) {
233 p->epsClosures[q][pi] =
true;
243 return p->epsClosures[q];
252 valarray<bool> closure(p->labels.size());
282 label = label.append(1, label.length() == 0 ?
'{' :
',');
283 label = label.append(p->labels[qi]);
287 if (label.length() == 0) {
290 return label.append(1,
'}');
307 return p->utf8Alphabet;
315 return p->labels.size();
324 return p->accepting[q];
334 if (qs[q] && p->accepting[q]) {
410 p.reset(
new pImpl(*(n.p)));
425 nfa::~nfa() =
default;
427 using Ntransitionmap = unordered_map<string, unordered_map<char32_t, unordered_set<string>>>;
463 for (
size_t qi = 0; qi <
nfa.p->labels.size(); ++qi) {
465 for (char32_t symbol : p->alphabet) {
466 valarray<bool> ps =
nfa.
delta(qi, symbol);
485 unordered_set<string> acceptingStates;
487 Ntransitionmap transitions;
489 for (char32_t symbol : alphabet) {
496 p = make_unique<pImpl>(initial, acceptingStates, alphabet, transitions);
509 p.reset(
new pImpl(*(b.p)));
530 p->alphabet.insert(symbol);
536 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
546 if (p->transitions.empty()) {
549 p->transitions[state];
551 p->acceptingStates.insert(state);
553 p->acceptingStates.erase(state);
565 p->transitions[state];
577 if (p->transitions.empty()) {
581 p->transitions[from][symbol].insert(to);
588 return addTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
593 if (!p->transitions.empty()) {
594 vector<string> stateNames;
595 stateNames.reserve(p->transitions.size());
596 stateNames.push_back(p->initial);
597 for (
auto const& fromTo : p->transitions) {
598 if (fromTo.first != p->initial) {
599 stateNames.push_back(fromTo.first);
602 Ntransitionmap newTr(p->transitions.size());
603 unordered_set<string> newAcc(p->acceptingStates.size());
604 for (
size_t q = 0; q < stateNames.size(); q++) {
605 auto const& vias = p->transitions[stateNames[q]];
606 unordered_map<char32_t,unordered_set<string>> newVias(vias.size());
607 for (
auto const& viaTo : vias) {
608 unordered_set<string> newTos(viaTo.second.size());
609 for (
size_t p = 0; p < stateNames.size(); p++) {
610 if (viaTo.second.find(stateNames[p]) != viaTo.second.cend()) {
611 newTos.insert(
string(prefix).append(std::to_string(p)));
614 newVias.insert(make_pair(viaTo.first, newTos));
616 newTr.insert(make_pair(
string(prefix).append(std::to_string(q)), newVias));
617 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
618 newAcc.insert(
string(prefix).append(std::to_string(q)));
621 p->initial = string(
string(prefix).append(
"0"));
622 p->transitions = newTr;
623 p->acceptingStates = newAcc;
637 string newInitial(
"q0");
638 if (!p->transitions.empty()) {
639 Ntransitionmap tempTr(p->transitions.size());
640 for (
auto const& fromVia : p->transitions) {
641 unordered_map<char32_t,unordered_set<string>> tempVia(fromVia.second.size());
642 for (
auto const& viaTo : fromVia.second) {
643 unordered_set<string> tempTo(viaTo.second.size());
644 for (
auto const& to : viaTo.second) {
645 tempTo.insert(
string(to.length() + 1,
'1').replace(1, string::npos, to));
647 tempVia.insert(make_pair(viaTo.first, tempTo));
649 tempTr.insert(make_pair(
string(fromVia.first.length() + 1,
'1').replace(1, string::npos, fromVia.first), tempVia));
651 p->transitions = tempTr;
652 unordered_set<string> tempAcc(p->acceptingStates.size());
653 for (
auto const& acc : p->acceptingStates) {
654 tempAcc.insert(
string(acc.length() + 1,
'1').replace(1, string::npos, acc));
656 p->acceptingStates = tempAcc;
657 p->initial = string(p->initial.length() + 1,
'1').replace(1, string::npos, p->initial);
658 addTransition(newInitial, p->initial, U
'\0');
660 makeInitial(newInitial);
662 p->alphabet.insert(oAlphabet.cbegin(), oAlphabet.cend());
665 string newQLabel = string(qLabel.length() + 1,
'2').replace(1, string::npos, qLabel);
666 unordered_map<char32_t,unordered_set<string>> tempVia(oAlphabet.size() + 1);
667 for (char32_t symbol : oAlphabet) {
668 valarray<bool>
const& to = other.
delta(q, symbol);
672 tempTo.insert(
string(pLabel.length() + 1,
'2').replace(1, string::npos, pLabel));
674 tempVia.insert(make_pair(symbol, tempTo));
676 p->transitions.insert(make_pair(newQLabel, tempVia));
678 p->acceptingStates.insert(newQLabel);
681 addTransition(newInitial, newQLabel, U
'\0');
695 if (!p->transitions.empty()) {
696 vector<string> stateNames;
697 stateNames.reserve(p->transitions.size());
698 stateNames.push_back(p->initial);
699 for (
auto const& fromTo : p->transitions) {
700 if (fromTo.first != p->initial) {
701 stateNames.push_back(fromTo.first);
705 size_t commonSymbols = 0;
706 for (char32_t symbol : p->alphabet) {
707 if (
index_of(oAlph, symbol) < oAlph.size()) {
710 for (
auto & fromVia : p->transitions) {
711 fromVia.second.erase(symbol);
715 p->alphabet.insert(oAlph.begin(), oAlph.end());
718 unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.
getNumberOfStates());
719 for (
size_t q = 0; q < stateNames.size(); q++) {
721 string potentialName = stateNames[q] + other.
getLabelOf(qq);
722 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
723 potentialName.append(
"_");
725 auto nameIt = newTr.emplace(potentialName, commonSymbols);
726 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
727 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
728 newAcc.insert(potentialName);
731 p->transitions[stateNames[q]][U
'\0'].insert(stateNames[q]);
734 for (
size_t q = 0; q < stateNames.size(); q++) {
735 auto const& qLabel = stateNames[q];
736 auto const& viaTos = p->transitions[qLabel];
737 for (
auto const& viaTo : viaTos) {
739 valarray<bool>
const& reached = other.
delta(qq, viaTo.first);
740 for (
auto const& to : viaTo.second) {
741 size_t p =
index_of(stateNames, to);
742 for (
size_t pp = 0; pp < reached.size(); pp++) {
743 if (reached[pp] || (pp == qq && viaTo.first == U
'\0')) {
745 newTr[*(pairToName[q][qq])][viaTo.first].insert(*(pairToName[p][pp]));
752 for (
auto & fromVia : newTr) {
753 auto const& from = fromVia.first;
754 auto to = fromVia.second.find(U
'\0');
755 to->second.erase(from);
756 if (to->second.empty()) {
757 fromVia.second.erase(to);
760 p->transitions = newTr;
761 p->acceptingStates = newAcc;
762 p->initial = *(pairToName[0][0]);
772 p->alphabet.insert(U
'\0');
773 vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
774 std::sort(alphabetV.begin(), alphabetV.end());
775 vector<string> labelV(p->transitions.size());
776 labelV[0] = p->initial;
777 valarray<bool> acceptingV(p->transitions.size());
778 acceptingV[0] = (p->acceptingStates.find(p->initial) != p->acceptingStates.end());
780 for (
auto entry : p->transitions) {
781 if (entry.first == p->initial) {
784 labelV[i] = entry.first;
786 p->acceptingStates.find(entry.first) != p->acceptingStates.end()
789 vector<vector<valarray<bool>>> transitionV(
790 p->transitions.size(),
791 vector<valarray<bool>>(
792 p->alphabet.size()+1,
793 valarray<bool>(labelV.size())
796 unordered_set<string> emptySet;
798 for (
auto const& q : labelV) {
799 unordered_map<char32_t,unordered_set<string>>
const& fromQ = p->transitions[q];
801 for (char32_t symbol : alphabetV) {
802 auto to = fromQ.find(symbol);
803 unordered_set<string>
const& viaSymbol(to != fromQ.end() ? to->second : emptySet);
805 for (
auto const& p : labelV) {
806 transitionV[qi][si][i++] = (viaSymbol.count(p) != 0);
812 return {alphabetV, transitionV, labelV, acceptingV};
815 nfa::builder::~builder() =
default;
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this DFA.
Ntransitionmap transitions
Transition function (state × symbol → set of states) of the prospective NFA.
nfa build()
Builds the NFA, as defined by previous operations.
static nfa::builder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word, INCLUDING POTENTIAL INFINITE LOOP.
size_t delta(size_t q, char32_t symbol) const
Computes this DFA's transition function for a state and a symbol.
Represents nondeterministic finite automata with ε-moves.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Represents deterministic finite automata.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
std::valarray< bool > deltaHat(size_t q, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a specified ...
Constructs NFAs step by step.
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
bool operator==(nfa const &n) const
Tests whether this NFA accepts exactly the same language as another one.
pImpl()
Constructs private implementation object for an NFA accepting the empty language ∅.
vector< string > labels
Stores the names of states.
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
static nfa::builder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
builder & unite(nfa const &other)
Makes the prospective NFA also accept every word of another NFA's language.
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
string initial
Name of the prospective NFA's initial state.
bool operator!=(nfa const &n) const
Tests whether this NFA doesn't accept the same language as another one.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective NFA's alphabet.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
Contains the reg::nfa class definition.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Contains the reg::expression class defintion.
vector< char32_t > alphabet
Represents the set of processable symbols.
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this NFA.
static nfa::builder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
Private implementation details of NFAs.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective NFA.
Constructs DFAs step by step.
vector< valarray< bool > > epsClosures
Cache for every state's ε-closures.
size_t getNumberOfStates() const
Counts this DFA's states.
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
builder & intersect(nfa const &other)
Makes the prospective NFA accept only words accepted also by another NFA.
Private implementation details of NFA builders.
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object's NFA.
size_t getNumberOfStates() const
Counts this NFA's states.
static nfa::builder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
pImpl(string &initial, unordered_set< string > &acceptingStates, unordered_set< char32_t > &alphabet, Ntransitionmap &transitions)
Constructs private implementation object with provided members.
pImpl(vector< char32_t > &alphabet, vector< vector< valarray< bool >>> &transitions, vector< string > &labels, valarray< bool > &acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
dfa const & getEquivalentDfa(nfa const *owner)
Returns the equivalent DFA safely, constructing it if it wasn't already.
bool hasAccepting(std::valarray< bool > const &qs) const
Tests whether a set of states contains an accept state within this NFA.
size_t index_of(vector< T > const &vec, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
nfa()
Constructs an NFA accepting the empty language ∅.
builder & operator=(builder const &b)
Copy-assigns a builder by copying another one's private implementation object.
unordered_set< string > acceptingStates
Set of names of the prospective NFA's accept states.
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
std::valarray< bool > const & delta(size_t q, char32_t symbol) const
Computes this NFA's transition function for a state and a symbol.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
pImpl()=default
Constructs empty private implementation object.
std::valarray< bool > const & epsilonClosure(size_t q) const
Computes a state's ε-closure.
Contains the reg::dfa class definition.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
builder()
Constructs a blank builder object.