13 #include <unordered_set> 14 using std::unordered_set;
16 #include <unordered_map> 17 using std::unordered_map;
19 #include <forward_list> 20 using std::forward_list;
53 for (char32_t symbol : this->alphabet) {
66 vector<valarray<bool>> distinguishable;
70 distinguishable.push_back(valarray<bool>(
false,
transitions.size()));
71 for (
size_t p(0); p < q; p++) {
73 distinguishable[q][p] = (qAcc != pAcc);
74 distinguishable[p][q] = (qAcc != pAcc);
81 for (
size_t p(0); p < q; p++) {
82 if (distinguishable[q][p]) {
88 if (distinguishable[qS][pS]) {
90 distinguishable[q][p] =
true;
91 distinguishable[p][q] =
true;
99 for (
size_t i(0); i < distinguishable.size(); i++) {
100 distinguishable[i] ^= allTrue;
102 return distinguishable;
110 dfa::dfa(vector<char32_t>& alphabet,
111 vector<vector<size_t>>& transitions,
112 vector<string>& labels,
113 valarray<bool>& acceptingStates) :
114 p(unique_ptr<pImpl>(new pImpl(alphabet, transitions, labels, acceptingStates))) {}
129 p.reset(
new pImpl(*(d.p)));
144 dfa::~dfa() =
default;
153 forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
154 size_t unitedAlphabetSize(p->alphabet.size());
156 if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
157 unitedAlphabet.push_front(symbol);
158 unitedAlphabetSize++;
165 vector<size_t>& row = tTable[q];
166 for (char32_t symbol : unitedAlphabet) {
169 row[s] =
delta(q, symbol);
170 }
catch (std::domain_error e) {
174 size_t r(
delta(q, symbol));
175 if (r >= SIZE_MAX-1) {
188 for (char32_t symbol : unitedAlphabet) {
192 }
catch (std::domain_error e) {
196 size_t r(
delta(q, symbol));
197 if (r >= SIZE_MAX-1) {
207 for (
size_t s(0); s < unitedAlphabetSize; s++) {
226 find(p->alphabet.begin(), p->alphabet.end(), symbol)
228 if (i >= p->alphabet.size()) {
230 throw std::domain_error(
231 string(u8
"δ is not defined for symbol ").append(1, symbol)
237 if (q >= p->labels.size()) {
239 throw std::out_of_range(
240 string(
"There is no state with index ").append(std::to_string(q))
246 return p->transitions[q][i];
250 size_t dfa::delta(
size_t q,
string const& utf8Symbol)
const {
262 for (char32_t symbol : word) {
263 q =
delta(q, symbol);
297 return p->utf8Alphabet;
306 return p->labels.size();
316 return p->accepting[q];
336 unordered_map<
string,unordered_map<char32_t,string>>&
transitions 349 auto via = from.find(symbol);
350 if (via != from.end() && (*via).second != q) {
367 return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
373 bool trashUsed(
false);
374 string const& trashState = [&]{
375 for (
auto const& entry : this->transitions) {
385 if (from.second.find(symbol) == from.second.end()) {
386 from.second[symbol] = trashState;
387 trashUsed |= (from.first != trashState);
398 size_t operator()(valarray<bool>
const& value)
const {
400 for (
bool v : value) {
410 bool fullTrue =
true;
419 valarray<bool>
const& val;
423 bool operator()(valarray<bool>
const& other)
const {
430 bool operator()(valarray<bool>
const& a, valarray<bool>
const& b)
const {
444 auto transitions = unordered_map<string,unordered_map<char32_t,string>>(
dfa.
getNumberOfStates());
445 p = unique_ptr<pImpl>(
new pImpl(
466 alphabet.erase(alphabet.begin());
467 unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
468 unordered_set<string> acceptingStates;
469 unordered_map<string,unordered_map<char32_t,string>> transitions;
470 transitions[initial];
471 p = unique_ptr<pImpl>(
new pImpl(
477 while (stackSize != 0) {
478 valarray<bool> current(move(stack.front()));
481 for (char32_t symbol : p->alphabet) {
482 valarray<bool> next(
nfa.
deltaHat(current, u32string(1, symbol)));
485 if (!equalToNext(current) &&
486 none_of(stack.begin(), stack.end(), equalToNext) &&
487 none_of(done.begin(), done.end(), equalToNext)) {
488 stack.push_front(next);
492 for (
size_t qi(0); qi < current.size(); qi++) {
498 done.insert(current);
512 p.reset(
new pImpl(*(b.p)));
527 dfa::builder::~builder() =
default;
536 p->alphabet.insert(symbol);
553 if (p->transitions.empty()) {
556 p->transitions[state];
558 p->acceptingStates.insert(state);
560 p->acceptingStates.erase(state);
573 p->transitions[state];
588 if (p->transitions.empty()) {
592 p->transitions[from][symbol] = to;
613 vector<vector<size_t>> tTable(p->transitions.size());
614 valarray<bool> accepting(
false, p->transitions.size());
615 vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
616 sort(sortedAlphabet.begin(), sortedAlphabet.end());
617 vector<string> orderedStates;
618 orderedStates.reserve(p->transitions.size());
619 orderedStates.push_back(p->initial);
620 for (
auto const& entry : p->transitions) {
621 if (entry.first != p->initial) {
622 orderedStates.push_back(entry.first);
625 for (
size_t q(0); q < orderedStates.size(); q++) {
626 string const& qLabel(orderedStates[q]);
627 if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
630 for (
size_t s(0); s < sortedAlphabet.size(); s++) {
631 tTable[q].push_back(distance(orderedStates.begin(), find(orderedStates.begin(), orderedStates.end(), p->transitions[qLabel][sortedAlphabet[s]])));
635 for (
size_t q(0); q < orderedStates.size(); q++) {
636 for (
size_t first(0); first < equivalences[q].size(); first++) {
637 if (equivalences[q][first] && q > first) {
638 string const& superfluous(orderedStates[q]);
639 string const& remaining(orderedStates[first]);
640 p->acceptingStates.erase(superfluous);
641 p->transitions.erase(superfluous);
642 for (
auto& from : p->transitions) {
643 for (
auto& via : from.second) {
644 if (via.second == superfluous) {
645 via.second = remaining;
664 unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
665 unordered_set<string> newReachable(reachable);
666 while (newReachable.size() > 0) {
667 unordered_set<string> oldReachable(move(newReachable));
668 newReachable.clear();
669 for (
string const& reachableState : oldReachable) {
670 for (
auto const& reachedState : p->transitions[reachableState]) {
671 if (reachable.insert(reachedState.second).second) {
672 newReachable.insert(reachedState.second);
677 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
678 if (reachable.find(it->first) == reachable.end()) {
679 p->acceptingStates.erase(it->first);
680 it = p->transitions.erase(it);
685 unordered_set<string> producing(p->acceptingStates);
686 unordered_set<string> newProducing(producing);
687 while (newProducing.size() > 0) {
688 unordered_set<string> oldProducing(move(newProducing));
689 newProducing.clear();
690 for (
auto const& from : p->transitions) {
691 for (
auto const& via : from.second) {
692 if (producing.find(via.second) != producing.end()) {
693 if (producing.insert(from.first).second) {
694 newProducing.insert(from.first);
701 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
702 if (producing.find(it->first) == producing.end() && it->first != p->initial) {
703 p->acceptingStates.erase(it->first);
704 for (
auto& via : p->transitions) {
705 for (
auto itv = via.second.begin(); itv != via.second.end(); ) {
706 if (itv->second == it->first) {
707 itv = via.second.erase(itv);
713 it = p->transitions.erase(it);
727 vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
728 sort(alphabetV.begin(), alphabetV.end());
730 vector<string> labelV;
731 labelV.reserve(p->transitions.size());
732 labelV.push_back(p->initial);
733 valarray<bool> acceptingV(labelV.size(),
false);
734 for (
auto const& tr : p->transitions) {
735 if (tr.first == p->initial) {
738 labelV.push_back(tr.first);
740 vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
741 for (
size_t q(0); q < labelV.size(); q++) {
742 string const& qLabel = labelV[q];
743 acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
744 for (
size_t s(0); s < alphabetV.size(); s++) {
745 transitionV[q][s] = distance(
747 find(labelV.begin(), labelV.end(), p->transitions[qLabel][alphabetV[s]])
751 return {alphabetV, transitionV, labelV, acceptingV};
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this DFA.
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
unordered_map< string, unordered_map< char32_t, string > > transitions
Transition function (state × symbol → state) of the prospective DFA.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
static std::unique_ptr< std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > > const converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Comparison object for valarray<bool>s against a specific instance.
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
size_t delta(size_t q, char32_t symbol) const
Computes this DFA's transition function for a state and a symbol.
vector< char32_t > alphabet
Represents the set of processable symbols.
dfa()
Constructs a DFA accepting the empty language ∅.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Comparison object for valarray<bool>s, so they can be used as unordered_set keys. ...
Represents deterministic finite automata.
Private implementation details of DFAs.
std::valarray< bool > deltaHat(size_t q, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a specified ...
vector< string > labels
Stores the names of states.
Hasher object for valarray<bool>s, so they can be used as unordered_set keys.
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, unordered_map< string, unordered_map< char32_t, string >> &transitions)
Constructs private implementation object with provided members.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
Private implementation details of DFA builders.
Contains the reg::nfa class definition.
Contains the reg::expression class defintion.
static bool valarrayFullTrue(valarray< bool > const &a)
Tests whether all of a valarray<bool>'s entries are true.
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Constructs DFAs step by step.
size_t deltaHat(size_t q, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a specified ...
size_t getNumberOfStates() const
Counts this DFA's states.
string initial
Name of the prospective DFA's initial state.
builder()
Constructs a blank builder object.
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
unordered_set< string > acceptingStates
Set of names of the prospective DFA's accept states.
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
pImpl()=default
Constructs empty private implementation object.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
std::valarray< bool > const & epsilonClosure(size_t q) const
Computes a state's ε-closure.
Contains the reg::dfa class definition.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.