reglibcpp  1.0.0
(Naïve) C++ implementation of models for regular languages
nfa.cpp
Go to the documentation of this file.
1 #include "nfa.h"
3 using std::valarray;
4 using std::vector;
5 using std::string;
6 using std::unique_ptr;
7 using std::shared_ptr;
8 using std::u32string;
9 
10 using std::move;
11 
12 #include <unordered_set>
13 using std::unordered_set;
14 
15 #include <unordered_map>
16 using std::unordered_map;
17 
18 #include <algorithm>
19 using std::find;
20 using std::sort;
21 
22 #include <iterator>
23 using std::distance;
24 
25 #include "dfa.h"
26 #include "expression.h"
27 
28 namespace reg {
29 
31 struct nfa::pImpl {
32  shared_ptr<dfa const> equivalent;
33  valarray<bool> accepting;
34  vector<char32_t> alphabet;
35  vector<string> utf8Alphabet;
36  vector<valarray<bool>> mutable epsClosures;
37  vector<string> labels;
38  vector<vector<valarray<bool>>> transitions;
39 
41  pImpl() :
42  equivalent(),
43  accepting(),
44  alphabet(1, U'\0'),
45  utf8Alphabet(1, ""),
46  epsClosures(),
47  labels(),
48  transitions() {}
49 
51  pImpl(vector<char32_t>& alphabet, vector<vector<valarray<bool>>>& transitions,
52  vector<string>& labels, valarray<bool>& acceptingStates) :
53  accepting(move(acceptingStates)),
54  alphabet(move(alphabet)),
55  labels(move(labels)),
56  transitions(move(transitions)) {
57  utf8Alphabet.reserve(this->alphabet.size());
58  for (char32_t symbol : this->alphabet) {
59  if (symbol == U'\0') {
60  utf8Alphabet.push_back("");
61  } else {
62  utf8Alphabet.push_back(expression::converter->to_bytes(symbol));
63  }
64  }
65  epsClosures = vector<valarray<bool>>(pImpl::labels.size());
66  }
67 
69 
73  dfa const& getEquivalentDfa(nfa const* owner) {
74  if (!equivalent) {
75  equivalent.reset(new dfa::dfa(dfa::builder(*owner).purge().merge()));
76  }
77  return *equivalent;
78  }
79 };
80 
82 nfa::nfa() : p(new pImpl) {}
83 
85 nfa::nfa(vector<char32_t>& alphabet, vector<vector<valarray<bool>>>& transitions,
86  vector<string>& labels, valarray<bool>& acceptingStates) :
87  p(new pImpl(alphabet, transitions, labels, acceptingStates)) {}
88 
90 bool nfa::operator==(nfa const& n) const {
91  return p->getEquivalentDfa(this) == n.p->getEquivalentDfa(&n);
92 }
93 
95 bool nfa::operator!=(nfa const& n) const {
96  return p->getEquivalentDfa(this) != n.p->getEquivalentDfa(&n);
97 }
98 
100 
106 valarray<bool> const& nfa::delta(size_t q, char32_t symbol) const {
107  size_t i = distance(
108  p->alphabet.begin(),
109  find(p->alphabet.begin(), p->alphabet.end(), symbol)
110  );
111  if (i >= p->alphabet.size()) {
112  #ifdef __EXCEPTIONS
113  throw std::domain_error(
114  string(u8"δ is not defined for symbol ").append(1, symbol)
115  );
116  #endif
117  }
118  if (q >= p->labels.size()) {
119  #ifdef __EXCEPTIONS
120  throw std::out_of_range(
121  string("There is no state with index ").append(std::to_string(q))
122  );
123  #endif
124  }
125  return p->transitions[q][i];
126 }
127 
129 valarray<bool> const& nfa::delta(size_t q, string const& utf8Symbol) const {
130  return delta(q, expression::converter->from_bytes(utf8Symbol)[0]);
131 }
132 
134 
140 valarray<bool> nfa::deltaHat(size_t q, std::u32string const& word) const {
141  valarray<bool> qs(p->labels.size());
142  qs[q] = true;
143  return deltaHat(qs, word);
144 }
145 
147 valarray<bool> nfa::deltaHat(size_t q, string const& utf8Word) const {
148  return deltaHat(q, expression::converter->from_bytes(utf8Word));
149 }
150 
152 
158 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, std::u32string const& word) const {
159  valarray<bool> ps = epsilonClosure(qs);
160  for (char32_t symbol : word) {
161  valarray<bool> reached(p->labels.size());
162  size_t qi(0);
163  for (bool qb : ps) {
164  if (qb) {
165  reached |= epsilonClosure(delta(qi, symbol));
166  }
167  qi++;
168  }
169  ps = move(reached);
170  }
171  return ps;
172 }
173 
175 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, string const& utf8Word) const {
176  return deltaHat(qs, expression::converter->from_bytes(utf8Word));
177 }
178 
180 
185 valarray<bool> const& nfa::epsilonClosure(size_t q) const {
186  if (p->epsClosures[q].size() == 0) {
187  p->epsClosures[q].resize(p->labels.size());
188  p->epsClosures[q][q] = true;
189  int growth(1);
190  while (growth > 0) {
191  growth = 0;
192  valarray<bool> old(p->epsClosures[q]);
193  size_t qi(0);
194  for (bool qb : old) {
195  if (qb) {
196  size_t pi(0);
197  for (bool pb : p->transitions[qi][0]) {
198  if (pb) {
199  if (!p->epsClosures[q][pi]) {
200  growth++;
201  p->epsClosures[q][pi] = true;
202  }
203  }
204  pi++;
205  } // going through the true bools in transitions[qi][0]
206  }
207  qi++;
208  } // going through the true bools in old
209  }
210  }
211  return p->epsClosures[q];
212 }
213 
215 
220 valarray<bool> nfa::epsilonClosure(valarray<bool> const& qs) const {
221  valarray<bool> closure(p->labels.size());
222  size_t qi(0);
223  for (bool qb : qs) {
224  if (qb) {
225  closure |= epsilonClosure(qi);
226  }
227  qi++;
228  } // going through the true bools in qs
229  return closure;
230 }
231 
233 
237 string const& nfa::getLabelOf(size_t q) const {
238  return p->labels[q];
239 }
240 
242 
246 string nfa::getLabelOf(valarray<bool> qs) const {
247  string label;
248  size_t qi(0);
249  for (bool qb : qs) {
250  if (qb) {
251  label = label.append(1, label.length() == 0 ? '{' : ',');
252  label = label.append(p->labels[qi]);
253  }
254  qi++;
255  }
256  if (label.length() == 0) {
257  return string("{}");
258  } else {
259  return label.append(1, '}');
260  }
261 }
262 
264 
268 vector<char32_t> const& nfa::getAlphabet() const {
269  return p->alphabet;
270 }
271 
273 
277 vector<string> const& nfa::getUtf8Alphabet() const {
278  return p->utf8Alphabet;
279 }
280 
282 
286 size_t nfa::getNumberOfStates() const {
287  return p->labels.size();
288 }
289 
291 
296 bool nfa::isAccepting(size_t q) const {
297  return p->accepting[q];
298 }
299 
301 nfa::nfa(nfa const& n) : p(new pImpl(*(n.p))) {}
302 
304 
305 nfa::nfa(nfa&& n) : p(new pImpl) {p.swap(n.p);}
306 
308 nfa& nfa::operator=(nfa const& n) {
309  if (this != &n) {
310  p.reset(new pImpl(*(n.p)));
311  }
312  return *this;
313 }
314 
316 
318  if (this != &n) {
319  p.reset(new pImpl);
320  p.swap(n.p);
321  }
322  return *this;
323 }
324 
325 nfa::~nfa() = default;
326 
329  string initial;
330  unordered_set<string> acceptingStates;
331  unordered_set<char32_t> alphabet;
332  unordered_map<string,unordered_map<char32_t,unordered_set<string>>> transitions;
334 
336  pImpl() = default;
338 
341  string& initial,
342  unordered_set<string>& acceptingStates,
343  unordered_set<char32_t>& alphabet,
344  unordered_map<string,unordered_map<char32_t,unordered_set<string>>>& transitions
345  ) :
346  initial(move(initial)),
348  alphabet(move(alphabet)),
349  transitions(move(transitions)) {}
350 };
351 
354 
356 nfa::builder::builder(nfa const& nfa) : p(new pImpl) {
358  for (char32_t symbol : nfa.getAlphabet()) {
359  addSymbol(symbol);
360  }
361  for (size_t qi = 0; qi < nfa.p->labels.size(); ++qi) {
362  string const& qLabel = nfa.getLabelOf(qi);
363  for (char32_t symbol : p->alphabet) {
364  valarray<bool> ps = nfa.delta(qi, symbol);
365  size_t pi(0);
366  for (bool pb : ps) {
367  if (pb) {
368  addTransition(qLabel, nfa.getLabelOf(pi), symbol);
369  }
370  pi++;
371  }
372  }
373  if (nfa.isAccepting(qi)) {
374  setAccepting(qLabel, true);
375  }
376  }
377 }
378 
380 
382  string initial(dfa.getLabelOf(0));
383  unordered_set<string> acceptingStates;
384  unordered_set<char32_t> alphabet(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
385  unordered_map<string,unordered_map<char32_t,unordered_set<string>>> transitions;
386  for (size_t q(0); q < dfa.getNumberOfStates(); q++) {
387  for (char32_t symbol : alphabet) {
388  transitions[dfa.getLabelOf(q)][symbol].insert(dfa.getLabelOf(dfa.delta(q, symbol)));
389  }
390  }
391  p = unique_ptr<pImpl>(new pImpl(initial, acceptingStates, alphabet, transitions));
392 }
393 
395 nfa::builder::builder(builder& b) : p(new pImpl(*(b.p))) {}
396 
398 
399 nfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
400 
403  if (this != &b) {
404  p.reset(new pImpl(*(b.p)));
405  }
406  return *this;
407 }
408 
410 
412  if (this != &b) {
413  p.reset(new pImpl);
414  p.swap(b.p);
415  }
416  return *this;
417 }
418 
420 
426  p->alphabet.insert(symbol);
427  return *this;
428 }
429 
431 nfa::builder& nfa::builder::addSymbol(string const& utf8Symbol) {
432  return addSymbol(expression::converter->from_bytes(utf8Symbol)[0]);
433 }
434 
436 
442 nfa::builder& nfa::builder::setAccepting(string const& state, bool accept) {
443  if (p->transitions.empty()) {
444  p->initial = state;
445  }
446  p->transitions[state];
447  if (accept) {
448  p->acceptingStates.insert(state);
449  } else {
450  p->acceptingStates.erase(state);
451  }
452  return *this;
453 }
454 
456 
462  p->initial = state;
463  p->transitions[state];
464  return *this;
465 }
466 
468 
475 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, char32_t symbol) {
476  if (p->transitions.empty()) {
477  p->initial = from;
478  }
479  p->transitions[to];
480  p->transitions[from][symbol].insert(to);
481  addSymbol(symbol);
482  return *this;
483 }
484 
486 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, string const& utf8Symbol) {
487  return addTransition(from, to, expression::converter->from_bytes(utf8Symbol)[0]);
488 }
489 
491 
496  p->alphabet.insert(U'\0');
497  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
498  sort(alphabetV.begin(), alphabetV.end());
499  vector<string> labelV(p->transitions.size());
500  labelV[0] = p->initial;
501  valarray<bool> acceptingV(p->transitions.size());
502  acceptingV[0] = (p->acceptingStates.find(p->initial) != p->acceptingStates.end());
503  size_t i(1);
504  for (auto entry : p->transitions) {
505  if (entry.first == p->initial) {
506  continue;
507  }
508  labelV[i] = entry.first;
509  acceptingV[i++] = (
510  p->acceptingStates.find(entry.first) != p->acceptingStates.end()
511  );
512  }
513  vector<vector<valarray<bool>>> transitionV(
514  p->transitions.size(),
515  vector<valarray<bool>>(
516  p->alphabet.size()+1,
517  valarray<bool>(labelV.size())
518  )
519  );
520  unordered_set<string> emptySet;
521  size_t qi(0);
522  for (auto const& q : labelV) {
523  unordered_map<char32_t,unordered_set<string>> const& fromQ = p->transitions[q];
524  size_t si(0);
525  for (char32_t symbol : alphabetV) {
526  auto to = fromQ.find(symbol);
527  unordered_set<string> const& viaSymbol(to != fromQ.end() ? to->second : emptySet);
528  i = 0;
529  for (auto const& p : labelV) {
530  transitionV[qi][si][i++] = (viaSymbol.count(p) != 0);
531  }
532  si++;
533  }
534  qi++;
535  }
536  return {alphabetV, transitionV, labelV, acceptingV};
537 }
538 
539 nfa::builder::~builder() = default;
540 } // namespace reg
static std::unique_ptr< std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > > const converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: expression.h:97
nfa build()
Builds the NFA, as defined by previous operations.
Definition: nfa.cpp:495
size_t delta(size_t q, char32_t symbol) const
Computes this DFA&#39;s transition function for a state and a symbol.
Definition: dfa.cpp:223
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:107
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:22
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA&#39;s set of processable symbols.
Definition: nfa.cpp:268
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: nfa.cpp:237
Represents deterministic finite automata.
Definition: dfa.h:22
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this NFA&#39;s set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:277
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Definition: nfa.cpp:38
std::valarray< bool > deltaHat(size_t q, std::u32string const &word) const
Computes this NFA&#39;s transition function recursively for a string of symbols, starting in a specified ...
Definition: nfa.cpp:140
Constructs NFAs step by step.
Definition: nfa.h:52
bool operator==(nfa const &n) const
Tests whether this NFA accepts exactly the same language as another one.
Definition: nfa.cpp:90
pImpl()
Constructs private implementation object for an NFA accepting the empty language ∅.
Definition: nfa.cpp:41
vector< string > labels
Stores the names of states.
Definition: nfa.cpp:37
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:296
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
Definition: dfa.cpp:663
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one&#39;s private implementation object.
Definition: nfa.cpp:308
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA&#39;s set of processable symbols.
Definition: dfa.cpp:287
string initial
Name of the prospective NFA&#39;s initial state.
Definition: nfa.cpp:329
pImpl(string &initial, unordered_set< string > &acceptingStates, unordered_set< char32_t > &alphabet, unordered_map< string, unordered_map< char32_t, unordered_set< string >>> &transitions)
Constructs private implementation object with provided members.
Definition: nfa.cpp:340
bool operator!=(nfa const &n) const
Tests whether this NFA doesn&#39;t accept the same language as another one.
Definition: nfa.cpp:95
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective NFA&#39;s alphabet.
Definition: nfa.cpp:425
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
Definition: nfa.cpp:442
Contains the reg::nfa class definition.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: nfa.cpp:33
Contains the reg::expression class defintion.
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: nfa.cpp:34
Private implementation details of NFAs.
Definition: nfa.cpp:31
builder & merge()
Merges the prospective DFA&#39;s indistinguishable states, allowing for minimization. ...
Definition: dfa.cpp:611
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective NFA.
Definition: nfa.cpp:331
Constructs DFAs step by step.
Definition: dfa.h:49
vector< valarray< bool > > epsClosures
Cache for every state&#39;s ε-closures.
Definition: nfa.cpp:36
size_t getNumberOfStates() const
Counts this DFA&#39;s states.
Definition: dfa.cpp:305
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
Definition: nfa.cpp:461
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:35
Private implementation details of NFA builders.
Definition: nfa.cpp:328
size_t getNumberOfStates() const
Counts this NFA&#39;s states.
Definition: nfa.cpp:286
pImpl(vector< char32_t > &alphabet, vector< vector< valarray< bool >>> &transitions, vector< string > &labels, valarray< bool > &acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
Definition: nfa.cpp:51
dfa const & getEquivalentDfa(nfa const *owner)
Returns the equivalent DFA safely, constructing it if it wasn&#39;t already.
Definition: nfa.cpp:73
Definition: dfa.cpp:32
nfa()
Constructs an NFA accepting the empty language ∅.
Definition: nfa.cpp:82
builder & operator=(builder const &b)
Copy-assigns a builder by copying another one&#39;s private implementation object.
Definition: nfa.cpp:402
unordered_set< string > acceptingStates
Set of names of the prospective NFA&#39;s accept states.
Definition: nfa.cpp:330
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
Definition: nfa.cpp:475
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: dfa.cpp:278
std::valarray< bool > const & delta(size_t q, char32_t symbol) const
Computes this NFA&#39;s transition function for a state and a symbol.
Definition: nfa.cpp:106
unordered_map< string, unordered_map< char32_t, unordered_set< string > > > transitions
Transition function (state × symbol → set of states) of the prospective NFA.
Definition: nfa.cpp:332
pImpl()=default
Constructs empty private implementation object.
std::valarray< bool > const & epsilonClosure(size_t q) const
Computes a state&#39;s ε-closure.
Definition: nfa.cpp:185
Contains the reg::dfa class definition.
shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object&#39;s NFA.
Definition: nfa.cpp:32
builder()
Constructs a blank builder object.
Definition: nfa.cpp:353