reglibcpp  2.0.0
A C++ implementation of models for regular languages
dfa.cpp
Go to the documentation of this file.
1 // Copyright 2017, 2018 Tom Kranz
2 //
3 // This file is part of reglibcpp.
4 //
5 // reglibcpp is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // reglibcpp is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with reglibcpp. If not, see <https://www.gnu.org/licenses/>.
17 
19 #include "dfa.h"
20 using std::make_unique;
21 using std::string;
22 using std::to_string;
23 using std::u32string;
24 using std::valarray;
25 using std::vector;
26 
27 using std::move;
28 
29 #include <unordered_set>
30 using std::unordered_set;
31 
32 #include <unordered_map>
33 using std::unordered_map;
34 
35 #include <forward_list>
36 using std::forward_list;
37 
38 #include <algorithm>
39 using std::find;
40 using std::none_of;
41 
42 #include <cstdint>
43 
44 #include "fabuilder.h"
45 #include "nfa.h"
46 #include "utils.h"
47 
48 namespace reg {
49 
51 struct dfa::impl {
63 
67  impl()
68  : accepting(false, 1),
69  u32alphabet(0),
70  alphabet(0),
71  labels(1),
72  transitions(1) {}
73 
77  : accepting(move(accepting)),
78  u32alphabet(move(u32alphabet)),
79  labels(move(labels)),
80  transitions(move(transitions)) {
81  alphabet.reserve(this->u32alphabet.size());
82  for (char32_t symbol : this->u32alphabet) {
83  alphabet.push_back(converter.to_bytes(symbol));
84  }
85  }
86 };
87 
89 dfa::dfa() : pim(new impl) {}
90 
92 dfa::dfa(fabuilder& b, bool force) : dfa(b.buildDfa(force)) {}
93 
96 dfa::dfa(vector<char32_t>&& alphabet, vector<vector<size_t>>&& transitions,
97  vector<string>&& labels, valarray<bool>&& acceptingStates)
98  : pim(new impl(move(alphabet), move(transitions), move(labels),
99  move(acceptingStates))) {}
100 
103 dfa::dfa(dfa const& d) : pim(new impl(*(d.pim))) {}
104 
107 
108 dfa::dfa(dfa&& d) : pim(new impl) { pim.swap(d.pim); }
109 
112 dfa& dfa::operator=(dfa const& d) {
113  if (this != &d) {
114  pim.reset(new impl(*(d.pim)));
115  }
116  return *this;
117 }
118 
121 
123  if (this != &d) {
124  pim.reset(new impl);
125  pim.swap(d.pim);
126  }
127  return *this;
128 }
129 
130 dfa::~dfa() = default;
131 
132 #ifndef DOXYGEN_SHOULD_SKIP_THIS
133 dfa::operator nfa const&() const {
134  if (!pim->equivalent) {
135  pim->equivalent = std::make_shared<nfa const>(fabuilder(*this).buildNfa());
136  }
137  return *pim->equivalent;
138 }
139 #endif // DOXYGEN_SHOULD_SKIP_THIS
140 
142 
146 bool dfa::operator==(dfa const& d) const {
147  forward_list<char32_t> unitedAlphabet(getAlphabet_().begin(),
148  getAlphabet_().end());
149  size_t unitedAlphabetSize(getAlphabet_().size());
150  for (char32_t symbol : d.getAlphabet_()) {
151  if (index_of(getAlphabet_(), symbol) == getAlphabet_().size()) {
152  unitedAlphabet.push_front(symbol);
153  unitedAlphabetSize++;
154  }
155  }
156  vector<vector<size_t>> tTable(getStates().size() + d.getStates().size() + 1,
157  vector<size_t>(unitedAlphabetSize));
158  valarray<bool> accepting(false,
159  getStates().size() + d.getStates().size() + 1);
160  for (size_t q(0); q < getStates().size(); q++) {
161  size_t s(0);
162  vector<size_t>& row = tTable[q];
163  for (char32_t symbol : unitedAlphabet) {
164  try {
165  row[s] = delta_(q, symbol);
166  } catch (symbol_not_found e) {
167  row[s] = getStates().size() + d.getStates().size();
168  }
169  s++;
170  }
171  accepting[q] = isAccepting(q);
172  }
173  for (size_t q(0); q < d.getStates().size(); q++) {
174  size_t s(0);
175  vector<size_t>& row = tTable[q + getStates().size()];
176  for (char32_t symbol : unitedAlphabet) {
177  try {
178  row[s] = getStates().size() + d.delta_(q, symbol);
179  } catch (symbol_not_found e) {
180  row[s] = getStates().size() + d.getStates().size();
181  }
182  s++;
183  }
184  accepting[q + getStates().size()] = d.isAccepting(q);
185  }
186  for (size_t s(0); s < unitedAlphabetSize; s++) {
187  tTable[getStates().size() + d.getStates().size()][s] =
188  getStates().size() + d.getStates().size();
189  }
190  return indistinguishableStates(tTable, accepting)[0][getStates().size()];
191 }
192 
194 bool dfa::operator!=(dfa const& d) const { return !operator==(d); }
195 
198 bool dfa::operator==(nfa const& n) const {
199  return operator==(static_cast<dfa const&>(n));
200 }
201 
204 bool dfa::operator!=(nfa const& n) const { return !operator==(n); }
205 
208 
215 size_t dfa::delta(size_t qi, size_t si) const {
216  if (si >= pim->alphabet.size()) {
217  throw symbol_not_found(si);
218  }
219  if (qi >= pim->labels.size()) {
220  throw state_not_found(qi);
221  }
222  return pim->transitions[qi][si];
223 }
224 
227 
239 size_t dfa::delta_(size_t qi, char32_t u32symbol) const {
240  try {
241  return delta(qi, index_of(getAlphabet_(), u32symbol));
242  } catch (symbol_not_found e) {
243  throw symbol_not_found(u32symbol);
244  }
245 }
246 
249 
262 size_t dfa::delta(size_t qi, string const& symbol) const {
263  return delta_(qi, converter.from_bytes(symbol)[0]);
264 }
265 
268 
280 string const& dfa::delta_(string const& q, char32_t u32symbol) const {
281  try {
282  return getStates()[delta_(index_of(getStates(), q), u32symbol)];
283  } catch (state_not_found e) {
284  throw state_not_found(q);
285  }
286 }
287 
290 
303 string const& dfa::delta(string const& q, string const& symbol) const {
304  return delta_(q, converter.from_bytes(symbol)[0]);
305 }
306 
310 
320 size_t dfa::deltaHat_(size_t qi, u32string const& u32word) const {
321  for (char32_t symbol : u32word) {
322  qi = delta_(qi, symbol);
323  }
324  return qi;
325 }
326 
329 
343 size_t dfa::deltaHat(size_t qi, string const& word) const {
344  return deltaHat_(qi, converter.from_bytes(word));
345 }
346 
349 
363 string const& dfa::deltaHat_(string const& q, u32string const& u32word) const {
364  return pim->labels[deltaHat_(index_of(getStates(), q), u32word)];
365 }
366 
369 
383 string const& dfa::deltaHat(string const& q, string const& word) const {
384  return deltaHat_(q, converter.from_bytes(word));
385 }
386 
388 
391 string const& dfa::getInitialState() const { return getStates()[0]; }
392 
394 
398 vector<string> const& dfa::getStates() const { return pim->labels; }
399 
401 
405 vector<char32_t> const& dfa::getAlphabet_() const { return pim->u32alphabet; }
406 
408 
412 vector<string> const& dfa::getAlphabet() const { return pim->alphabet; }
413 
415 
420 bool dfa::isAccepting(size_t qi) const {
421  if (qi >= pim->labels.size()) {
422  throw state_not_found(qi);
423  }
424  return pim->accepting[qi];
425 }
426 
428 
433 bool dfa::isAccepting(string const& q) const {
434  try {
435  return isAccepting(index_of(getStates(), q));
436  } catch (state_not_found e) {
437  throw state_not_found(q);
438  }
439 }
440 
442 
448  auto const& pim = d.pim;
449  if (pim->accepting[0]) {
450  return U"";
451  }
452  unordered_map<size_t, u32string> shortestWords(pim->labels.size());
453  size_t oldSize = 0;
454  shortestWords.emplace(0, U"");
455  while (shortestWords.size() > oldSize) {
456  oldSize = shortestWords.size();
457  for (auto const& stateWord : shortestWords) {
458  for (auto symbol : d.getAlphabet_()) {
459  size_t reached = d.delta_(stateWord.first, symbol);
460  u32string shWord = stateWord.second + symbol;
461  if (pim->accepting[reached]) {
462  return shWord;
463  }
464  if (!shortestWords.count(reached)) {
465  shortestWords.emplace(reached, move(shWord));
466  }
467  }
468  }
469  }
470  throw std::logic_error("This DFA doesn't accept any words!");
471 }
472 
474 
483 string findShortestWord(dfa const& d) {
484  return converter.to_bytes(findShortestWord_(d));
485 }
486 
489 
498 fabuilder dfa::unite(dfa const& d1, dfa const& d2) {
499  return fabuilder(d1).unite(d2);
500 }
501 
504 
513 fabuilder dfa::intersect(dfa const& d1, dfa const& d2) {
514  return fabuilder(d1).intersect(d2);
515 }
516 
519 
528 fabuilder dfa::subtract(dfa const& d1, dfa const& d2) {
529  fabuilder b1(d1);
530  for (auto symbol : d2.getAlphabet_()) {
531  b1.addSymbol_(symbol);
532  }
533  return b1.complement().unite(d2).complement();
534 }
535 
538 
547 
549 
558  vector<vector<size_t>> const& transitions,
559  valarray<bool> const& accepting) {
560  vector<valarray<bool>> distinguishable;
561  distinguishable.reserve(transitions.size());
562  for (size_t q(0); q < transitions.size(); q++) {
563  bool qAcc(accepting[q]);
564  distinguishable.push_back(valarray<bool>(false, transitions.size()));
565  for (size_t p(0); p < q; p++) {
566  bool pAcc(accepting[p]);
567  distinguishable[q][p] = (qAcc != pAcc);
568  distinguishable[p][q] = (qAcc != pAcc);
569  }
570  }
571  bool changes(true);
572  while (changes) {
573  changes = false;
574  for (size_t q(0); q < transitions.size(); q++) {
575  for (size_t p(0); p < q; p++) {
576  if (distinguishable[q][p]) {
577  continue;
578  }
579  for (size_t s(0); s < transitions[q].size(); s++) {
580  size_t qS(transitions[q][s]);
581  size_t pS(transitions[p][s]);
582  if (distinguishable[qS][pS]) {
583  changes = true;
584  distinguishable[q][p] = true;
585  distinguishable[p][q] = true;
586  break;
587  }
588  }
589  }
590  }
591  }
592  valarray<bool> allTrue(true, transitions.size());
593  for (size_t i(0); i < distinguishable.size(); i++) {
594  distinguishable[i] ^= allTrue;
595  }
596  return distinguishable;
597 }
598 
599 } // namespace reg
u32string findShortestWord_(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:447
vector< string > labels
Stores the names of states.
Definition: dfa.cpp:60
std::string const & getInitialState() const
Names this DFA's initial state.
Definition: dfa.cpp:391
string findShortestWord(dfa const &d)
Searches the shortest UTF-8-encoded word accepted by a given DFA.
Definition: dfa.cpp:483
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:420
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:89
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:38
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
Definition: dfa.cpp:62
bool operator==(dfa const &d) const
Tests whether this DFA accepts exactly the same language as another one.
Definition: dfa.cpp:146
Signals that an input symbol was used that the FA doesn't recognize.
Definition: utils.h:74
std::shared_ptr< nfa const > equivalent
Holds an equivalent NFA in case it is ever needed.
Definition: dfa.cpp:52
size_t deltaHat_(size_t qi, std::u32string const &u32word) const
Computes this DFA's transition function recursively for a UTF-32-encoded string of symbols...
Definition: dfa.cpp:320
Represents deterministic finite automata.
Definition: dfa.h:41
fabuilder & intersect(nfa const &other)
Makes the prospective FA accept only words accepted also by another FA.
Definition: fabuilder.cpp:683
size_t deltaHat(size_t qi, std::string const &word) const
Computes this DFA's transition function recursively for a UTF-8-encoded string of symbols...
Definition: dfa.cpp:343
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:215
Contains utility classes, objects and functions used throughout the library.
vector< char32_t > u32alphabet
Represents the set of processable symbols.
Definition: dfa.cpp:57
bool operator!=(dfa const &d) const
Tests whether this DFA doesn't accept the same language as another one.
Definition: dfa.cpp:194
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: utils.h:43
Contains the reg::nfa class definition.
static fabuilder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
Definition: dfa.cpp:546
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
Definition: dfa.cpp:398
std::vector< std::string > const & getAlphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:412
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: utils.h:52
fabuilder & addSymbol_(char32_t u32symbol)
Adds a symbol to the prospective FA's alphabet.
Definition: fabuilder.cpp:292
fabuilder & unite(nfa const &other)
Makes the prospective FA also accept every word of another FA&#39;s language.
Definition: fabuilder.cpp:582
Signals that a state was mentioned that isn't part of the FA.
Definition: utils.h:55
Constructs NFAs step by step.
Definition: fabuilder.h:31
impl()
Constructs private implementation object for a DFA accepting the empty language ∅.
Definition: dfa.cpp:67
static std::vector< std::valarray< bool > > indistinguishableStates(std::vector< std::vector< size_t >> const &transitions, std::valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
Definition: dfa.cpp:557
Where this library lives.
Definition: dfa.cpp:48
impl(vector< char32_t > &&u32alphabet, vector< vector< size_t >> &&transitions, vector< string > &&labels, valarray< bool > &&accepting)
Constructs private implementation object with provided members.
Definition: dfa.cpp:75
static fabuilder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Definition: dfa.cpp:498
static fabuilder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Definition: dfa.cpp:528
Private implementation details of DFAs.
Definition: dfa.cpp:51
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: dfa.cpp:55
nfa buildNfa()
Builds an NFA as defined by previous operations.
Definition: fabuilder.cpp:859
vector< string > alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:58
Contains the reg::dfa class definition.
static fabuilder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
Definition: dfa.cpp:513
std::vector< char32_t > const & getAlphabet_() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:405
size_t delta_(size_t qi, char32_t u32symbol) const
Computes this DFA's transition function for a state index and a UTF-32-encoded symbol.
Definition: dfa.cpp:239
Contains the reg::fabuilder class definition.
fabuilder & complement(bool force=false)
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: fabuilder.cpp:780
dfa & operator=(dfa const &d)
Copy-assigns this DFA by copying another one's private implementation object.
Definition: dfa.cpp:112