reglibcpp  2.0.0
A C++ implementation of models for regular languages
nfa.cpp
Go to the documentation of this file.
1 // Copyright 2017, 2018 Tom Kranz
2 //
3 // This file is part of reglibcpp.
4 //
5 // reglibcpp is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // reglibcpp is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with reglibcpp. If not, see <https://www.gnu.org/licenses/>.
17 
19 #include "nfa.h"
20 using std::make_pair;
21 using std::make_unique;
23 using std::string;
24 using std::u32string;
25 using std::unordered_set;
26 using std::valarray;
27 using std::vector;
28 
29 using std::move;
30 
31 #include <unordered_map>
32 using std::unordered_map;
33 
34 #include <algorithm>
35 using std::find;
36 using std::min;
37 
38 #include <iterator>
39 
40 #include "dfa.h"
41 #include "expression.h"
42 #include "fabuilder.h"
43 
44 namespace reg {
45 
47 struct nfa::impl {
63 
66  impl()
67  : equivalent(),
68  accepting(false, 1),
69  u32alphabet(1, U'\0'),
70  alphabet(1, ""),
71  epsClosures(1),
72  labels(1),
73  transitions(1, vector<valarray<bool>>(1, valarray<bool>(false, 1))) {}
74 
79  valarray<bool>&& acceptingStates)
80  : accepting(move(acceptingStates)),
81  u32alphabet(move(u32alphabet)),
82  labels(move(labels)),
83  transitions(move(transitions)) {
84  alphabet.reserve(this->u32alphabet.size());
85  for (char32_t symbol : this->u32alphabet) {
86  alphabet.push_back(converter.to_bytes(u32string(!!symbol, symbol)));
87  }
88  epsClosures = vector<valarray<bool>>(this->labels.size());
89  }
90 };
91 
93 nfa::nfa() : pim(new impl) {}
94 
97 nfa::nfa(fabuilder& b) : nfa(b.buildNfa()) {}
98 
102  vector<vector<valarray<bool>>>&& transitions, vector<string>&& labels,
103  valarray<bool>&& acceptingStates)
104  : pim(new impl(move(alphabet), move(transitions), move(labels),
105  move(acceptingStates))) {}
106 
107 #ifndef DOXYGEN_SHOULD_SKIP_THIS
108 nfa::operator dfa const&() const {
109  if (!pim->equivalent) {
110  pim->equivalent = std::make_shared<dfa const>(
111  fabuilder(*this).minimize(true).normalizeStateNames("").buildDfa());
112  }
113  return *pim->equivalent;
114 }
115 #endif // DOXYGEN_SHOULD_SKIP_THIS
116 
119 
123 bool nfa::operator==(nfa const& n) const {
124  return static_cast<dfa const&>(*this).operator==(n);
125 }
126 
129 
133 bool nfa::operator!=(nfa const& n) const {
134  return static_cast<dfa const&>(*this).operator!=(n);
135 }
136 
139 
146 valarray<bool> const& nfa::delta(size_t qi, size_t si) const {
147  if (si >= pim->alphabet.size()) {
148  throw symbol_not_found(si);
149  }
150  if (qi >= pim->labels.size()) {
151  throw state_not_found(qi);
152  }
153  return pim->transitions[qi][si];
154 }
155 
158 
170 valarray<bool> const& nfa::delta_(size_t qi, char32_t u32symbol) const {
171  try {
172  return delta(qi, index_of(getAlphabet_(), u32symbol));
173  } catch (symbol_not_found e) {
174  throw symbol_not_found(u32symbol);
175  }
176 }
177 
180 
193 valarray<bool> const& nfa::delta(size_t qi, string const& symbol) const {
194  return delta_(qi, converter.from_bytes(symbol)[0]);
195 }
196 
199 
211 nfa::state_set nfa::delta_(string const& q, char32_t u32symbol) const {
212  try {
213  return decodeSet(delta_(index_of(getStates(), q), u32symbol));
214  } catch (state_not_found e) {
215  throw state_not_found(q);
216  }
217 }
218 
221 
234 nfa::state_set nfa::delta(string const& q, string const& symbol) const {
235  return delta_(q, converter.from_bytes(symbol)[0]);
236 }
237 
240 
250 valarray<bool> nfa::deltaHat_(size_t qi, u32string const& u32word) const {
251  if (qi >= pim->labels.size()) {
252  throw state_not_found(qi);
253  }
254  valarray<bool> qs(pim->labels.size());
255  qs[qi] = true;
256  return deltaHat_(qs, u32word);
257 }
258 
261 
275 valarray<bool> nfa::deltaHat(size_t qi, string const& word) const {
276  return deltaHat_(qi, converter.from_bytes(word));
277 }
278 
281 
295 nfa::state_set nfa::deltaHat_(string const& q, u32string const& u32word) const {
296  try {
297  return decodeSet(deltaHat_(index_of(getStates(), q), u32word));
298  } catch (state_not_found e) {
299  throw state_not_found(q);
300  }
301 }
302 
305 
319 nfa::state_set nfa::deltaHat(string const& q, string const& word) const {
320  return deltaHat_(q, converter.from_bytes(word));
321 }
322 
325 
334  u32string const& u32word) const {
336  for (char32_t symbol : u32word) {
337  valarray<bool> reached(pim->labels.size());
338  size_t qi(0);
339  for (bool qb : ps) {
340  if (qb) {
341  reached |= epsilonClosure(delta_(qi, symbol));
342  }
343  qi++;
344  }
345  ps = move(reached);
346  }
347  return ps;
348 }
349 
352 
366  string const& word) const {
367  return deltaHat_(qs, converter.from_bytes(word));
368 }
369 
373 
387  u32string const& u32word) const {
388  return decodeSet(deltaHat_(encodeSet(qs), u32word));
389 }
390 
393 
407  string const& word) const {
408  return deltaHat_(qs, converter.from_bytes(word));
409 }
410 
412 
418 valarray<bool> const& nfa::epsilonClosure(size_t qi) const {
419  if (qi >= pim->labels.size()) {
420  throw state_not_found(qi);
421  }
422  if (pim->epsClosures[qi].size() == 0) {
423  pim->epsClosures[qi].resize(pim->labels.size());
424  pim->epsClosures[qi][qi] = true;
425  int growth(1);
426  while (growth > 0) {
427  growth = 0;
428  valarray<bool> old(pim->epsClosures[qi]);
429  size_t qqi(0);
430  for (bool qqb : old) {
431  if (qqb) {
432  size_t pi(0);
433  for (bool pb : pim->transitions[qqi][0]) {
434  if (pb) {
435  if (!pim->epsClosures[qi][pi]) {
436  growth++;
437  pim->epsClosures[qi][pi] = true;
438  }
439  }
440  pi++;
441  } // going through the true bools in transitions[qqi][0]
442  }
443  qqi++;
444  } // going through the true bools in old
445  }
446  }
447  return pim->epsClosures[qi];
448 }
449 
451 
461 nfa::state_set nfa::epsilonClosure(string const& q) const {
462  try {
464  } catch (state_not_found e) {
465  throw state_not_found(q);
466  }
467 }
468 
470 
476  valarray<bool> closure(pim->labels.size());
477  size_t range = min(qs.size(), getStates().size());
478  for (size_t q = 0; q < range; q++) {
479  if (qs[q]) {
480  closure |= epsilonClosure(q);
481  }
482  }
483  return closure;
484 }
485 
487 
497  return decodeSet(epsilonClosure(encodeSet(qs)));
498 }
499 
501 
506 string nfa::to_string(valarray<bool> const& qs) const {
507  string label;
508  size_t range = min(qs.size(), getStates().size());
509  for (size_t q = 0; q < range; q++) {
510  if (qs[q]) {
511  label = label.append(1, label.length() == 0 ? '{' : ',');
512  // label = label.append(1, '{' - !!label.length() * ('{' - ','));
513  label = label.append(getStates()[q]);
514  }
515  }
516  if (label.length() == 0) {
517  return string("{}");
518  } else {
519  return label.append(1, '}');
520  }
521 }
522 
524 
529 string nfa::to_string(nfa::state_set const& qs) const {
530  return to_string(encodeSet(qs));
531 }
532 
534 
539  nfa::state_set states;
540  states.reserve(getStates().size());
541  size_t range = min(qs.size(), getStates().size());
542  for (size_t q = 0; q < range; q++) {
543  if (qs[q]) {
544  states.emplace(getStates()[q]);
545  }
546  }
547  return states;
548 }
549 
551 
556  valarray<bool> states(getStates().size());
557  for (size_t qi = 0; qi < getStates().size(); qi++) {
558  states[qi] = qs.count(getStates()[qi]);
559  }
560  return states;
561 }
562 
564 
567 string const& nfa::getInitialState() const { return getStates()[0]; }
568 
570 
574 vector<string> const& nfa::getStates() const { return pim->labels; }
575 
577 
581  return nfa::state_set(getStates().begin(), getStates().end());
582 }
583 
585 
590  return valarray<bool>(true, getStates().size());
591 }
592 
594 
598 vector<char32_t> const& nfa::getAlphabet_() const { return pim->u32alphabet; }
599 
601 
605 vector<string> const& nfa::getAlphabet() const { return pim->alphabet; }
606 
608 
613 bool nfa::isAccepting(size_t qi) const {
614  if (qi >= pim->labels.size()) {
615  throw state_not_found(qi);
616  }
617  return pim->accepting[qi];
618 }
619 
621 
626 bool nfa::isAccepting(string const& q) const {
627  try {
628  return isAccepting(index_of(getStates(), q));
629  } catch (state_not_found e) {
630  throw state_not_found(q);
631  }
632 }
633 
635 
639 bool nfa::isAccepting(valarray<bool> const& qs) const {
640  size_t range = min(qs.size(), getStates().size());
641  for (size_t q = 0; q < range; q++) {
642  if (qs[q] && pim->accepting[q]) {
643  return true;
644  }
645  }
646  return false;
647 }
648 
650 
659 bool nfa::isAccepting(nfa::state_set const& qs) const {
660  for (size_t qi = 0; qi < getStates().size(); qi++) {
661  if (pim->accepting[qi] && qs.count(getStates()[qi])) {
662  return true;
663  }
664  }
665  return false;
666 }
667 
669 
675  auto const& pim = n.pim;
676  if (pim->accepting[0]) {
677  return U"";
678  }
679  unordered_map<size_t, u32string> shortestWords(pim->labels.size());
680  size_t oldSize = 0;
681  shortestWords.emplace(0, U"");
682  while (shortestWords.size() > oldSize) {
683  oldSize = shortestWords.size();
684  for (auto const& stateWord : shortestWords) {
685  for (auto symbol : n.getAlphabet_()) {
686  valarray<bool> reached =
687  n.deltaHat_(stateWord.first, u32string(!!symbol, symbol));
688  u32string shWord = stateWord.second + u32string(!!symbol, symbol);
689  for (size_t q = 0; q < reached.size(); q++) {
690  if (reached[q]) {
691  if (pim->accepting[q]) {
692  return shWord;
693  }
694  if (!shortestWords.count(q)) {
695  shortestWords.emplace(q, shWord);
696  }
697  }
698  }
699  }
700  }
701  }
702  throw std::logic_error("This NFA doesn't accept any words!");
703 }
704 
706 string findShortestWord(nfa const& n) {
707  return converter.to_bytes(findShortestWord_(n));
708 }
709 
712 
721 fabuilder nfa::unite(nfa const& n1, nfa const& n2) {
722  return fabuilder(n1).unite(n2);
723 }
724 
727 
736 fabuilder nfa::intersect(nfa const& n1, nfa const& n2) {
737  return fabuilder(n1).intersect(n2);
738 }
739 
742 
750 fabuilder nfa::subtract(nfa const& n1, nfa const& n2) {
751  fabuilder b2(n2);
752  for (auto symbol : n1.getAlphabet_()) {
753  b2.addSymbol_(symbol);
754  }
755  b2.complement(true).minimize();
756  return fabuilder(n1).intersect(b2.buildNfa());
757 }
758 
761 
770  return fabuilder(n).complement(true).minimize();
771 }
772 
775 nfa::nfa(nfa const& n) : pim(new impl(*(n.pim))) {}
776 
779 
780 nfa::nfa(nfa&& n) : pim(new impl) { pim.swap(n.pim); }
781 
784 nfa& nfa::operator=(nfa const& n) {
785  if (this != &n) {
786  pim.reset(new impl(*(n.pim)));
787  }
788  return *this;
789 }
790 
793 
795  if (this != &n) {
796  pim.reset(new impl);
797  pim.swap(n.pim);
798  }
799  return *this;
800 }
801 
802 nfa::~nfa() = default;
803 
804 } // namespace reg
u32string findShortestWord_(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:447
vector< valarray< bool > > epsClosures
Cache for every state&#39;s ε-closures.
Definition: nfa.cpp:56
vector< char32_t > u32alphabet
Represents the set of processable symbols.
Definition: nfa.cpp:52
std::unordered_set< std::reference_wrapper< std::string const >, hash_reference_string_const, equal_to_reference_string_const > state_set
Nicer name for sets of names of states. Should store references to existing state names...
Definition: nfa.h:41
impl()
Constructs private implementation object for an NFA accepting the empty language ∅.
Definition: nfa.cpp:66
string findShortestWord(dfa const &d)
Searches the shortest UTF-8-encoded word accepted by a given DFA.
Definition: dfa.cpp:483
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:38
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: nfa.cpp:50
impl(vector< char32_t > &&u32alphabet, vector< vector< valarray< bool >>> &&transitions, vector< string > &&labels, valarray< bool > &&acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
Definition: nfa.cpp:77
Signals that an input symbol was used that the FA doesn't recognize.
Definition: utils.h:74
static fabuilder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
Definition: nfa.cpp:750
Represents deterministic finite automata.
Definition: dfa.h:41
fabuilder & intersect(nfa const &other)
Makes the prospective FA accept only words accepted also by another FA.
Definition: fabuilder.cpp:683
bool operator==(nfa const &n) const
Checks whether this NFA describes the same regular language as another object.
Definition: nfa.cpp:123
std::vector< char32_t > const & getAlphabet_() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:598
std::string to_string(std::valarray< bool > const &qs) const
Puts a name to a set of indices.
Definition: nfa.cpp:506
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
Definition: nfa.cpp:784
std::valarray< bool > encodeSet(state_set const &qs) const
Translates a set of states from set to bool representation.
Definition: nfa.cpp:555
bool operator!=(nfa const &n) const
Checks whether this NFA describes a different regular language than another object.
Definition: nfa.cpp:133
state_set getStatesSet() const
Fetches this NFA's set of states as a set of (references to) strings.
Definition: nfa.cpp:580
std::valarray< bool > deltaHat(size_t qi, std::string const &word) const
Computes this DFA's transition function recursively for a UTF-8-encoded string of symbols...
Definition: nfa.cpp:275
Private implementation details of NFAs.
Definition: nfa.cpp:47
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: utils.h:43
Contains the reg::nfa class definition.
fabuilder & minimize(bool force=false)
Convenience method for chaining purge and merge to achieve proper DFA minimization.
Definition: fabuilder.cpp:570
Contains the reg::expression class defintion.
static fabuilder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
Definition: nfa.cpp:736
dfa buildDfa(bool force=false)
Builds a DFA as defined by previous operations, including completion.
Definition: fabuilder.cpp:920
state_set decodeSet(std::valarray< bool > const &qs) const
Translates a set of states from bool to set representation.
Definition: nfa.cpp:538
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:613
vector< string > labels
Stores the names of states.
Definition: nfa.cpp:58
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object&#39;s NFA.
Definition: nfa.cpp:48
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: utils.h:52
fabuilder & addSymbol_(char32_t u32symbol)
Adds a symbol to the prospective FA's alphabet.
Definition: fabuilder.cpp:292
fabuilder & unite(nfa const &other)
Makes the prospective FA also accept every word of another FA&#39;s language.
Definition: fabuilder.cpp:582
fabuilder & normalizeStateNames(std::string const &prefix)
Reduces the prospective FA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: fabuilder.cpp:807
std::valarray< bool > const & delta_(size_t qi, char32_t u32symbol) const
Computes this NFA's transition function for a state index and a UTF-32-encoded symbol.
Definition: nfa.cpp:170
Signals that a state was mentioned that isn't part of the FA.
Definition: utils.h:55
Constructs NFAs step by step.
Definition: fabuilder.h:31
std::valarray< bool > getStatesBools() const
Fetches this NFA's set of states encoded as an array of bools.
Definition: nfa.cpp:589
Where this library lives.
Definition: dfa.cpp:48
nfa()
Constructs an NFA accepting the empty language ∅.
Definition: nfa.cpp:93
std::string const & getInitialState() const
Names this NFA's initial state.
Definition: nfa.cpp:567
vector< string > alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:54
static fabuilder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
Definition: nfa.cpp:721
std::valarray< bool > const & delta(size_t qi, size_t si) const
Computes this NFA's transition function for a state index and a symbol index.
Definition: nfa.cpp:146
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:418
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
Definition: nfa.cpp:574
std::valarray< bool > deltaHat_(size_t qi, std::u32string const &u32word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:250
static fabuilder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
Definition: nfa.cpp:769
nfa buildNfa()
Builds an NFA as defined by previous operations.
Definition: fabuilder.cpp:859
Contains the reg::dfa class definition.
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Definition: nfa.cpp:59
Contains the reg::fabuilder class definition.
std::vector< std::string > const & getAlphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:605
fabuilder & complement(bool force=false)
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: fabuilder.cpp:780