reglibcpp  2.0.0
A C++ implementation of models for regular languages
gnfa.cpp
Go to the documentation of this file.
1 // Copyright 2017, 2018 Tom Kranz
2 //
3 // This file is part of reglibcpp.
4 //
5 // reglibcpp is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // reglibcpp is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with reglibcpp. If not, see <https://www.gnu.org/licenses/>.
17 
19 #include "gnfa.h"
20 using std::pair;
21 using std::string;
22 using std::vector;
23 
24 #include <unordered_map>
25 using std::unordered_map;
26 
27 #include <functional>
29 
30 #include <forward_list>
31 using std::forward_list;
32 
33 #include "dfa.h"
34 #include "fabuilder.h"
35 #include "nfa.h"
36 
37 namespace reg {
38 
40 struct gnfa::impl {
46  string initial;
48  string accepting;
50 
53  impl() : initial("p0"), accepting("pF") {
56  }
57 
60  transitionMap,
61  string const& initialState,
63  nfa const* eq)
64  : equivalent(eq), transitions(move(transitionMap)) {
65  initial = generateState('p', '0');
66  auto const& epsilon = expression::spawnEmptyString();
67  transitions[initial][initialState] = epsilon;
68  accepting = generateState('p', 'F');
69  for (string const& a : acceptingStates) {
70  transitions[a][accepting] = epsilon;
71  }
72  }
73 
75 
81  expression::exptr const& getTransition(string const& qLabel,
82  string const& pLabel) const {
83  auto const& from = transitions.at(qLabel);
84  auto reIt = from.find(pLabel);
85  if (reIt == from.end() || !reIt->second) {
87  }
88  return reIt->second;
89  }
90 
92 
102  void addTransition(string const& qLabel, string const& pLabel,
103  expression::exptr re, bool optimized = true,
104  bool aggressive = false) {
105  transitions[qLabel][pLabel] = expression::spawnAlternation(
106  getTransition(qLabel, pLabel), re, optimized, aggressive);
107  }
108 
110 
124  string const& generateState(char prefix = 'q', char suffix = '\0') {
125  string state;
126  if (suffix == '\0') {
127  state = string(!!prefix, prefix).append(std::to_string(0));
128  for (size_t q(1); transitions.count(state) != 0; q++) {
129  state = string(!!prefix, prefix).append(std::to_string(q));
130  }
131  } else {
132  for (state = string(!!prefix, prefix).append(1, suffix);
133  transitions.count(state) != 0; state.append(!!prefix, suffix)) {
134  }
135  }
136  return transitions
137  .emplace(move(state), unordered_map<string, expression::exptr>())
138  .first->first;
139  }
140 };
141 
143 
156 gnfa::gnfa(dfa const& d) {
158  d.getStates().size() + 2);
160  for (size_t qi(0); qi < d.getStates().size(); qi++) {
161  auto const& qLabel = d.getStates()[qi];
162  auto& entry = transitions[qLabel];
163  if (d.isAccepting(qi)) {
164  acceptingStates.push_front(qLabel);
165  }
166  for (size_t si(0); si < d.getAlphabet_().size(); si++) {
167  char32_t symbol = d.getAlphabet_()[si];
168  size_t pi = d.delta(qi, si);
169  auto& dest = entry[d.getStates()[pi]];
170  if (!dest) {
171  dest = expression::spawnSymbol_(symbol);
172  } else {
173  dest = expression::spawnAlternation(dest,
174  expression::spawnSymbol_(symbol));
175  }
176  }
177  }
178  pim = std::make_unique<impl>(transitions, d.getInitialState(),
179  acceptingStates, new nfa(d));
180 }
181 
183 
196 gnfa::gnfa(nfa const& n) {
198  n.getStates().size() + 2);
200  for (size_t qi(0); qi < n.getStates().size(); qi++) {
201  auto const& qLabel = n.getStates()[qi];
202  auto& entry = transitions[qLabel];
203  if (n.isAccepting(qi)) {
204  acceptingStates.push_front(qLabel);
205  }
206  for (char32_t symbol : n.getAlphabet_()) {
207  auto& pSet = n.delta_(qi, symbol);
208  for (size_t pi(0); pi < pSet.size(); pi++) {
209  if (pSet[pi]) {
210  auto& dest = entry[n.getStates()[pi]];
211  if (!dest) {
212  dest = expression::spawnSymbol_(symbol);
213  } else {
215  dest, expression::spawnSymbol_(symbol));
216  }
217  }
218  }
219  }
220  }
221  pim = std::make_unique<impl>(transitions, n.getInitialState(),
222  acceptingStates, new nfa(n));
223 }
224 
226 
232  pim->transitions[pim->initial][pim->accepting] = r;
233 }
234 
235 gnfa::gnfa(expression const& r) : pim(new impl) {
236  switch (r.getOperation()) {
237  case expression::operation::alternation:
238  pim->transitions[pim->initial][pim->accepting] =
239  expression::spawnAlternation(*r.begin(), *(r.begin() + 1));
240  break;
241  case expression::operation::concatenation:
242  pim->transitions[pim->initial][pim->accepting] =
243  expression::spawnConcatenation(*r.begin(), *(r.begin() + 1));
244  break;
245  case expression::operation::kleene:
246  pim->transitions[pim->initial][pim->accepting] =
248  break;
249  case expression::operation::symbol:
250  pim->transitions[pim->initial][pim->accepting] =
252  break;
253  case expression::operation::empty:
254  pim->transitions[pim->initial][pim->accepting] =
256  break;
257  }
258 }
259 
261 string const& gnfa::getInitialState() const { return pim->initial; }
262 
264 string const& gnfa::getAcceptingState() const { return pim->accepting; }
265 
268  state_vector active;
269  for (auto const& entry : pim->transitions) {
270  if (entry.first != pim->initial && entry.first != pim->accepting) {
271  active.push_back(std::cref(entry.first));
272  }
273  }
274  return active;
275 }
276 
279  string const& pLabel) const {
280  return pim->getTransition(qLabel, pLabel);
281 }
282 
284 
289  state_pair_vector splittable;
290  splittable.reserve(pim->transitions.size() * pim->transitions.size());
291  for (auto const& from : pim->transitions) {
292  for (auto const& to : from.second) {
293  if (to.second &&
294  to.second->getOperation() > expression::operation::symbol) {
295  splittable.emplace_back(std::ref(from.first), std::ref(to.first));
296  }
297  }
298  }
299  splittable.shrink_to_fit();
300  return splittable;
301 }
302 
304 
312 gnfa::state_vector gnfa::splitTransition(string const& q, string const& p) {
313  state_vector newStates;
314  auto const& re = pim->getTransition(q, p);
315  switch (re->getOperation()) {
316  case expression::operation::alternation: {
317  auto sub1 = *(re->begin());
318  auto sub2 = *(re->begin() + 1);
319  pim->transitions[q][p] = expression::spawnEmptySet();
320  newStates.reserve(4);
321  for (size_t i(0); i < 4; i++) {
322  newStates.push_back(pim->generateState());
323  }
324  auto const& epsilon = expression::spawnEmptyString();
325  pim->transitions[q][newStates[0]] = epsilon;
326  pim->transitions[newStates[0]][newStates[1]] = sub1;
327  pim->transitions[newStates[1]][p] = epsilon;
328  pim->transitions[q][newStates[2]] = epsilon;
329  pim->transitions[newStates[2]][newStates[3]] = sub2;
330  pim->transitions[newStates[3]][p] = epsilon;
331  break;
332  }
333  case expression::operation::concatenation: {
334  auto sub1 = *(re->begin());
335  auto sub2 = *(re->begin() + 1);
336  pim->transitions[q][p] = expression::spawnEmptySet();
337  newStates.reserve(2);
338  for (size_t i(0); i < 2; i++) {
339  newStates.push_back(pim->generateState());
340  }
341  pim->transitions[q][newStates[0]] = sub1;
342  pim->transitions[newStates[0]][newStates[1]] =
344  pim->transitions[newStates[1]][p] = sub2;
345  break;
346  }
347  case expression::operation::kleene: {
348  auto sub = *(re->begin());
349  auto const& epsilon = expression::spawnEmptyString();
350  pim->transitions[q][p] = epsilon;
351  newStates.reserve(2);
352  for (size_t i(0); i < 2; i++) {
353  newStates.push_back(pim->generateState());
354  }
355  pim->transitions[q][newStates[0]] = epsilon;
356  pim->transitions[newStates[1]][p] = epsilon;
357  pim->transitions[newStates[1]][newStates[0]] = epsilon;
358  pim->transitions[newStates[0]][newStates[1]] = sub;
359  break;
360  }
361  case expression::operation::symbol:
362  case expression::operation::empty:;
363  }
364  return newStates;
365 }
366 
369 
379  for (auto splittable = getSplittableTransitions(); !splittable.empty();
380  splittable = getSplittableTransitions()) {
381  for (auto const& nodes : splittable) {
382  splitTransition(nodes.first, nodes.second);
383  }
384  }
385  fabuilder b;
386  auto const& empty = expression::spawnEmptySet();
387  for (auto const& from : pim->transitions) {
388  for (auto const& to : from.second) {
389  if (to.second != empty) {
390  b.addTransition_(from.first, to.first, to.second->extractSymbol_());
391  }
392  }
393  }
394  b.makeInitial(pim->initial);
395  b.setAccepting(pim->accepting, true);
396  return b.buildNfa();
397 }
398 
401 void gnfa::bypassTransition(string const& qLabel, string const& pLabel) {
402  auto const& re = pim->getTransition(qLabel, pLabel);
403  if (re == expression::spawnEmptySet()) {
404  return;
405  }
406  auto const& selfRe = pim->getTransition(qLabel, qLabel);
407  for (auto const& from : pim->transitions) {
408  if (from.first != qLabel) {
409  auto to = from.second.find(qLabel);
410  if (to != from.second.end() && to->second) {
411  pim->addTransition(
412  from.first, pLabel,
414  to->second, expression::spawnConcatenation(
415  expression::spawnKleene(selfRe), re)));
416  }
417  }
418  }
419  pim->transitions[qLabel].erase(pLabel);
420 }
421 
423 void gnfa::ripState(string const& qLabel) {
425  for (auto const& to : pim->transitions[qLabel]) {
426  right.push_front(to.first);
427  }
428  for (auto const& to : right) {
429  if (to.get() != qLabel) {
430  bypassTransition(qLabel, to);
431  }
432  }
433  for (auto& from : pim->transitions) {
434  from.second.erase(qLabel);
435  }
436  pim->transitions.erase(qLabel);
437 }
438 
441 
446  for (auto const& rippable : getActiveStates()) {
447  ripState(rippable);
448  }
450 }
451 
454 gnfa::gnfa(gnfa const& n) : pim(new impl(*(n.pim))) {}
455 
458 
462 gnfa::gnfa(gnfa&& n) : pim(new impl) { pim.swap(n.pim); }
463 
464 #ifndef DOXYGEN_SHOULD_SKIP_THIS
465 gnfa::operator nfa const&() const {
466  if (!pim->equivalent) {
467  pim->equivalent =
468  std::make_shared<nfa const>(gnfa(*this).splitAllTransitions());
469  }
470  return *pim->equivalent;
471 }
472 #endif // DOXYGEN_SHOULD_SKIP_THIS
473 
476 
480 bool gnfa::operator==(nfa const& other) const {
481  return other.operator==(*this);
482 }
483 
486 
490 bool gnfa::operator!=(nfa const& other) const { return !operator==(other); }
491 
495  if (this != &n) {
496  pim.reset(new impl(*(n.pim)));
497  }
498  return *this;
499 }
500 
503 
508  if (this != &n) {
509  pim.reset(new impl);
510  pim.swap(n.pim);
511  }
512  return *this;
513 }
514 
515 gnfa::~gnfa() = default;
516 } // namespace reg
void ripState(std::string const &q)
Removes a state, bypassing all its outgoing transitions.
Definition: gnfa.cpp:423
std::string const & getInitialState() const
Names this DFA's initial state.
Definition: dfa.cpp:391
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:420
std::shared_ptr< nfa const > equivalent
Points to a DFA accepting the same language as the owning GNFA.
Definition: gnfa.cpp:41
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:38
bool operator!=(nfa const &n) const
Checks whether this RE describes a different regular language than another object.
Definition: gnfa.cpp:490
expression::exptr getTransition(std::string const &q, std::string const &p) const
Extracts the RE labelling the transition between two states.
Definition: gnfa.cpp:278
Represents deterministic finite automata.
Definition: dfa.h:41
std::vector< exptr >::const_iterator begin() const
Returns an iterator pointing to this RE's first subexpression.
Definition: expression.cpp:378
static exptr const & spawnEmptyString()
Gives an RE representing the empty string ε.
Definition: expression.cpp:89
nfa splitAllTransitions()
Splits all transitions until only ∅, ε, and symbol REs remain and builds the resulting NFA...
Definition: gnfa.cpp:378
expression::exptr ripAllStates()
Removes all active states, constructing an RE semantically equivalent to this GNFA.
Definition: gnfa.cpp:445
std::string const & getAcceptingState() const
Reveals the name of this GNFA's accept state.
Definition: gnfa.cpp:264
static exptr spawnAlternation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the alternation of two given REs.
Definition: expression.cpp:178
std::vector< char32_t > const & getAlphabet_() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:598
Represents formal regular expressions.
Definition: expression.h:46
std::string const & getInitialState() const
Reveals the name of this GNFA's initial state.
Definition: gnfa.cpp:261
static exptr const & spawnSymbol_(char32_t u32symbol)
Gives an RE representing the given UTF-32-encoded symbol.
Definition: expression.cpp:100
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:215
string accepting
Holds the name of the accept state.
Definition: gnfa.cpp:48
bool operator==(nfa const &n) const
Checks whether this RE describes the same regular language as another object.
Definition: gnfa.cpp:480
string initial
Holds the name of the initial state.
Definition: gnfa.cpp:46
gnfa & operator=(gnfa const &n)
Copy-assigns this GNFA by copying another one's private implementation object.
Definition: gnfa.cpp:494
impl()
Constructs private implementation object for a GNFA accepting the empty language ∅.
Definition: gnfa.cpp:53
Contains the reg::nfa class definition.
operation getOperation() const
Reports this RE's function.
Definition: expression.cpp:269
state_vector splitTransition(std::string const &q, std::string const &p)
Splits a transition between two states, adding new states if needed.
Definition: gnfa.cpp:312
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:613
Represents generalized nondeterministic finite automata.
Definition: gnfa.h:35
expression::exptr const & getTransition(string const &qLabel, string const &pLabel) const
Safely fetches a transition RE between two states.
Definition: gnfa.cpp:81
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
Definition: dfa.cpp:398
std::valarray< bool > const & delta_(size_t qi, char32_t u32symbol) const
Computes this NFA's transition function for a state index and a UTF-32-encoded symbol.
Definition: nfa.cpp:170
fabuilder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective FA.
Definition: fabuilder.cpp:238
Contains the reg::gnfa class definition.
Constructs NFAs step by step.
Definition: fabuilder.h:31
state_vector getActiveStates() const
Reveals the names of this GNFA's non-initial, non-accepting states.
Definition: gnfa.cpp:267
Where this library lives.
Definition: dfa.cpp:48
fabuilder & makeInitial(std::string const &state)
Resets the initial state for the prospective FA.
Definition: fabuilder.cpp:256
std::string const & getInitialState() const
Names this NFA's initial state.
Definition: nfa.cpp:567
string const & generateState(char prefix='q', char suffix='\0')
Generates a unique new state name with given prefix (and suffix).
Definition: gnfa.cpp:124
unordered_map< string, unordered_map< string, expression::exptr > > transitions
Holds the transition table viz state × state → regular expression.
Definition: gnfa.cpp:43
char32_t extractSymbol_() const
Reports this symbol expression's UTF-32-encoded symbol.
Definition: expression.cpp:309
Private implementation details of GNFAs.
Definition: gnfa.cpp:40
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
Definition: nfa.cpp:574
gnfa(dfa const &d)
Constructs a GNFA with the same states and transitions as a given DFA.
Definition: gnfa.cpp:156
void addTransition(string const &qLabel, string const &pLabel, expression::exptr re, bool optimized=true, bool aggressive=false)
Safely adds a transition RE between two states.
Definition: gnfa.cpp:102
fabuilder & addTransition_(std::string const &from, std::string const &to, char32_t u32symbol=U'\0')
Adds a transition to the prospective FA's state transition function.
Definition: fabuilder.cpp:308
static exptr const & spawnEmptySet()
Gives an RE representing the empty set ∅.
Definition: expression.cpp:80
nfa buildNfa()
Builds an NFA as defined by previous operations.
Definition: fabuilder.cpp:859
Contains the reg::dfa class definition.
state_pair_vector getSplittableTransitions() const
Reveals this GNFA's splittable transitions.
Definition: gnfa.cpp:288
std::vector< char32_t > const & getAlphabet_() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:405
Contains the reg::fabuilder class definition.
static exptr spawnKleene(exptr const &b, bool optimized=true, bool aggressive=false)
Gives an RE representing the Kleene closure of a given RE.
Definition: expression.cpp:224
static exptr spawnConcatenation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the concatenation of two given REs.
Definition: expression.cpp:135
void bypassTransition(std::string const &q, std::string const &p)
Removes a transition between two states and replaces it with equivalent ones, bypassing its beginning...
Definition: gnfa.cpp:401
impl(unordered_map< string, unordered_map< string, expression::exptr >> &transitionMap, string const &initialState, forward_list< reference_wrapper< string const >> &acceptingStates, nfa const *eq)
Constructs private implementation object with provided members.
Definition: gnfa.cpp:59