reglibcpp  1.0.0
(Naïve) C++ implementation of models for regular languages
gnfa.cpp
Go to the documentation of this file.
1 #include "gnfa.h"
3 using std::vector;
4 using std::pair;
5 using std::string;
6 using std::shared_ptr;
7 using std::unique_ptr;
8 
9 #include <unordered_set>
10 using std::unordered_set;
11 
12 #include <unordered_map>
13 using std::unordered_map;
14 
15 #include <functional>
16 using std::reference_wrapper;
17 
18 #include <forward_list>
19 using std::forward_list;
20 
21 #include "nfa.h"
22 
23 namespace reg {
24 
26 struct gnfa::pImpl {
27  unordered_map<string,unordered_map<string,expression::exptr>> transitions;
29  string initial;
31  string accepting;
33 
35  pImpl() : initial("p0"), accepting("pF") {
38  }
39 
41  pImpl(unordered_map<string,unordered_map<string,expression::exptr>>& transitionMap,
42  string const& initialState,
43  forward_list<reference_wrapper<string const>>& acceptingStates) : transitions(move(transitionMap)) {
44  initial = generateState('p', '0');
45  auto const& epsilon = expression::spawnEmptyString();
46  transitions[initial][initialState] = epsilon;
47  accepting = generateState('p', 'F');
48  for (string const& a : acceptingStates) {
49  transitions[a][accepting] = epsilon;
50  }
51  }
52 
54 
60  expression::exptr const& getTransition(string const& qLabel, string const& pLabel) const {
61  auto const& from = transitions.at(qLabel);
62  auto reIt = from.find(pLabel);
63  if (reIt == from.end() || !reIt->second) {
65  }
66  return reIt->second;
67  }
68 
70 
81  void addTransition(string const& qLabel, string const& pLabel, expression::exptr re, bool optimized = true, bool aggressive = false) {
82  transitions[qLabel][pLabel] = expression::spawnAlternation(getTransition(qLabel, pLabel), re, optimized, aggressive);
83  }
84 
86 
97  string const& generateState(char prefix = 'q', char suffix = '\0') {
98  string state;
99  if (suffix == '\0') {
100  state = string(1, prefix).append(std::to_string(0));
101  for (size_t q(1); transitions.count(state) != 0; q++) {
102  state = string(1, prefix).append(std::to_string(q));
103  }
104  } else {
105  for (state = string(1, prefix).append(1, suffix);
106  transitions.count(state) != 0;
107  state.append(1, suffix)
108  ) {}
109  }
110  return transitions.insert(std::make_pair(state, unordered_map<string, expression::exptr>())).first->first;
111  }
112 };
113 
115 
125 gnfa::gnfa(nfa const& n) {
126  unordered_map<string,unordered_map<string,expression::exptr>> transitions(n.getNumberOfStates()+2);
127  forward_list<reference_wrapper<string const>> acceptingStates;
128  for (size_t qi(0); qi < n.getNumberOfStates(); qi++) {
129  auto const& qLabel = n.getLabelOf(qi);
130  auto& entry = transitions[qLabel];
131  if (n.isAccepting(qi)) {
132  acceptingStates.push_front(qLabel);
133  }
134  for (auto const& symbol : n.getAlphabet()) {
135  auto& pSet = n.delta(qi, symbol);
136  for (size_t pi(0); pi < pSet.size(); pi++) {
137  if (pSet[pi]) {
138  auto& dest = entry[n.getLabelOf(pi)];
139  if (!dest) {
140  dest = expression::spawnSymbol(symbol);
141  } else {
143  }
144  }
145  }
146  }
147  }
148  p = unique_ptr<pImpl>(new pImpl(transitions, n.getLabelOf(0), acceptingStates));
149 }
150 
152 
159  p->transitions[p->initial][p->accepting] = r;
160 }
161 
162 gnfa::gnfa(expression const& r) : p(new pImpl) {
163  switch (r.getOperation()) {
164  case expression::operation::alternation :
165  p->transitions[p->initial][p->accepting] = expression::spawnAlternation(*r.begin(), *(r.begin()+1));
166  break;
167  case expression::operation::concatenation :
168  p->transitions[p->initial][p->accepting] = expression::spawnConcatenation(*r.begin(), *(r.begin()+1));
169  break;
170  case expression::operation::kleene :
171  p->transitions[p->initial][p->accepting] = expression::spawnKleene(*r.begin());
172  break;
173  case expression::operation::symbol :
174  p->transitions[p->initial][p->accepting] = expression::spawnSymbol(r.extractSymbol());
175  break;
176  case expression::operation::empty :
177  p->transitions[p->initial][p->accepting] = expression::spawnEmptySet();
178  break;
179  }
180 }
181 
183 string const& gnfa::getInitialState() const {
184  return p->initial;
185 }
186 
188 string const& gnfa::getAcceptingState() const {
189  return p->accepting;
190 }
191 
193 vector<reference_wrapper<string const>> gnfa::getActiveStates() const {
194  vector<reference_wrapper<string const>> active;
195  for (auto const& entry : p->transitions) {
196  if (entry.first != p->initial && entry.first != p->accepting) {
197  active.push_back(entry.first);
198  }
199  }
200  return active;
201 }
202 
204 expression::exptr gnfa::getTransition(string const& qLabel, string const& pLabel) const {
205  return p->getTransition(qLabel, pLabel);
206 }
207 
209 
213 vector<pair<string const&, string const&>> gnfa::getSplittableTransitions() const {
214  vector<pair<string const&, string const&>> splittable;
215  splittable.reserve(p->transitions.size()*p->transitions.size());
216  for (pair<string const, unordered_map<string, expression::exptr>> const& from : p->transitions) {
217  for (pair<string const, expression::exptr> const& to : from.second) {
218  if (to.second && to.second->getOperation() > expression::operation::symbol) {
219  splittable.push_back(std::make_pair(std::ref(from.first), std::ref(to.first)));
220  }
221  }
222  }
223  splittable.shrink_to_fit();
224  return splittable;
225 }
226 
228 
236 vector<reference_wrapper<string const>> gnfa::splitTransition(string const& qLabel, string const& pLabel) {
237  vector<reference_wrapper<string const>> newStates;
238  auto const& re = p->getTransition(qLabel, pLabel);
239  switch (re->getOperation()) {
240  case expression::operation::alternation : {
241  auto sub1 = *(re->begin());
242  auto sub2 = *(re->begin()+1);
243  p->transitions[qLabel][pLabel] = expression::spawnEmptySet();
244  newStates.reserve(4);
245  for (size_t i(0); i < 4; i++) {
246  newStates.push_back(p->generateState());
247  }
248  auto const& epsilon = expression::spawnEmptyString();
249  p->transitions[qLabel][newStates[0]] = epsilon;
250  p->transitions[newStates[0]][newStates[1]] = sub1;
251  p->transitions[newStates[1]][pLabel] = epsilon;
252  p->transitions[qLabel][newStates[2]] = epsilon;
253  p->transitions[newStates[2]][newStates[3]] = sub2;
254  p->transitions[newStates[3]][pLabel] = epsilon;
255  break;
256  }
257  case expression::operation::concatenation : {
258  auto sub1 = *(re->begin());
259  auto sub2 = *(re->begin()+1);
260  p->transitions[qLabel][pLabel] = expression::spawnEmptySet();
261  newStates.reserve(2);
262  for (size_t i(0); i < 2; i++) {
263  newStates.push_back(p->generateState());
264  }
265  p->transitions[qLabel][newStates[0]] = sub1;
266  p->transitions[newStates[0]][newStates[1]] = expression::spawnEmptyString();
267  p->transitions[newStates[1]][pLabel] = sub2;
268  break;
269  }
270  case expression::operation::kleene : {
271  auto sub = *(re->begin());
272  auto const& epsilon = expression::spawnEmptyString();
273  p->transitions[qLabel][pLabel] = epsilon;
274  newStates.reserve(2);
275  for (size_t i(0); i < 2; i++) {
276  newStates.push_back(p->generateState());
277  }
278  p->transitions[qLabel][newStates[0]] = epsilon;
279  p->transitions[newStates[1]][pLabel] = epsilon;
280  p->transitions[newStates[1]][newStates[0]] = epsilon;
281  p->transitions[newStates[0]][newStates[1]] = sub;
282  break;
283  }
284  case expression::operation::symbol :
285  case expression::operation::empty :
286  ;
287  }
288  return newStates;
289 }
290 
292 
301  for (auto splittable = getSplittableTransitions(); !splittable.empty(); splittable = getSplittableTransitions()) {
302  for (auto const& nodes : splittable) {
303  splitTransition(nodes.first, nodes.second);
304  }
305  }
306  nfa::builder b;
307  auto const& empty = expression::spawnEmptySet();
308  for (auto const& from : p->transitions) {
309  for (auto const& to : from.second) {
310  if (to.second != empty) {
311  b.addTransition(from.first, to.first, to.second->extractSymbol());
312  }
313  }
314  }
315  b.makeInitial(p->initial);
316  b.setAccepting(p->accepting, true);
317  return b.build();
318 }
319 
321 void gnfa::bypassTransition(string const& qLabel, string const& pLabel) {
322  auto const& re = p->getTransition(qLabel, pLabel);
323  if (re == expression::spawnEmptySet()) {
324  return;
325  }
326  auto const& selfRe = p->getTransition(qLabel, qLabel);
327  for (auto const& from : p->transitions) {
328  if (from.first != qLabel) {
329  auto to = from.second.find(qLabel);
330  if (to != from.second.end() && to->second) {
331  p->addTransition(
332  from.first,
333  pLabel,
335  to->second,
337  expression::spawnKleene(selfRe),
338  re
339  )
340  )
341  );
342  }
343  }
344  }
345  p->transitions[qLabel].erase(pLabel);
346 }
347 
349 void gnfa::ripState(string const& qLabel) {
350  forward_list<reference_wrapper<string const>> right;
351  for (auto const& to : p->transitions[qLabel]) {
352  right.push_front(to.first);
353  }
354  for (auto const& to : right) {
355  if (to.get() != qLabel) {
356  bypassTransition(qLabel, to);
357  }
358  }
359  for (auto& from : p->transitions) {
360  from.second.erase(qLabel);
361  }
362  p->transitions.erase(qLabel);
363 }
364 
366 
371  for (auto const& rippable : getActiveStates()) {
372  ripState(rippable);
373  }
375 }
376 
378 gnfa::gnfa(gnfa const& n) : p(new pImpl(*(n.p))) {}
379 
381 
382 gnfa::gnfa(gnfa&& n) : p(new pImpl) {p.swap(n.p);}
383 
386  if (this != &n) {
387  p.reset(new pImpl(*(n.p)));
388  }
389  return *this;
390 }
391 
393 
395  if (this != &n) {
396  p.reset(new pImpl);
397  p.swap(n.p);
398  }
399  return *this;
400 }
401 
402 gnfa::~gnfa() = default;
403 }
pImpl()
Constructs private implementation object for a GNFA accepting the empty language ∅.
Definition: gnfa.cpp:35
void addTransition(string const &qLabel, string const &pLabel, expression::exptr re, bool optimized=true, bool aggressive=false)
Safely adds a transition RE between two states.
Definition: gnfa.cpp:81
void ripState(std::string const &qLabel)
Removes a state, bypassing all its outgoing transitions.
Definition: gnfa.cpp:349
std::vector< std::reference_wrapper< std::string const > > getActiveStates() const
Reveals the names of this GNFA&#39;s non-initial, non-accepting states.
Definition: gnfa.cpp:193
nfa build()
Builds the NFA, as defined by previous operations.
Definition: nfa.cpp:495
expression::exptr getTransition(std::string const &qLabel, std::string const &pLabel) const
Extracts the RE labelling the transition between two states.
Definition: gnfa.cpp:204
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:22
unordered_map< string, unordered_map< string, expression::exptr > > transitions
Holds the transition table viz state × state → regular expression.
Definition: gnfa.cpp:27
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA&#39;s set of processable symbols.
Definition: nfa.cpp:268
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: nfa.cpp:237
std::vector< exptr >::const_iterator begin() const
Returns an iterator pointing to this RE&#39;s first subexpression.
Definition: expression.cpp:302
static exptr const & spawnEmptyString()
Gives an RE representing the empty string ε.
Definition: expression.cpp:50
Constructs NFAs step by step.
Definition: nfa.h:52
gnfa(nfa const &n)
Constructs a GNFA with the same states and transitions as a given NFA.
Definition: gnfa.cpp:125
nfa splitAllTransitions()
Splits all transitions until only ∅, ε, and symbol REs remain and builds the resulting NFA...
Definition: gnfa.cpp:300
expression::exptr ripAllStates()
Removes all active states, constructing an RE semantically equivalent to this GNFA.
Definition: gnfa.cpp:370
std::string const & getAcceptingState() const
Reveals the name of this GNFA&#39;s accept state.
Definition: gnfa.cpp:188
static exptr spawnAlternation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the alternation of two given REs.
Definition: expression.cpp:117
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:296
Represents formal regular expressions.
Definition: expression.h:31
char32_t extractSymbol() const
Reports this symbol expression&#39;s UTF-32-encoded symbol.
Definition: expression.cpp:229
std::string const & getInitialState() const
Reveals the name of this GNFA&#39;s initial state.
Definition: gnfa.cpp:183
gnfa & operator=(gnfa const &n)
Copy-assigns this GNFA by copying another one&#39;s private implementation object.
Definition: gnfa.cpp:385
static exptr const & spawnSymbol(char32_t symbol)
Gives an RE representing the given UTF-32-encoded symbol.
Definition: expression.cpp:61
std::vector< std::pair< std::string const &, std::string const & > > getSplittableTransitions() const
Reveals this GNFA&#39;s splittable transitions.
Definition: gnfa.cpp:213
string initial
Holds the name of the initial state.
Definition: gnfa.cpp:29
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
Definition: nfa.cpp:442
expression::exptr const & getTransition(string const &qLabel, string const &pLabel) const
Safely fetches a transition RE between two states.
Definition: gnfa.cpp:60
Contains the reg::nfa class definition.
operation getOperation() const
Reports this RE&#39;s function.
Definition: expression.cpp:195
Represents generalized nondeterministic finite automata.
Definition: gnfa.h:18
string const & generateState(char prefix='q', char suffix='\0')
Generates a unique new state name with given prefix (and suffix).
Definition: gnfa.cpp:97
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
Definition: nfa.cpp:461
std::vector< std::reference_wrapper< std::string const > > splitTransition(std::string const &qLabel, std::string const &pLabel)
Splits a transition between two states, adding new states if needed.
Definition: gnfa.cpp:236
size_t getNumberOfStates() const
Counts this NFA&#39;s states.
Definition: nfa.cpp:286
void bypassTransition(std::string const &qLabel, std::string const &pLabel)
Removes a transition between two states and replaces it with equivalent ones, bypassing its beginning...
Definition: gnfa.cpp:321
pImpl(unordered_map< string, unordered_map< string, expression::exptr >> &transitionMap, string const &initialState, forward_list< reference_wrapper< string const >> &acceptingStates)
Constructs private implementation object with provided members.
Definition: gnfa.cpp:41
Contains the reg::gnfa class definition.
Definition: dfa.cpp:32
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
Definition: nfa.cpp:475
std::valarray< bool > const & delta(size_t q, char32_t symbol) const
Computes this NFA&#39;s transition function for a state and a symbol.
Definition: nfa.cpp:106
std::shared_ptr< expression const > exptr
This is the type used to handle regular expressions.
Definition: expression.h:43
string accepting
Holds the name of the accept state.
Definition: gnfa.cpp:31
static exptr const & spawnEmptySet()
Gives an RE representing the empty set ∅.
Definition: expression.cpp:40
static exptr spawnKleene(exptr const &b, bool optimized=true, bool aggressive=false)
Gives an RE representing the Kleene closure of a given RE.
Definition: expression.cpp:152
static exptr spawnConcatenation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the concatenation of two given REs.
Definition: expression.cpp:81
Private implementation details of GNFAs.
Definition: gnfa.cpp:26