21 using std::make_unique;
31 #include <unordered_map> 84 alphabet.reserve(this->u32alphabet.size());
85 for (char32_t symbol : this->u32alphabet) {
104 : pim(new
impl(move(alphabet), move(transitions), move(labels),
105 move(acceptingStates))) {}
107 #ifndef DOXYGEN_SHOULD_SKIP_THIS 108 nfa::operator
dfa const&()
const {
109 if (!pim->equivalent) {
110 pim->equivalent = std::make_shared<dfa const>(
113 return *pim->equivalent;
115 #endif // DOXYGEN_SHOULD_SKIP_THIS 124 return static_cast<dfa const&
>(*this).operator==(n);
134 return static_cast<dfa const&
>(*this).operator!=(n);
147 if (si >= pim->alphabet.size()) {
150 if (qi >= pim->labels.size()) {
153 return pim->transitions[qi][si];
251 if (qi >= pim->labels.size()) {
336 for (char32_t symbol : u32word) {
366 string const& word)
const {
407 string const& word)
const {
419 if (qi >= pim->labels.size()) {
422 if (pim->epsClosures[qi].size() == 0) {
423 pim->epsClosures[qi].resize(pim->labels.size());
424 pim->epsClosures[qi][qi] =
true;
430 for (
bool qqb : old) {
433 for (
bool pb : pim->transitions[qqi][0]) {
435 if (!pim->epsClosures[qi][pi]) {
437 pim->epsClosures[qi][pi] =
true;
447 return pim->epsClosures[qi];
477 size_t range = min(qs.size(),
getStates().size());
478 for (
size_t q = 0; q < range; q++) {
508 size_t range = min(qs.size(),
getStates().size());
509 for (
size_t q = 0; q < range; q++) {
511 label = label.append(1, label.length() == 0 ?
'{' :
',');
516 if (label.length() == 0) {
519 return label.append(1,
'}');
541 size_t range = min(qs.size(),
getStates().size());
542 for (
size_t q = 0; q < range; q++) {
557 for (
size_t qi = 0; qi <
getStates().size(); qi++) {
614 if (qi >= pim->labels.size()) {
617 return pim->accepting[qi];
640 size_t range = min(qs.size(),
getStates().size());
641 for (
size_t q = 0; q < range; q++) {
642 if (qs[q] && pim->accepting[q]) {
660 for (
size_t qi = 0; qi <
getStates().size(); qi++) {
661 if (pim->accepting[qi] && qs.count(
getStates()[qi])) {
675 auto const& pim = n.pim;
676 if (pim->accepting[0]) {
681 shortestWords.emplace(0, U
"");
682 while (shortestWords.size() > oldSize) {
683 oldSize = shortestWords.size();
684 for (
auto const& stateWord : shortestWords) {
689 for (
size_t q = 0; q < reached.size(); q++) {
691 if (pim->accepting[q]) {
694 if (!shortestWords.count(q)) {
695 shortestWords.emplace(q, shWord);
786 pim.reset(
new impl(*(n.pim)));
802 nfa::~nfa() =
default;
u32string findShortestWord_(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
vector< valarray< bool > > epsClosures
Cache for every state's ε-closures.
vector< char32_t > u32alphabet
Represents the set of processable symbols.
std::unordered_set< std::reference_wrapper< std::string const >, hash_reference_string_const, equal_to_reference_string_const > state_set
Nicer name for sets of names of states. Should store references to existing state names...
impl()
Constructs private implementation object for an NFA accepting the empty language ∅.
string findShortestWord(dfa const &d)
Searches the shortest UTF-8-encoded word accepted by a given DFA.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
impl(vector< char32_t > &&u32alphabet, vector< vector< valarray< bool >>> &&transitions, vector< string > &&labels, valarray< bool > &&acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
Signals that an input symbol was used that the FA doesn't recognize.
static fabuilder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
Represents deterministic finite automata.
fabuilder & intersect(nfa const &other)
Makes the prospective FA accept only words accepted also by another FA.
bool operator==(nfa const &n) const
Checks whether this NFA describes the same regular language as another object.
std::vector< char32_t > const & getAlphabet_() const
Fetches this NFA's set of processable symbols.
std::string to_string(std::valarray< bool > const &qs) const
Puts a name to a set of indices.
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
std::valarray< bool > encodeSet(state_set const &qs) const
Translates a set of states from set to bool representation.
bool operator!=(nfa const &n) const
Checks whether this NFA describes a different regular language than another object.
state_set getStatesSet() const
Fetches this NFA's set of states as a set of (references to) strings.
std::valarray< bool > deltaHat(size_t qi, std::string const &word) const
Computes this DFA's transition function recursively for a UTF-8-encoded string of symbols...
Private implementation details of NFAs.
size_t index_of(C const &container, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
Contains the reg::nfa class definition.
fabuilder & minimize(bool force=false)
Convenience method for chaining purge and merge to achieve proper DFA minimization.
Contains the reg::expression class defintion.
static fabuilder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
dfa buildDfa(bool force=false)
Builds a DFA as defined by previous operations, including completion.
state_set decodeSet(std::valarray< bool > const &qs) const
Translates a set of states from bool to set representation.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
vector< string > labels
Stores the names of states.
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object's NFA.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
fabuilder & addSymbol_(char32_t u32symbol)
Adds a symbol to the prospective FA's alphabet.
fabuilder & unite(nfa const &other)
Makes the prospective FA also accept every word of another FA's language.
fabuilder & normalizeStateNames(std::string const &prefix)
Reduces the prospective FA's state names to consecutive numbers, prefixed with a given string...
std::valarray< bool > const & delta_(size_t qi, char32_t u32symbol) const
Computes this NFA's transition function for a state index and a UTF-32-encoded symbol.
Signals that a state was mentioned that isn't part of the FA.
Constructs NFAs step by step.
std::valarray< bool > getStatesBools() const
Fetches this NFA's set of states encoded as an array of bools.
Where this library lives.
nfa()
Constructs an NFA accepting the empty language ∅.
std::string const & getInitialState() const
Names this NFA's initial state.
vector< string > alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
static fabuilder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
std::valarray< bool > const & delta(size_t qi, size_t si) const
Computes this NFA's transition function for a state index and a symbol index.
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
std::valarray< bool > deltaHat_(size_t qi, std::u32string const &u32word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
static fabuilder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
nfa buildNfa()
Builds an NFA as defined by previous operations.
Contains the reg::dfa class definition.
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Contains the reg::fabuilder class definition.
std::vector< std::string > const & getAlphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
fabuilder & complement(bool force=false)
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...