reglibcpp  1.0.0
(Naïve) C++ implementation of models for regular languages
expression.h
Go to the documentation of this file.
1 #ifndef REG_EXP_H
2 #define REG_EXP_H
3 
5 #include <vector>
6 
7 #include <unordered_map>
8 
9 #include <string>
10 
11 #include <memory>
12 
13 #include <locale>
14 
15 #include <codecvt>
16 
17 namespace reg {
18 class dfa;
20 
31 class expression { // Represents formal regular expressions (REs).
32 public:
34 
43  typedef std::shared_ptr<expression const> exptr; // The type used to handle regular expressions: a shared pointer to an immutable expression.
44  static exptr const& spawnEmptySet(); // Gives an RE representing the empty set.
45  static exptr const& spawnEmptyString(); // Gives an RE representing the empty string.
46  static exptr const& spawnSymbol(char32_t symbol); // Gives an RE representing the given UTF-32-encoded symbol.
47  static exptr const& spawnSymbol(std::string utf8Symbol); // Overload taking the symbol as a std::string; presumably UTF-8-encoded, judging by the parameter name -- confirm.
48  static exptr spawnKleene(exptr const& b, bool optimized = true, bool aggressive = false); // Gives an RE representing the Kleene closure of the given RE b.
49  static exptr spawnConcatenation(exptr const& l, exptr const& r, bool optimized = true, bool aggressive = false); // Gives an RE representing the concatenation of the two given REs l and r.
50  static exptr spawnAlternation(exptr const& l, exptr const& r, bool optimized = true, bool aggressive = false); // Gives an RE representing the alternation of the two given REs l and r.
52  struct literals { // Token literals used when reading an RE from a string (see spawnFromString).
53  char32_t const L, // The left parenthesis.
54  R, // The right parenthesis.
55  S, // The Kleene star.
56  P, // The alternation symbol.
57  EPSILON, // Neutral element of concatenation, a.k.a. the empty string.
58  EMPTY; // Neutral element of alternation and annihilating element of concatenation, a.k.a. the empty set.
59 
69  literals(char32_t plus = U'+', char32_t empty = U'∅', char32_t epsilon = U'ε', // NOTE: the parameter order (plus, empty, epsilon, star, rPar, lPar) is the reverse of the member order above.
70  char32_t star = U'*', char32_t rPar = U')', char32_t lPar = U'(')
71  : L(lPar), R(rPar), S(star), P(plus), EPSILON(epsilon), EMPTY(empty){} // Each member is initialized from the parameter of matching meaning.
72  };
73  static exptr spawnFromString(std::u32string const& re, literals lits = literals(), // Gives an RE encoded in the given UTF-32 string, using lits as token literals.
74  bool optimized = false, bool aggressive = false); // NOTE: optimized defaults to false here, unlike the other spawn* functions above.
75  static exptr spawnFromString(std::string const& utf8Re, literals lits = literals(), // Overload taking the RE as a std::string; presumably UTF-8-encoded, judging by the parameter name -- confirm.
76  bool optimized = false, bool aggressive = false);
84  enum struct operation { empty, symbol, kleene, concatenation, alternation }; // The different purposes an RE may fulfill.
85  size_t size() const; // Reports the size of this RE's tree representation.
86  operation getOperation() const; // Reports this RE's function, i.e. its operation.
87  bool operator==(expression const& r) const; // Checks whether this RE is semantically equivalent to another one.
88  bool operator!=(expression const& r) const; // Checks whether this RE is semantically different from another one.
89  char32_t extractSymbol() const; // Reports this symbol expression's UTF-32-encoded symbol.
90  std::string extractUtf8Symbol() const; // Reports this symbol expression's UTF-8-encoded symbol.
91  std::u32string to_u32string() const; // Describes this RE in UTF-32-encoded human-readable form.
92  std::string to_string() const; // Describes this RE in human-readable form as a std::string; presumably UTF-8-encoded, like extractUtf8Symbol -- confirm.
93  std::vector<exptr>::const_iterator begin() const; // Returns an iterator pointing to this RE's first subexpression.
94  std::vector<exptr>::const_iterator end() const; // Returns an iterator pointing behind this RE's last subexpression.
95  static std::unique_ptr<
96  std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t>
97  > const converter; // Converts between UTF-8-encoded and UTF-32-encoded strings.
98 private:
99  expression(); // Constructors are private; presumably instances are meant to be obtained through the static spawn* functions -- confirm.
100  expression(char32_t symbol);
101  expression(exptr const& l, exptr const& r, operation op);
102  expression(exptr const& b);
103  expression(expression& e) = delete; // Copy construction is disabled.
104  expression(expression&& e) = delete; // Move construction is disabled.
105  expression& operator=(expression& e) = delete; // Copy and move assignment are disabled, in both non-const and const-qualified forms.
106  expression& operator=(expression& e) const = delete;
107  expression& operator=(expression&& e) = delete;
108  expression& operator=(expression&& e) const = delete;
109  static exptr empty; // Shared static RE; presumably the empty-set instance handed out by spawnEmptySet -- confirm against expression.cpp.
110  static std::unordered_map<char32_t, exptr> symbols; // Static map from symbol to RE; presumably caches the instances handed out by spawnSymbol -- confirm.
111  std::vector<exptr> const subExpressions; // This RE's subexpressions; presumably the range exposed by begin()/end().
112  operation const op; // The operation of this RE; presumably the value reported by getOperation().
113  std::unique_ptr<dfa const> mutable acceptingDfa; // mutable, so it can be assigned from const member functions; presumably a lazily built DFA for this RE -- confirm.
114  struct parser; // Forward declaration; presumably the RE parser defined in expression.cpp.
115 };
116 }
117 #endif
static std::unique_ptr< std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > > const converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: expression.h:97
char32_t const S
The Kleene star.
Definition: expression.h:53
bool operator!=(expression const &r) const
Checks whether this RE is semantically different from another one.
Definition: expression.cpp:219
Token literals as used in Introduction to Automata Theory, Languages, and Computation by Hopcroft...
Definition: expression.h:52
char32_t const EMPTY
Neutral element of alternation and annihilating element of concatenation, a.k.a. empty set...
Definition: expression.h:53
std::vector< exptr >::const_iterator begin() const
Returns an iterator pointing to this RE's first subexpression.
Definition: expression.cpp:302
static exptr const & spawnEmptyString()
Gives an RE representing the empty string ε.
Definition: expression.cpp:50
static exptr spawnFromString(std::u32string const &re, literals lits=literals(), bool optimized=false, bool aggressive=false)
Gives an RE encoded in a given string.
Definition: expression.cpp:613
static exptr spawnAlternation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the alternation of two given REs.
Definition: expression.cpp:117
Represents formal regular expressions.
Definition: expression.h:31
std::u32string to_u32string() const
Describes this RE in UTF-32-encoded human-readable form.
Definition: expression.cpp:259
char32_t extractSymbol() const
Reports this symbol expression's UTF-32-encoded symbol.
Definition: expression.cpp:229
static exptr const & spawnSymbol(char32_t symbol)
Gives an RE representing the given UTF-32-encoded symbol.
Definition: expression.cpp:61
operation
The different purposes an RE may fulfill.
Definition: expression.h:84
Parses regular expressions.
Definition: expression.cpp:338
operation getOperation() const
Reports this RE's function.
Definition: expression.cpp:195
std::vector< exptr >::const_iterator end() const
Returns an iterator pointing behind this RE's last subexpression.
Definition: expression.cpp:307
std::string to_string() const
Describes this RE in UTF-8-encoded human-readable form.
Definition: expression.cpp:297
bool operator==(expression const &r) const
Checks whether this RE is semantically equivalent to another one.
Definition: expression.cpp:204
char32_t const L
The left parenthesis.
Definition: expression.h:53
char32_t const P
The alternation symbol.
Definition: expression.h:53
size_t size() const
Reports the size of this RE's tree representation.
Definition: expression.cpp:181
Definition: dfa.cpp:32
char32_t const EPSILON
Neutral element of concatenation, a.k.a. empty string.
Definition: expression.h:53
std::shared_ptr< expression const > exptr
This is the type used to handle regular expressions.
Definition: expression.h:43
static exptr const & spawnEmptySet()
Gives an RE representing the empty set ∅.
Definition: expression.cpp:40
char32_t const R
The right parenthesis.
Definition: expression.h:53
std::string extractUtf8Symbol() const
Reports this symbol expression's UTF-8-encoded symbol.
Definition: expression.cpp:249
literals(char32_t plus=U'+', char32_t empty=U'∅', char32_t epsilon=U'ε', char32_t star=U'*', char32_t rPar=U')', char32_t lPar=U'(')
Definition: expression.h:69
static exptr spawnKleene(exptr const &b, bool optimized=true, bool aggressive=false)
Gives an RE representing the Kleene closure of a given RE.
Definition: expression.cpp:152
static exptr spawnConcatenation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the concatenation of two given REs.
Definition: expression.cpp:81