| 1 |
#ifndef CRASH_LEXER |
|---|
| 2 |
#define CRASH_LEXER |
|---|
| 3 |
|
|---|
| 4 |
#include <cassert>
#include <iterator>
#include <map>
#include <ostream>
#include <stdexcept>
#include <string>
#include "Strings.h"
#include "Regex.h"
|---|
| 10 |
|
|---|
| 11 |
using namespace std; |
|---|
| 12 |
|
|---|
| 13 |
/** |
|---|
| 14 |
Lexer is a lexical analyser. |
|---|
| 15 |
|
|---|
| 16 |
02/01/01 Fixed the bug where ignored tokens caused it to die horribly. |
|---|
| 17 |
20/09/00 Restarted after I couldn't find an annoying bug. The interface |
|---|
| 18 |
needed to be cleaned up anyway. |
|---|
| 19 |
24/08/00 Fixed a bug where the list of tokens wasn't being cleared |
|---|
| 20 |
in between calls to the scanner. |
|---|
| 21 |
13/08/00 Created. |
|---|
| 22 |
*/ |
|---|
| 23 |
|
|---|
| 24 |
class Lexer { |
|---|
| 25 |
public : |
|---|
| 26 |
class exception : public runtime_error { |
|---|
| 27 |
public : |
|---|
| 28 |
exception(string const &str, unsigned line) : runtime_error(str), _line(line) {} |
|---|
| 29 |
unsigned line() { return _line; } |
|---|
| 30 |
private : |
|---|
| 31 |
unsigned _line; |
|---|
| 32 |
}; |
|---|
| 33 |
|
|---|
| 34 |
class iterator { |
|---|
| 35 |
public : |
|---|
| 36 |
iterator() : tokeniser(0), in(0), _line(1) {} |
|---|
| 37 |
|
|---|
| 38 |
operator string const & () const { return _value; } |
|---|
| 39 |
iterator operator ++ (int) { |
|---|
| 40 |
iterator i = *this; |
|---|
| 41 |
|
|---|
| 42 |
get(); |
|---|
| 43 |
return i; |
|---|
| 44 |
} |
|---|
| 45 |
|
|---|
| 46 |
iterator &operator ++ () { |
|---|
| 47 |
get(); |
|---|
| 48 |
return *this; |
|---|
| 49 |
} |
|---|
| 50 |
|
|---|
| 51 |
int operator != (iterator const &other) const { |
|---|
| 52 |
return in != other.in || tokeniser != other.tokeniser; |
|---|
| 53 |
} |
|---|
| 54 |
int operator == (iterator const &other) const { |
|---|
| 55 |
return in == other.in && tokeniser == other.tokeniser; |
|---|
| 56 |
} |
|---|
| 57 |
|
|---|
| 58 |
int operator [] (unsigned index) const { return _value[index]; } |
|---|
| 59 |
unsigned type() const { return _type; } |
|---|
| 60 |
unsigned line() const { return _line; } |
|---|
| 61 |
unsigned size() const { return _value.size(); } |
|---|
| 62 |
string const &value() const { return _value; } |
|---|
| 63 |
char const *source() const { return in; } |
|---|
| 64 |
void skip(unsigned skip) { |
|---|
| 65 |
for (unsigned i = 0; i < skip; i++) |
|---|
| 66 |
if (*in) |
|---|
| 67 |
{ |
|---|
| 68 |
if (*in == '\n') _line++; |
|---|
| 69 |
in++; |
|---|
| 70 |
} else |
|---|
| 71 |
break; |
|---|
| 72 |
} |
|---|
| 73 |
private : |
|---|
| 74 |
void get() { |
|---|
| 75 |
assert(in && tokeniser); |
|---|
| 76 |
if (!tokeniser->get(*this, in, _line)) |
|---|
| 77 |
in = 0; |
|---|
| 78 |
} |
|---|
| 79 |
iterator(Lexer *t, char const *in) : tokeniser(t), in(in), _line(1) { |
|---|
| 80 |
if (in) get(); |
|---|
| 81 |
} |
|---|
| 82 |
friend class Lexer; |
|---|
| 83 |
|
|---|
| 84 |
Lexer *tokeniser; |
|---|
| 85 |
string _value; |
|---|
| 86 |
char const *in; |
|---|
| 87 |
unsigned _type; |
|---|
| 88 |
unsigned _line; |
|---|
| 89 |
struct { |
|---|
| 90 |
int start, end; |
|---|
| 91 |
} match[50]; |
|---|
| 92 |
int matches; |
|---|
| 93 |
}; |
|---|
| 94 |
friend class iterator; |
|---|
| 95 |
|
|---|
| 96 |
Lexer(); |
|---|
| 97 |
virtual ~Lexer(); |
|---|
| 98 |
|
|---|
| 99 |
iterator begin(char const *in) { return iterator(this, in); } |
|---|
| 100 |
iterator end() { return iterator(this, 0); } |
|---|
| 101 |
|
|---|
| 102 |
Lexer &ignorePattern(unsigned index, char const *pattern) { return addPattern(index, pattern, true); } |
|---|
| 103 |
Lexer &addPattern(unsigned index, char const *pattern, bool ignore = false); |
|---|
| 104 |
|
|---|
| 105 |
void ignore(unsigned index, bool state = true); |
|---|
| 106 |
void enable(unsigned index, bool state = true); |
|---|
| 107 |
protected : |
|---|
| 108 |
enum { Character = 1000000 }; |
|---|
| 109 |
|
|---|
| 110 |
struct Pattern { |
|---|
| 111 |
Regex rx; |
|---|
| 112 |
bool ignore, enabled; |
|---|
| 113 |
}; |
|---|
| 114 |
map<int, Pattern> pattern; |
|---|
| 115 |
bool initialised; |
|---|
| 116 |
|
|---|
| 117 |
virtual bool get(iterator &it, char const *&in, unsigned &line); |
|---|
| 118 |
}; |
|---|
| 119 |
|
|---|
| 120 |
inline ostream &operator << (ostream &out, Lexer::iterator const &it) { |
|---|
| 121 |
out << it.value(); |
|---|
| 122 |
return out; |
|---|
| 123 |
} |
|---|
| 124 |
|
|---|
| 125 |
#endif |
|---|