| 1 |
#include "Lexer.h" |
|---|
| 2 |
|
|---|
| 3 |
// Construct a lexer with no patterns registered.
// Only the `initialised` flag is cleared here; the built-in Character
// pattern is installed by addPattern() the first time it is called.
Lexer::Lexer()
    : initialised(false)
{}
|---|
| 6 |
|
|---|
| 7 |
// Destructor: performs no explicit cleanup of its own.
Lexer::~Lexer() {}
|---|
| 10 |
|
|---|
| 11 |
/**
 * Register the regular expression `in` as the pattern for token type `index`,
 * replacing any pattern already stored under that index.
 *
 * The first call also installs the built-in single-character pattern "."
 * under the Character index.
 *
 * @param index   Token type to associate with the pattern; values below 256
 *                are reserved and rejected.
 * @param in      Regular expression source; must not be null.
 * @param ignore  When true, text matched by this pattern is skipped by get()
 *                instead of being returned as a token.
 * @return        *this, so calls can be chained.
 * @throws runtime_error if `in` is null, if `index` is reserved, or if the
 *                regular expression is rejected by Regex.
 *
 * If the regular expression is rejected, an entry created by this call is
 * removed again, so a failed call does not leave a half-initialised pattern
 * in the table. An entry that already existed is left in place.
 */
Lexer &Lexer::addPattern(unsigned index, char const *in, bool ignore)
{
    // Building the messages below from a null pointer would be undefined
    // behaviour, and there is nothing to compile, so reject it up front.
    if (!in)
        throw runtime_error("Lexer::addPattern: index " + str::stringify(index) + ", null pattern");

    // The built-in pattern is set up lazily rather than in the constructor to
    // allow statically allocated Lexers: regexps initialised globally may be
    // constructed before Regex::cache is initialised, which caused segfaults.
    if (!initialised)
    {
        Pattern &builtin = pattern[Character];

        try {
            builtin.rx = ".";
            builtin.ignore = false;
            builtin.enabled = true;
        } catch (Regex::exception &e) {
            // Build the message first, then drop the unusable entry.
            string const message = "Lexer::Lexer: index " + str::stringify(index) + ", " + string(e.what());
            pattern.erase(Character);
            throw runtime_error(message);
        }
        initialised = true;
    }

    if (index < 256) throw runtime_error("pattern indices under 256 are reserved, relevant pattern is '" + string(in) + "'");

    // operator[] creates the entry if it is missing; remember whether it was
    // already there so that a failed compile can be rolled back.
    bool const existed = pattern.find(index) != pattern.end();
    Pattern &p = pattern[index];

    try {
        p.rx = in;
        p.ignore = ignore;
        p.enabled = true;
    } catch (Regex::exception &e) {
        string const message = "Lexer::addPattern: index " + str::stringify(index) + ", " + string(e.what());
        if (!existed)
            pattern.erase(index);
        throw runtime_error(message);
    }

    return *this;
}
|---|
| 44 |
|
|---|
| 45 |
/**
 * Extract the next token from the input at `in`.
 *
 * Enabled patterns are tried in the iteration order of the pattern table and
 * the first one that matches at `in` wins. Text matched by a pattern flagged
 * `ignore` is skipped and the scan restarts from the first pattern.
 *
 * @param it    Receives the token: its text, line number, type and a pointer
 *              to where it starts in the input.
 * @param in    Current read position; advanced past everything consumed.
 * @param line  Running line counter; incremented once for every '\n' inside
 *              consumed text (both skipped text and the returned token).
 * @return      true if a token was stored in `it`; false at end of input or
 *              when no enabled pattern matches at the current position.
 *
 * A match that consumes no characters is treated as "no match". Such a match
 * cannot advance `in`: for an ignored pattern the scan would restart at the
 * same position forever, and for a normal pattern an empty token would be
 * returned without making progress.
 */
bool Lexer::get(iterator &it, char const *&in, unsigned &line)
{
    if (*in == 0)
        return false;

    bool ignore;

    do {
        ignore = false;
        for (map<int, Pattern>::iterator i = pattern.begin(); i != pattern.end(); ++i)
        {
            Pattern &p = i->second;
            if (!p.enabled)
                continue;

            // matchStart() reports -1 when the pattern does not match at `in`;
            // anything that consumes no input is skipped as well (see above).
            int const length = p.rx.matchStart(in);
            if (length <= 0)
                continue;

            // Count the newlines inside the matched text before it is consumed.
            for (int j = 0; j < length; ++j)
                if (in[j] == '\n') line++;

            if (p.ignore)
            {
                // Skip the text and rescan from the first pattern.
                in += length;
                ignore = true;
                if (*in == 0) return false;
                break;
            }

            it._value = string(in, length);
            it._line = line;
            // Handle individual characters: the token type is the character itself.
            if (i->first == Character)
                it._type = *in;
            else
                it._type = i->first;
            it.in = in;
            in += length;
            return true;
        }
    } while (ignore);

    return false;
}
|---|
| 88 |
|
|---|
| 89 |
void Lexer::ignore(unsigned index, bool state) |
|---|
| 90 |
{ |
|---|
| 91 |
if (pattern.find(index) == pattern.end()) |
|---|
| 92 |
throw exception("tried to ignore/unignore an unknown token", 0); |
|---|
| 93 |
pattern[index].ignore = state; |
|---|
| 94 |
} |
|---|
| 95 |
|
|---|
| 96 |
void Lexer::enable(unsigned index, bool state) |
|---|
| 97 |
{ |
|---|
| 98 |
if (pattern.find(index) == pattern.end()) |
|---|
| 99 |
throw exception("tried to enable/disable an unknown token", 0); |
|---|
| 100 |
pattern[index].enabled = state; |
|---|
| 101 |
} |
|---|