2015-08-30 07:12:46 +08:00
|
|
|
#ifndef MEMGRAPH_CYPHER_LEXER_HPP
|
|
|
|
#define MEMGRAPH_CYPHER_LEXER_HPP
|
|
|
|
|
|
|
|
#include <cstdint>
|
|
|
|
|
2015-09-13 17:34:17 +08:00
|
|
|
// Unfortunately, lexertl uses some constructs deprecated in C++11, so we get
// warnings at compile time, mainly for auto_ptr:
// auto_ptr<lexertl::detail::basic_re_token<char, char> > is deprecated
|
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
2015-09-18 03:37:53 +08:00
|
|
|
#include "lexertl/lexertl/generator.hpp"
|
|
|
|
#include "lexertl/lexertl/lookup.hpp"
|
2015-09-13 17:34:17 +08:00
|
|
|
#pragma GCC diagnostic pop
|
2015-08-30 07:12:46 +08:00
|
|
|
|
2015-09-13 17:34:17 +08:00
|
|
|
#include "errors.hpp"
|
2015-08-30 07:12:46 +08:00
|
|
|
#include "token.hpp"
|
|
|
|
|
|
|
|
class Lexer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
class Tokenizer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Tokenizer(const Lexer& lexer, const std::string& str)
|
|
|
|
: lexer(lexer), results(str.begin(), str.end()) {}
|
|
|
|
|
2015-10-19 01:44:00 +08:00
|
|
|
Token lookup()
|
2015-08-30 07:12:46 +08:00
|
|
|
{
|
|
|
|
lexertl::lookup(lexer.sm, results);
|
2015-10-19 01:44:00 +08:00
|
|
|
auto token = Token {results.id, results.str()};
|
2015-08-30 07:12:46 +08:00
|
|
|
|
|
|
|
if(results.id == static_cast<decltype(results.id)>(-1))
|
2015-10-19 01:44:00 +08:00
|
|
|
throw LexicalError(token);
|
2015-08-30 07:12:46 +08:00
|
|
|
|
|
|
|
return token;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const Lexer& lexer;
|
|
|
|
lexertl::smatch results;
|
|
|
|
};
|
|
|
|
|
|
|
|
Tokenizer tokenize(const std::string& str)
|
|
|
|
{
|
|
|
|
return Tokenizer(*this, str);
|
|
|
|
}
|
|
|
|
|
|
|
|
void build()
|
|
|
|
{
|
|
|
|
lexertl::generator::build(rules, sm);
|
|
|
|
}
|
|
|
|
|
|
|
|
void rule(const std::string& regex, uint64_t id)
|
|
|
|
{
|
|
|
|
rules.push(regex, id);
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
lexertl::rules rules;
|
|
|
|
lexertl::state_machine sm;
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|