memgraph/cypher/tokenizer/lexer.hpp
2016-02-06 13:45:45 +01:00

92 lines
2.3 KiB
C++

#pragma once
#include <cstdint>
#include <memory>
#include <string>
// Unfortunately, lexertl uses some facilities deprecated in C++11, so we get
// warnings at compile time, mainly for auto_ptr:
// auto_ptr<lexertl::detail::basic_re_token<char, char> > is deprecated
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include "cypher/lexertl/lexertl/generator.hpp"
#include "cypher/lexertl/lexertl/lookup.hpp"
#pragma GCC diagnostic pop
#include "cypher/errors.hpp"
#include "cypher/token.hpp"
class Lexer
{
public:
// public pointer declarations
using uptr = std::unique_ptr<Lexer>;
using sptr = std::shared_ptr<Lexer>;
// constructors
// default constructor creates unique pointers to object
// members
Lexer() :
rules(std::make_unique<lexertl::rules>()),
sm(std::make_unique<lexertl::state_machine>())
{
}
// copy constructor is deleted
Lexer(Lexer& other) = delete;
// move constructor has default implementation
Lexer(Lexer&& other) :
rules(std::move(other.rules)),
sm(std::move(other.sm))
{
}
// TODO take care of concurrnecy and moving the lexer object when
// some Tokenizer already uses the it (right now I'm not
// sure what is going to happen)
// check this ASAP
class Tokenizer
{
public:
Tokenizer(const Lexer& lexer, const std::string& str)
: lexer(lexer), results(str.begin(), str.end()) {}
Token lookup()
{
lexertl::lookup(*lexer.sm, results);
auto token = Token {results.id, results.str()};
if(results.id == static_cast<decltype(results.id)>(-1))
throw LexicalError(token);
return token;
}
private:
const Lexer& lexer;
lexertl::smatch results;
};
Tokenizer tokenize(const std::string& str)
{
return Tokenizer(*this, str);
}
void build()
{
lexertl::generator::build(*rules, *sm);
}
void rule(const std::string& regex, uint64_t id)
{
rules->push(regex, id);
}
protected:
using uptr_lexertl_rules = std::unique_ptr<lexertl::rules>;
using uptr_lexertl_sm = std::unique_ptr<lexertl::state_machine>;
uptr_lexertl_rules rules;
uptr_lexertl_sm sm;
};