Start implementing compiler structures
Summary: Merge remote-tracking branch 'origin/dev' into mg_compiler_structs Merge remote-tracking branch 'origin/dev' into mg_compiler_structs Implement compiler structures and tests Reviewers: florijan, buda Reviewed By: buda Subscribers: pullbot, florijan, buda Differential Revision: https://phabricator.memgraph.io/D61
This commit is contained in:
parent
c8ef12910d
commit
ca90f813ad
@ -21,8 +21,9 @@ BASE_FLAGS = [
|
||||
'-I./include',
|
||||
'-I./libs/fmt',
|
||||
'-I./libs/yaml-cpp',
|
||||
'-I./build/googletest-src/googletest/include',
|
||||
'-I./build/googlebenchmark-src/include',
|
||||
'-I./libs/googletest/googletest/include',
|
||||
'-I./libs/googletest/googlemock/include',
|
||||
'-I./libs/benchmark/include',
|
||||
'-I./libs/antlr4/runtime/Cpp/runtime/src'
|
||||
]
|
||||
|
||||
|
@ -332,6 +332,7 @@ set(memgraph_src_files
|
||||
${src_dir}/logging/log.cpp
|
||||
${src_dir}/database/graph_db.cpp
|
||||
${src_dir}/database/graph_db_accessor.cpp
|
||||
${src_dir}/query/backend/cpp/cypher_main_visitor.cpp
|
||||
)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
52
src/query/backend/cpp/compiler_structures.hpp
Normal file
52
src/query/backend/cpp/compiler_structures.hpp
Normal file
@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <unordered_map>
|
||||
#include "query/frontend/opencypher/generated/CypherParser.h"
|
||||
#include "utils/exceptions/basic_exception.hpp"
|
||||
|
||||
// TODO: Figure out what information to put in exception.
|
||||
// Error reporting is tricky since we get stripped query and position of error
|
||||
// in original query is not same as position of error in stripped query. Most
|
||||
// correct approach would be to do semantic analysis with original query even
|
||||
// for already hashed queries, but that has obvious performance issues. Other
|
||||
// approach would be to report some of the semantic errors in runtime of the
|
||||
// query and only report line numbers of semantic errors (not position in the
|
||||
// line) if multiple line strings are not allowed by grammar. We could also
|
||||
// print whole line that contains error instead of specifying line number.
|
||||
class SemanticException : BasicException {
|
||||
public:
|
||||
SemanticException() : BasicException("") {}
|
||||
};
|
||||
|
||||
// enum VariableType { TYPED_VALUE, LIST, MAP, NODE, RELATIONSHIP, PATH };
|
||||
|
||||
struct Node {
|
||||
std::string output_identifier;
|
||||
std::vector<std::string> labels;
|
||||
std::unordered_map<std::string,
|
||||
antlropencypher::CypherParser::ExpressionContext*>
|
||||
properties;
|
||||
};
|
||||
|
||||
struct Relationship {
|
||||
enum Direction { LEFT, RIGHT, BOTH };
|
||||
std::string output_identifier;
|
||||
Direction direction = Direction::BOTH;
|
||||
std::vector<std::string> types;
|
||||
std::unordered_map<std::string,
|
||||
antlropencypher::CypherParser::ExpressionContext*>
|
||||
properties;
|
||||
bool has_range = false;
|
||||
// If has_range is false, lower and upper bound values are not important.
|
||||
// lower_bound can be larger than upper_bound and in that case there is no
|
||||
// results.
|
||||
int64_t lower_bound = 1LL;
|
||||
int64_t upper_bound = LLONG_MAX;
|
||||
};
|
||||
|
||||
struct PatternPart {
|
||||
std::string output_identifier;
|
||||
std::vector<Node> nodes;
|
||||
std::vector<Relationship> relationships;
|
||||
};
|
227
src/query/backend/cpp/cypher_main_visitor.cpp
Normal file
227
src/query/backend/cpp/cypher_main_visitor.cpp
Normal file
@ -0,0 +1,227 @@
|
||||
#include "query/backend/cpp/cypher_main_visitor.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "query/backend/cpp/compiler_structures.hpp"
|
||||
|
||||
namespace {
|
||||
// List of unnamed tokens visitor needs to use. This should be reviewed on every
|
||||
// grammar change since even changes in ordering of rules will cause antlr to
|
||||
// generate different constants for unnamed tokens.
|
||||
const auto kDotsTokenId = CypherParser::T__12; // ..
|
||||
}
|
||||
|
||||
// Builds a Node from a node pattern. The node always gets a fresh output
// identifier and is stored in symbol_table_ under it; when the pattern names a
// variable, that variable is mapped to the identifier in identifiers_map_.
// Returns the Node.
antlrcpp::Any CypherMainVisitor::visitNodePattern(
    CypherParser::NodePatternContext *ctx) {
  using PropertyMap =
      std::unordered_map<std::string, CypherParser::ExpressionContext *>;

  Node result;
  result.output_identifier = new_identifier();

  if (auto *variable = ctx->variable()) {
    identifiers_map_[variable->accept(this).as<std::string>()] =
        result.output_identifier;
  }
  if (auto *labels = ctx->nodeLabels()) {
    result.labels = labels->accept(this).as<std::vector<std::string>>();
  }
  if (auto *properties = ctx->properties()) {
    result.properties = properties->accept(this).as<PropertyMap>();
  }

  symbol_table_[result.output_identifier] = result;
  return result;
}
|
||||
|
||||
// Collects the text of every label in a nodeLabels production.
// Returns vector<string>.
antlrcpp::Any CypherMainVisitor::visitNodeLabels(
    CypherParser::NodeLabelsContext *ctx) {
  const auto label_contexts = ctx->nodeLabel();

  std::vector<std::string> result;
  result.reserve(label_contexts.size());
  for (auto *label_context : label_contexts) {
    result.push_back(label_context->accept(this).as<std::string>());
  }
  return result;
}
|
||||
|
||||
// Forwards to the map literal child and returns whatever it produces.
// Throws SemanticException when the child is not a map literal.
antlrcpp::Any CypherMainVisitor::visitProperties(
    CypherParser::PropertiesContext *ctx) {
  if (auto *map_literal = ctx->mapLiteral()) {
    return map_literal->accept(this);
  }
  // A child that is not a mapLiteral has to be params, and memgraph doesn't
  // support params at the moment.
  throw SemanticException();
}
|
||||
|
||||
// Builds the key -> expression map of a map literal.
//
// The i-th property key is paired with the i-th expression context. The
// expressions are not evaluated here; only their parse tree contexts are
// stored. If a key occurs more than once the last occurrence wins.
//
// Returns unordered_map<string, ExpressionContext*>.
antlrcpp::Any CypherMainVisitor::visitMapLiteral(
    CypherParser::MapLiteralContext *ctx) {
  // Fetch both child lists once instead of calling the accessors again on
  // every iteration (and once more for the loop condition).
  const auto keys = ctx->propertyKeyName();
  const auto values = ctx->expression();
  // Keys and expressions are indexed in parallel below.
  assert(keys.size() == values.size());

  std::unordered_map<std::string, CypherParser::ExpressionContext *> map;
  for (std::size_t i = 0; i < keys.size(); ++i) {
    map[keys[i]->accept(this).as<std::string>()] = values[i];
  }
  return map;
}
|
||||
|
||||
// Returns the text of a symbolic name as a string.
// Throws SemanticException for anything but an UnescapedSymbolicName.
antlrcpp::Any CypherMainVisitor::visitSymbolicName(
    CypherParser::SymbolicNameContext *ctx) {
  if (ctx->UnescapedSymbolicName()) {
    return ctx->getText();
  }
  // Only UnescapedSymbolicName is accepted for now. openCypher also allows
  // variables named like keywords and escaped sequences; supporting those
  // requires working out the escaping rules first, so that the same variable
  // can be referenced both in its unescaped and in its escaped form.
  throw SemanticException();
}
|
||||
|
||||
// Visits every pattern part of a pattern, in order.
// Returns vector<PatternPart>.
antlrcpp::Any CypherMainVisitor::visitPattern(
    CypherParser::PatternContext *ctx) {
  std::vector<PatternPart> parts;
  for (auto *part_context : ctx->patternPart()) {
    parts.push_back(part_context->accept(this).as<PatternPart>());
  }
  return parts;
}
|
||||
|
||||
// Visits the anonymous pattern part, maps the optional variable to its output
// identifier in identifiers_map_ and stores the part in symbol_table_.
// Returns the PatternPart.
antlrcpp::Any CypherMainVisitor::visitPatternPart(
    CypherParser::PatternPartContext *ctx) {
  // The anonymous part is visited before the variable, as in the grammar-
  // independent order the identifiers were generated in so far.
  auto part = ctx->anonymousPatternPart()->accept(this).as<PatternPart>();

  if (auto *variable = ctx->variable()) {
    identifiers_map_[variable->accept(this).as<std::string>()] =
        part.output_identifier;
  }

  symbol_table_[part.output_identifier] = part;
  return part;
}
|
||||
|
||||
// Builds a PatternPart out of a pattern element: the leading node followed by
// one (relationship, node) pair per pattern element chain. A nested pattern
// element is simply forwarded to.
// Returns the PatternPart.
antlrcpp::Any CypherMainVisitor::visitPatternElement(
    CypherParser::PatternElementContext *ctx) {
  if (auto *nested = ctx->patternElement()) {
    return nested->accept(this);
  }

  PatternPart result;
  result.output_identifier = new_identifier();
  result.nodes.push_back(ctx->nodePattern()->accept(this).as<Node>());

  for (auto *chain : ctx->patternElementChain()) {
    auto link = chain->accept(this).as<std::pair<Relationship, Node>>();
    result.relationships.push_back(std::move(link.first));
    result.nodes.push_back(std::move(link.second));
  }
  return result;
}
|
||||
|
||||
// Visits the relationship pattern and then the node pattern of one chain link.
//
// Both visits have side effects (they generate identifiers and fill the
// visitor's tables), so they are sequenced explicitly. Passing the two
// accept() calls directly as constructor arguments would leave their relative
// order up to the compiler.
//
// Returns pair<Relationship, Node>.
antlrcpp::Any CypherMainVisitor::visitPatternElementChain(
    CypherParser::PatternElementChainContext *ctx) {
  Relationship relationship =
      ctx->relationshipPattern()->accept(this).as<Relationship>();
  Node node = ctx->nodePattern()->accept(this).as<Node>();
  return std::pair<Relationship, Node>(std::move(relationship),
                                       std::move(node));
}
|
||||
|
||||
// Builds a Relationship from a relationship pattern and stores it in
// symbol_table_ under a fresh output identifier. Details (variable, types,
// properties, range) are filled in by VisitRelationshipDetail when present.
// Returns the Relationship.
antlrcpp::Any CypherMainVisitor::visitRelationshipPattern(
    CypherParser::RelationshipPatternContext *ctx) {
  Relationship result;
  result.output_identifier = new_identifier();

  if (auto *detail = ctx->relationshipDetail()) {
    VisitRelationshipDetail(detail, result);
  }

  const bool has_left_arrow = ctx->leftArrowHead() != nullptr;
  const bool has_right_arrow = ctx->rightArrowHead() != nullptr;
  if (has_left_arrow == has_right_arrow) {
    // <-[]-> and -[]- is the same thing as far as we understand openCypher
    // grammar.
    result.direction = Relationship::Direction::BOTH;
  } else if (has_left_arrow) {
    result.direction = Relationship::Direction::LEFT;
  } else {
    result.direction = Relationship::Direction::RIGHT;
  }

  symbol_table_[result.output_identifier] = result;
  return result;
}
|
||||
|
||||
// Generated visitor entry point that must never be reached: relationship
// details are handled by VisitRelationshipDetail, which receives the
// Relationship to fill. The assertion flags any call in builds where asserts
// are enabled.
antlrcpp::Any CypherMainVisitor::visitRelationshipDetail(
    CypherParser::RelationshipDetailContext * /*ctx*/) {
  assert(false);
  return 0;
}
|
||||
|
||||
// Fills an already created Relationship from its detail production: maps the
// optional variable to the relationship's output identifier in
// identifiers_map_ and copies types, properties and range bounds when present.
void CypherMainVisitor::VisitRelationshipDetail(
    CypherParser::RelationshipDetailContext *ctx, Relationship &relationship) {
  using PropertyMap =
      std::unordered_map<std::string, CypherParser::ExpressionContext *>;
  using Range = std::pair<int64_t, int64_t>;

  if (auto *variable = ctx->variable()) {
    identifiers_map_[variable->accept(this).as<std::string>()] =
        relationship.output_identifier;
  }
  if (auto *types = ctx->relationshipTypes()) {
    relationship.types = types->accept(this).as<std::vector<std::string>>();
  }
  if (auto *properties = ctx->properties()) {
    relationship.properties = properties->accept(this).as<PropertyMap>();
  }
  if (auto *range_literal = ctx->rangeLiteral()) {
    relationship.has_range = true;
    const auto bounds = range_literal->accept(this).as<Range>();
    relationship.lower_bound = bounds.first;
    relationship.upper_bound = bounds.second;
  }
}
|
||||
|
||||
// Collects the text of every relationship type name in the production.
// Returns vector<string>.
antlrcpp::Any CypherMainVisitor::visitRelationshipTypes(
    CypherParser::RelationshipTypesContext *ctx) {
  const auto type_contexts = ctx->relTypeName();

  std::vector<std::string> result;
  result.reserve(type_contexts.size());
  for (auto *type_context : type_contexts) {
    result.push_back(type_context->accept(this).as<std::string>());
  }
  return result;
}
|
||||
|
||||
// Translates a range literal into its (lower, upper) bounds. A missing lower
// bound defaults to 1 and a missing upper bound to LLONG_MAX.
// Returns pair<int64_t, int64_t>.
antlrcpp::Any CypherMainVisitor::visitRangeLiteral(
    CypherParser::RangeLiteralContext *ctx) {
  using Range = std::pair<int64_t, int64_t>;
  const auto literals = ctx->integerLiteral();

  if (literals.empty()) {
    // -[*]-
    return Range(1LL, LLONG_MAX);
  }

  if (literals.size() != 1U) {
    // -[*2..5]-
    const int64_t lower = literals[0]->accept(this).as<int64_t>();
    const int64_t upper = literals[1]->accept(this).as<int64_t>();
    return Range(lower, upper);
  }

  // Exactly one literal: its meaning depends on whether the dots token is
  // present and on which side of the literal it sits.
  const auto dots = ctx->getTokens(kDotsTokenId);
  const int64_t bound = literals[0]->accept(this).as<int64_t>();
  if (dots.empty()) {
    // -[*2]-
    return Range(bound, bound);
  }

  const bool dots_after_literal = dots[0]->getSourceInterval().startsAfter(
      literals[0]->getSourceInterval());
  if (dots_after_literal) {
    // -[*2..]-
    return Range(bound, LLONG_MAX);
  }
  // -[*..2]-
  return Range(1LL, bound);
}
|
||||
|
||||
// Converts the text of an integer literal to its value.
//
// Returns int64_t.
// Throws SemanticException when the literal does not fit into a long long.
antlrcpp::Any CypherMainVisitor::visitIntegerLiteral(
    CypherParser::IntegerLiteralContext *ctx) {
  int64_t value = 0LL;
  try {
    // Base 0 makes std::stoll pick the radix from the literal's prefix
    // (0x/0X -> hexadecimal, leading 0 -> octal, otherwise decimal).
    value = std::stoll(ctx->getText(), nullptr, 0);
  } catch (const std::out_of_range &) {
    // Caught by const reference; the exception object itself is not needed,
    // only the fact that the literal is out of range.
    throw SemanticException();
  }
  return value;
}
|
140
src/query/backend/cpp/cypher_main_visitor.hpp
Normal file
140
src/query/backend/cpp/cypher_main_visitor.hpp
Normal file
@ -0,0 +1,140 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
|
||||
#include "antlr4-runtime.h"
|
||||
#include "query/backend/cpp/compiler_structures.hpp"
|
||||
|
||||
using antlropencypher::CypherParser;
|
||||
|
||||
class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
|
||||
/**
|
||||
* Creates Node and stores it in symbol_table_. If variable is defined it is
|
||||
* stored in identifiers_map_.
|
||||
*
|
||||
* @return Node.
|
||||
*/
|
||||
antlrcpp::Any visitNodePattern(
|
||||
CypherParser::NodePatternContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return vector<string> labels.
|
||||
*/
|
||||
antlrcpp::Any visitNodeLabels(CypherParser::NodeLabelsContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return unordered_map<string, ExpressionContext*> properties.
|
||||
*/
|
||||
antlrcpp::Any visitProperties(CypherParser::PropertiesContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return unordered_map<string, ExpressionContext*> map.
|
||||
*/
|
||||
antlrcpp::Any visitMapLiteral(CypherParser::MapLiteralContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return string.
|
||||
*/
|
||||
antlrcpp::Any visitSymbolicName(
|
||||
CypherParser::SymbolicNameContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return vector<PatternPart> pattern.
|
||||
*/
|
||||
antlrcpp::Any visitPattern(CypherParser::PatternContext *ctx) override;
|
||||
|
||||
/**
|
||||
* Stores PatternPart in symbol_table_. If variable is defined it is stored in
|
||||
* identifiers_map_.
|
||||
*
|
||||
* @return PatternPart.
|
||||
*/
|
||||
antlrcpp::Any visitPatternPart(
|
||||
CypherParser::PatternPartContext *ctx) override;
|
||||
|
||||
/**
|
||||
* Creates PatternPart.
|
||||
*
|
||||
* @return PatternPart.
|
||||
*/
|
||||
antlrcpp::Any visitPatternElement(
|
||||
CypherParser::PatternElementContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return pair<Relationship, Node>
|
||||
*/
|
||||
antlrcpp::Any visitPatternElementChain(
|
||||
CypherParser::PatternElementChainContext *ctx) override;
|
||||
|
||||
/**
|
||||
* Creates Relationship and stores it in symbol_table_.
|
||||
*
|
||||
*/
|
||||
antlrcpp::Any visitRelationshipPattern(
|
||||
CypherParser::RelationshipPatternContext *ctx) override;
|
||||
|
||||
/**
|
||||
* This should never be called. Call VisitRelationshipDetail with already
|
||||
* created Relationship instead.
|
||||
*/
|
||||
antlrcpp::Any visitRelationshipDetail(
|
||||
CypherParser::RelationshipDetailContext *ctx) override;
|
||||
|
||||
/**
|
||||
* If variable is defined it is stored in symbol_table_. Relationship is
|
||||
* filled with properties, types and range if provided.
|
||||
* Use this instead of antlr generated visitRelationshipDetail with already
|
||||
* created Relationship. If we should have used visitRelationshipDetail
|
||||
* (relationshipDetail is optional production in relationshipPattern) then we
|
||||
* would have needed to return not completely initialised Relationship.
|
||||
*/
|
||||
void VisitRelationshipDetail(CypherParser::RelationshipDetailContext *ctx,
|
||||
Relationship &relationship);
|
||||
|
||||
/**
|
||||
* @return vector<string>.
|
||||
*/
|
||||
antlrcpp::Any visitRelationshipTypes(
|
||||
CypherParser::RelationshipTypesContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return int64_t.
|
||||
*/
|
||||
antlrcpp::Any visitIntegerLiteral(
|
||||
CypherParser::IntegerLiteralContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return pair<int64_t, int64_t>.
|
||||
*/
|
||||
antlrcpp::Any visitRangeLiteral(
|
||||
CypherParser::RangeLiteralContext *ctx) override;
|
||||
|
||||
public:
|
||||
// TODO: These temporary getters should eventually be replaced with something
|
||||
// else once we figure out where and how those strctures will be used.
|
||||
// Currently there are needed for testing. cypher_main_visitor test should be
|
||||
// refactored once these getters are deleted.
|
||||
const std::unordered_map<std::string, std::string> &identifiers_map() const {
|
||||
return identifiers_map_;
|
||||
}
|
||||
const std::unordered_map<std::string, antlrcpp::Any> &symbol_table() const {
|
||||
return symbol_table_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Return new output code identifier.
|
||||
// TODO: Should we generate identifiers with more readable names: node_1,
|
||||
// relationship_5, ...?
|
||||
std::string new_identifier() const {
|
||||
static int next_identifier = 0;
|
||||
return "id" + std::to_string(next_identifier++);
|
||||
}
|
||||
|
||||
// Mapping of identifiers (nodes, relationships, values, lists ...) from query
|
||||
// code to identifier that is used in generated code;
|
||||
std::unordered_map<std::string, std::string> identifiers_map_;
|
||||
|
||||
// Mapping of output (generated) code identifiers to appropriate parser
|
||||
// structure.
|
||||
std::unordered_map<std::string, antlrcpp::Any> symbol_table_;
|
||||
};
|
@ -1,25 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <experimental/filesystem>
|
||||
#include "antlr4-runtime.h"
|
||||
#include "query/backend/cpp/cypher_main_visitor.hpp"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace backend {
|
||||
|
||||
namespace cpp {
|
||||
|
||||
using namespace antlr4;
|
||||
|
||||
/**
|
||||
* Traverse AST and generate C++
|
||||
* Traverse Antlr tree::ParseTree generated from Cypher grammar and generate
|
||||
* C++.
|
||||
*/
|
||||
class Generator {
|
||||
public:
|
||||
/**
|
||||
* Generates cpp code inside file on the path.
|
||||
*
|
||||
* @tparam Ast type of AST structure
|
||||
*/
|
||||
template <typename Ast>
|
||||
void generate_plan(const Ast &ast, const std::string &query,
|
||||
const uint64_t stripped_hash, const fs::path &path) {
|
||||
Generator(tree::ParseTree *tree, const std::string &query,
|
||||
const uint64_t stripped_hash, const fs::path &path) {
|
||||
CypherMainVisitor visitor;
|
||||
visitor.visit(tree);
|
||||
throw std::runtime_error("TODO: implementation");
|
||||
}
|
||||
};
|
||||
|
@ -11,7 +11,6 @@ namespace fs = std::experimental::filesystem;
|
||||
#include "query/exception/query_engine.hpp"
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
#include "query/plan_compiler.hpp"
|
||||
#include "query/plan_generator.hpp"
|
||||
#include "query/plan_interface.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "utils/dynamic_lib.hpp"
|
||||
@ -147,7 +146,9 @@ class QueryEngine : public Loggable {
|
||||
auto generated_path = fs::path(CONFIG(config::COMPILE_PATH) +
|
||||
std::to_string(stripped.hash) + ".cpp");
|
||||
|
||||
plan_generator.generate_plan(stripped.query, stripped.hash, generated_path);
|
||||
frontend::opencypher::Parser parser(stripped.query);
|
||||
backend::cpp::Generator(parser.tree(), stripped.query, stripped.hash,
|
||||
generated_path);
|
||||
return LoadCpp(generated_path, stripped.hash);
|
||||
}
|
||||
|
||||
@ -193,7 +194,5 @@ class QueryEngine : public Loggable {
|
||||
|
||||
QueryPreprocessor preprocessor;
|
||||
PlanCompiler plan_compiler;
|
||||
PlanGenerator<frontend::opencypher::Parser, backend::cpp::Generator>
|
||||
plan_generator;
|
||||
ConcurrentMap<HashType, std::unique_ptr<QueryPlanLib>> query_plans;
|
||||
};
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "antlr4-runtime.h"
|
||||
#include "query/frontend/opencypher/generated/CypherLexer.h"
|
||||
#include "query/frontend/opencypher/generated/CypherParser.h"
|
||||
#include "utils/exceptions/basic_exception.hpp"
|
||||
|
||||
namespace frontend {
|
||||
namespace opencypher {
|
||||
@ -12,8 +13,15 @@ namespace opencypher {
|
||||
using namespace antlropencypher;
|
||||
using namespace antlr4;
|
||||
|
||||
class SyntaxException : BasicException {
|
||||
public:
|
||||
SyntaxException() : BasicException("") {}
|
||||
};
|
||||
|
||||
/**
|
||||
* Generates openCypher AST
|
||||
* This thing must be a class since parser.cypher() returns pointer and there is
|
||||
* no way for us to get ownership over the object.
|
||||
*/
|
||||
class Parser {
|
||||
public:
|
||||
@ -21,18 +29,23 @@ class Parser {
|
||||
* @param query incoming query that has to be compiled into query plan
|
||||
* the first step is to generate AST
|
||||
*/
|
||||
auto generate_ast(const std::string &query) {
|
||||
// get tokens
|
||||
ANTLRInputStream input(query.c_str());
|
||||
CypherLexer lexer(&input);
|
||||
CommonTokenStream tokens(&lexer);
|
||||
|
||||
// generate ast
|
||||
CypherParser parser(&tokens);
|
||||
tree::ParseTree *tree = parser.cypher();
|
||||
|
||||
return tree;
|
||||
Parser(const std::string query) : query_(std::move(query)) {
|
||||
if (parser_.getNumberOfSyntaxErrors()) {
|
||||
throw SyntaxException();
|
||||
}
|
||||
}
|
||||
|
||||
auto tree() { return tree_; }
|
||||
|
||||
private:
|
||||
std::string query_;
|
||||
ANTLRInputStream input_{query_.c_str()};
|
||||
CypherLexer lexer_{&input_};
|
||||
CommonTokenStream tokens_{&lexer_};
|
||||
|
||||
// generate ast
|
||||
CypherParser parser_{&tokens_};
|
||||
tree::ParseTree *tree_{parser_.cypher()};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -1,30 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
/**
|
||||
* @class PlanGenerator
|
||||
*
|
||||
* @tparam Frontend defines compiler frontend for query parsing
|
||||
* object of this class must have method with name generate_ast
|
||||
* @tparam Backend defines compiler backend for plan gen
|
||||
* object of this class must have method with name generate_code
|
||||
*
|
||||
*/
|
||||
template <typename Frontend, typename Backend>
|
||||
class PlanGenerator {
|
||||
public:
|
||||
/**
|
||||
* Generates query plan based on the input query
|
||||
*/
|
||||
void generate_plan(const std::string &query, const uint64_t stripped_hash,
|
||||
const fs::path &path) {
|
||||
auto ast = frontend.generate_ast(query);
|
||||
backend.generate_plan(ast, query, stripped_hash, path);
|
||||
}
|
||||
|
||||
private:
|
||||
Frontend frontend;
|
||||
Backend backend;
|
||||
};
|
307
tests/unit/cypher_main_visitor.cpp
Normal file
307
tests/unit/cypher_main_visitor.cpp
Normal file
@ -0,0 +1,307 @@
|
||||
#include <climits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <algorithm>
|
||||
#include "antlr4-runtime.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "query/backend/cpp/cypher_main_visitor.cpp"
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
|
||||
using namespace ::testing;
|
||||
|
||||
namespace {
|
||||
|
||||
class ParserTables {
|
||||
template <typename T>
|
||||
auto FilterAnies(std::unordered_map<std::string, antlrcpp::Any> map) {
|
||||
std::unordered_map<std::string, T> filtered;
|
||||
for (auto x : map) {
|
||||
if (x.second.is<T>()) {
|
||||
filtered[x.first] = x.second.as<T>();
|
||||
}
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
public:
|
||||
ParserTables(const std::string &query) {
|
||||
frontend::opencypher::Parser parser(query);
|
||||
auto *tree = parser.tree();
|
||||
CypherMainVisitor visitor;
|
||||
visitor.visit(tree);
|
||||
identifiers_map_ = visitor.identifiers_map();
|
||||
symbol_table_ = visitor.symbol_table();
|
||||
pattern_parts_ = FilterAnies<PatternPart>(symbol_table_);
|
||||
nodes_ = FilterAnies<Node>(symbol_table_);
|
||||
relationships_ = FilterAnies<Relationship>(symbol_table_);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, std::string> identifiers_map_;
|
||||
std::unordered_map<std::string, antlrcpp::Any> symbol_table_;
|
||||
std::unordered_map<std::string, PatternPart> pattern_parts_;
|
||||
std::unordered_map<std::string, Node> nodes_;
|
||||
std::unordered_map<std::string, Relationship> relationships_;
|
||||
};
|
||||
|
||||
// TODO: Once expression evaluation is implemented, we should also test if
|
||||
// property values are equal.
|
||||
void CompareNodes(std::pair<std::string, Node> node_entry,
|
||||
std::vector<std::string> labels,
|
||||
std::vector<std::string> property_keys) {
|
||||
auto node = node_entry.second;
|
||||
ASSERT_EQ(node_entry.first, node.output_identifier);
|
||||
ASSERT_THAT(node.labels,
|
||||
UnorderedElementsAreArray(labels.begin(), labels.end()));
|
||||
std::vector<std::string> node_property_keys;
|
||||
for (auto x : node.properties) {
|
||||
node_property_keys.push_back(x.first);
|
||||
}
|
||||
ASSERT_THAT(
|
||||
node_property_keys,
|
||||
UnorderedElementsAreArray(property_keys.begin(), property_keys.end()));
|
||||
}
|
||||
|
||||
// If has_range is false, lower and upper bound values are ignored.
|
||||
// TODO: Once expression evaluation is implemented, we should also test if
|
||||
// property values are equal.
|
||||
void CompareRelationships(
|
||||
std::pair<std::string, Relationship> relationship_entry,
|
||||
Relationship::Direction direction, std::vector<std::string> types,
|
||||
std::vector<std::string> property_keys, bool has_range,
|
||||
int64_t lower_bound = 1LL, int64_t upper_bound = LLONG_MAX) {
|
||||
auto relationship = relationship_entry.second;
|
||||
ASSERT_EQ(relationship_entry.first, relationship.output_identifier);
|
||||
ASSERT_EQ(relationship.direction, direction);
|
||||
ASSERT_THAT(relationship.types,
|
||||
UnorderedElementsAreArray(types.begin(), types.end()));
|
||||
std::vector<std::string> relationship_property_keys;
|
||||
for (auto x : relationship.properties) {
|
||||
relationship_property_keys.push_back(x.first);
|
||||
}
|
||||
ASSERT_THAT(
|
||||
relationship_property_keys,
|
||||
UnorderedElementsAreArray(property_keys.begin(), property_keys.end()));
|
||||
ASSERT_EQ(relationship.has_range, has_range);
|
||||
if (!has_range) return;
|
||||
ASSERT_EQ(relationship.lower_bound, lower_bound);
|
||||
ASSERT_EQ(relationship.upper_bound, upper_bound);
|
||||
}
|
||||
|
||||
// SyntaxException on incorrect syntax.
|
||||
TEST(CompilerStructuresTest, SyntaxException) {
|
||||
ASSERT_THROW(ParserTables("CREATE ()-[*1...2]-()"),
|
||||
frontend::opencypher::SyntaxException);
|
||||
}
|
||||
|
||||
// Empty node.
|
||||
TEST(CompilerStructuresTest, NodePatternEmpty) {
|
||||
ParserTables parser("CREATE ()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 1U);
|
||||
CompareNodes(*parser.nodes_.begin(), {}, {});
|
||||
}
|
||||
|
||||
// Node with variable.
|
||||
TEST(CompilerStructuresTest, NodePatternVariable) {
|
||||
ParserTables parser("CREATE (var)");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 1U);
|
||||
ASSERT_NE(parser.identifiers_map_.find("var"), parser.identifiers_map_.end());
|
||||
ASSERT_EQ(parser.nodes_.size(), 1U);
|
||||
auto output_identifier = parser.identifiers_map_["var"];
|
||||
ASSERT_NE(parser.nodes_.find(output_identifier), parser.nodes_.end());
|
||||
CompareNodes(*parser.nodes_.begin(), {}, {});
|
||||
}
|
||||
|
||||
// Node with labels.
|
||||
TEST(CompilerStructuresTest, NodePatternLabels) {
|
||||
ParserTables parser("CREATE (:label1:label2:label3)");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 1U);
|
||||
CompareNodes(*parser.nodes_.begin(), {"label1", "label2", "label3"}, {});
|
||||
}
|
||||
|
||||
// Node with properties.
|
||||
TEST(CompilerStructuresTest, NodePatternProperties) {
|
||||
ParserTables parser("CREATE ({age: 5, name: \"John\", surname: \"Smith\"})");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 1U);
|
||||
CompareNodes(*parser.nodes_.begin(), {}, {"age", "name", "surname"});
|
||||
}
|
||||
|
||||
// Relationship without relationship details.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternNoDetails) {
|
||||
ParserTables parser("CREATE ()--()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with empty relationship details.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternEmptyDetails) {
|
||||
ParserTables parser("CREATE ()-[]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with left direction.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternLeftDirection) {
|
||||
ParserTables parser("CREATE ()<--()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::LEFT, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with right direction.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternRightDirection) {
|
||||
ParserTables parser("CREATE ()-[]->()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::RIGHT, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with both directions.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternBothDirection) {
|
||||
ParserTables parser("CREATE ()<-[]->()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with unbounded variable range.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternUnbounded) {
|
||||
ParserTables parser("CREATE ()-[*]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, true, 1,
|
||||
LLONG_MAX);
|
||||
}
|
||||
|
||||
// Relationship with lower bounded variable range.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternLowerBounded) {
|
||||
ParserTables parser("CREATE ()-[*5..]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, true, 5,
|
||||
LLONG_MAX);
|
||||
}
|
||||
|
||||
// Relationship with upper bounded variable range.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternUpperBounded) {
|
||||
ParserTables parser("CREATE ()-[*..10]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, true, 1, 10);
|
||||
}
|
||||
|
||||
// Relationship with lower and upper bounded variable range.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternLowerUpperBounded) {
|
||||
ParserTables parser("CREATE ()-[*5..10]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, true, 5, 10);
|
||||
}
|
||||
|
||||
// Relationship with fixed number of edges.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternFixedRange) {
|
||||
ParserTables parser("CREATE ()-[*10]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, true, 10, 10);
|
||||
}
|
||||
|
||||
// Relationship with invalid bound (larger than long long).
|
||||
TEST(CompilerStructuresTest, RelationshipPatternInvalidBound) {
|
||||
ASSERT_THROW(
|
||||
ParserTables parser("CREATE ()-[*100000000000000000000000000]-()"),
|
||||
SemanticException);
|
||||
}
|
||||
|
||||
// Relationship with variable
|
||||
TEST(CompilerStructuresTest, RelationshipPatternVariable) {
|
||||
ParserTables parser("CREATE ()-[var]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 1U);
|
||||
ASSERT_NE(parser.identifiers_map_.find("var"), parser.identifiers_map_.end());
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
auto output_identifier = parser.identifiers_map_["var"];
|
||||
ASSERT_NE(parser.relationships_.find(output_identifier),
|
||||
parser.relationships_.end());
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with labels.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternLabels) {
|
||||
ParserTables parser("CREATE ()-[:label1|label2|:label3]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH,
|
||||
{"label1", "label2", "label3"}, {}, false);
|
||||
}
|
||||
|
||||
// Relationship with properties.
|
||||
TEST(CompilerStructuresTest, RelationshipPatternProperties) {
|
||||
ParserTables parser(
|
||||
"CREATE ()-[{age: 5, name: \"John\", surname: \"Smith\"}]-()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
CompareRelationships(*parser.relationships_.begin(),
|
||||
Relationship::Direction::BOTH, {},
|
||||
{"age", "name", "surname"}, false);
|
||||
}
|
||||
|
||||
// PatternPart.
|
||||
TEST(CompilerStructuresTest, PatternPart) {
|
||||
ParserTables parser("CREATE ()--()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.pattern_parts_.size(), 1U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.nodes.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.relationships.size(), 1U);
|
||||
}
|
||||
|
||||
// PatternPart in braces.
|
||||
TEST(CompilerStructuresTest, PatternPartBraces) {
|
||||
ParserTables parser("CREATE ((()--()))");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 0U);
|
||||
ASSERT_EQ(parser.pattern_parts_.size(), 1U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.nodes.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.relationships.size(), 1U);
|
||||
}
|
||||
|
||||
// PatternPart with variable.
|
||||
TEST(CompilerStructuresTest, PatternPartVariable) {
|
||||
ParserTables parser("CREATE var=()--()");
|
||||
ASSERT_EQ(parser.identifiers_map_.size(), 1U);
|
||||
ASSERT_EQ(parser.pattern_parts_.size(), 1U);
|
||||
ASSERT_EQ(parser.relationships_.size(), 1U);
|
||||
ASSERT_EQ(parser.nodes_.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.nodes.size(), 2U);
|
||||
ASSERT_EQ(parser.pattern_parts_.begin()->second.relationships.size(), 1U);
|
||||
ASSERT_NE(parser.identifiers_map_.find("var"), parser.identifiers_map_.end());
|
||||
auto output_identifier = parser.identifiers_map_["var"];
|
||||
ASSERT_NE(parser.pattern_parts_.find(output_identifier),
|
||||
parser.pattern_parts_.end());
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
Loading…
Reference in New Issue
Block a user