Extract literals to ast

Summary:
Extract literals to ast Literal node
Fix bugs in TypedValue
Add tests for properties to cypher_main_visitor test

Reviewers: buda, florijan, dgleich, teon.banek

Reviewed By: dgleich, teon.banek

Subscribers: dgleich, pullbot

Differential Revision: https://phabricator.memgraph.io/D149
This commit is contained in:
Mislav Bradac 2017-03-21 15:10:59 +01:00
parent a2a4c28168
commit 8e7ccf6e83
6 changed files with 388 additions and 61 deletions

View File

@ -164,6 +164,7 @@ TypedValue::TypedValue(const TypedValue& other) : type_(other.type_) {
return;
case Type::Path:
new (&path_v) Path(other.path_v);
return;
}
permanent_fail("Unsupported TypedValue::Type");
}
@ -229,13 +230,14 @@ std::ostream& operator<<(std::ostream& os, const TypedValue& value) {
}
TypedValue& TypedValue::operator=(const TypedValue& other) {
// set the type of this
this->~TypedValue();
type_ = other.type_;
if (this != &other) {
this->~TypedValue();
// set the type of this
type_ = other.type_;
switch (other.type_) {
case TypedValue::Type::Null:
return *this;
case TypedValue::Type::Bool:
this->bool_v = other.bool_v;
return *this;
@ -264,8 +266,9 @@ TypedValue& TypedValue::operator=(const TypedValue& other) {
new (&path_v) Path(other.path_v);
return *this;
}
permanent_fail("Unsupported TypedValue::Type");
}
permanent_fail("Unsupported TypedValue::Type");
return *this;
}
const TypedValue TypedValue::Null = TypedValue();

View File

@ -5,6 +5,7 @@
#include <vector>
#include "database/graph_db.hpp"
#include "query/backend/cpp/typed_value.hpp"
#include "query/frontend/ast/ast_visitor.hpp"
#include "utils/visitor/visitable.hpp"
@ -30,6 +31,19 @@ class Expression : public Tree {
Expression(int uid) : Tree(uid) {}
};
class Literal : public Expression {
friend class AstTreeStorage;
public:
TypedValue value_;
DEFVISITABLE(TreeVisitorBase);
protected:
Literal(int uid) : Expression(uid) {}
template <typename T>
Literal(int uid, T value) : Expression(uid), value_(value) {}
};
class Identifier : public Expression {
friend class AstTreeStorage;

View File

@ -15,8 +15,9 @@ class Return;
class Pattern;
class NodeAtom;
class EdgeAtom;
class Literal;
using TreeVisitorBase =
::utils::Visitor<Query, NamedExpression, Identifier, PropertyLookup, Create,
Match, Return, Pattern, NodeAtom, EdgeAtom>;
using TreeVisitorBase = ::utils::Visitor<Query, NamedExpression, Identifier,
Literal, PropertyLookup, Create, Match,
Return, Pattern, NodeAtom, EdgeAtom>;
}

View File

@ -1,6 +1,9 @@
#include "query/frontend/ast/cypher_main_visitor.hpp"
#include <algorithm>
#include <climits>
#include <codecvt>
#include <cstring>
#include <string>
#include <unordered_map>
#include <utility>
@ -37,6 +40,8 @@ namespace {
// }
// return ops;
//}
const std::string kTrueString = "true";
const std::string kFalseString = "false";
}
const std::string CypherMainVisitor::kAnonPrefix = "anon";
@ -63,16 +68,19 @@ antlrcpp::Any CypherMainVisitor::visitSingleQuery(
antlrcpp::Any CypherMainVisitor::visitClause(CypherParser::ClauseContext *ctx) {
if (ctx->cypherReturn()) {
return (Clause *)ctx->cypherReturn()->accept(this).as<Return *>();
return static_cast<Clause *>(
ctx->cypherReturn()->accept(this).as<Return *>());
}
if (ctx->cypherMatch()) {
return (Clause *)ctx->cypherMatch()->accept(this).as<Match *>();
return static_cast<Clause *>(
ctx->cypherMatch()->accept(this).as<Match *>());
}
if (ctx->create()) {
return (Clause *)ctx->create()->accept(this).as<Create *>();
return static_cast<Clause *>(ctx->create()->accept(this).as<Create *>());
}
// TODO: implement other clauses.
throw std::exception();
return visitChildren(ctx);
return 0;
}
antlrcpp::Any CypherMainVisitor::visitCypherMatch(
@ -261,12 +269,15 @@ antlrcpp::Any CypherMainVisitor::visitRelationshipPattern(
}
if (ctx->relationshipDetail()->relationshipTypes()) {
edge->edge_types_ = ctx->relationshipDetail()
->relationshipTypes()
->accept(this)
.as<std::vector<GraphDb::EdgeType>>();
->relationshipTypes()
->accept(this)
.as<std::vector<GraphDb::EdgeType>>();
}
if (ctx->relationshipDetail()->properties()) {
throw std::exception();
edge->properties_ = ctx->relationshipDetail()
->properties()
->accept(this)
.as<std::map<GraphDb::Property, Expression *>>();
}
if (ctx->relationshipDetail()->rangeLiteral()) {
throw std::exception();
@ -535,32 +546,9 @@ antlrcpp::Any CypherMainVisitor::visitExpression2(
antlrcpp::Any CypherMainVisitor::visitAtom(CypherParser::AtomContext *ctx) {
if (ctx->literal()) {
// This is not very nice since we didn't parse text given in query, but we
// left that job to the code generator. Correct approach would be to parse
// it and store it in a structure of appropriate type, int, string... And
// then code generator would generate its own text based on structure. This
// is also a security risk if code generator doesn't parse and escape
// text appropriately. At the moment we don;t care much since literal will
// appear only in tests and real queries will be stripped.
// TODO: Either parse it correctly or raise exception. If exception is
// raised it tests should also use params instead of literals.
// auto text = ctx->literal()->getText();
// auto lhs_id = new_id();
// symbol_table_[lhs_id] = SimpleExpression{Function::LITERAL, {text}};
// return lhs_id;
throw std::exception();
return static_cast<Expression *>(visitChildren(ctx).as<Literal *>());
} else if (ctx->parameter()) {
// This is once again potential security risk. We shouldn't output text
// given in user query as parameter name directly to the code. Stripper
// should either replace user's parameter name with generic one or we should
// allow only parameters with numeric names. At the moment this is not a
// problem since we don't accept user's parameters but only ours.
// TODO: revise this.
// auto text = ctx->literal()->getText();
// auto lhs_id = new_id();
// symbol_table_[lhs_id] = SimpleExpression{Function::PARAMETER, {text}};
throw std::exception();
// return lhs_id;
} else if (ctx->parenthesizedExpression()) {
return ctx->parenthesizedExpression()->accept(this);
} else if (ctx->variable()) {
@ -573,15 +561,158 @@ antlrcpp::Any CypherMainVisitor::visitAtom(CypherParser::AtomContext *ctx) {
throw std::exception();
}
antlrcpp::Any CypherMainVisitor::visitLiteral(
CypherParser::LiteralContext *ctx) {
if (ctx->CYPHERNULL()) {
return storage_.Create<Literal>(TypedValue::Null);
} else if (ctx->StringLiteral()) {
return storage_.Create<Literal>(
visitStringLiteral(ctx->StringLiteral()->getText()).as<std::string>());
} else if (ctx->booleanLiteral()) {
return storage_.Create<Literal>(
ctx->booleanLiteral()->accept(this).as<bool>());
} else if (ctx->numberLiteral()) {
return storage_.Create<Literal>(
ctx->numberLiteral()->accept(this).as<TypedValue>());
} else {
// TODO: Implement map and list literals.
throw std::exception();
}
return visitChildren(ctx);
}
antlrcpp::Any CypherMainVisitor::visitNumberLiteral(
CypherParser::NumberLiteralContext *ctx) {
if (ctx->integerLiteral()) {
return TypedValue(ctx->integerLiteral()->accept(this).as<int64_t>());
} else if (ctx->doubleLiteral()) {
return TypedValue(ctx->doubleLiteral()->accept(this).as<double>());
} else {
debug_assert(false, "can't happen");
throw std::exception();
}
}
antlrcpp::Any CypherMainVisitor::visitDoubleLiteral(
CypherParser::DoubleLiteralContext *ctx) {
// stod would be nicer but it uses current locale so we shouldn't use it.
double t = 0LL;
std::istringstream iss(ctx->getText());
iss.imbue(std::locale::classic());
iss >> t;
if (!iss.eof()) {
throw std::exception();
}
return t;
}
antlrcpp::Any CypherMainVisitor::visitIntegerLiteral(
CypherParser::IntegerLiteralContext *ctx) {
int64_t t = 0LL;
try {
// Not really correct since long long can have a bigger range than int64_t.
t = std::stoll(ctx->getText(), 0, 0);
} catch (std::out_of_range) {
throw std::exception();
}
return t;
}
antlrcpp::Any CypherMainVisitor::visitStringLiteral(
const std::string &escaped) {
// This function is declared as lambda since its semantics is highly specific
// for this conxtext and shouldn't be used elsewhere.
auto EncodeEscapedUnicodeCodepoint = [](const std::string &s, int &i) {
int j = i + 1;
const int kShortUnicodeLength = 4;
const int kLongUnicodeLength = 8;
while (j < (int)s.size() - 1 && j < i + kLongUnicodeLength + 1 &&
isxdigit(s[j])) {
++j;
}
if (j - i == kLongUnicodeLength + 1) {
char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);
i += kLongUnicodeLength;
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
return converter.to_bytes(t);
} else if (j - i >= kShortUnicodeLength + 1) {
char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
i += kShortUnicodeLength;
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
converter;
return converter.to_bytes(t);
} else {
debug_assert(false, "can't happen");
throw std::exception();
}
};
std::string unescaped;
bool escape = false;
// First and last char is quote, we don't need to look at them.
for (int i = 1; i < (int)escaped.size() - 1; ++i) {
if (escape) {
switch (escaped[i]) {
case '\\':
unescaped += '\\';
break;
case '\'':
unescaped += '\'';
break;
case '"':
unescaped += '"';
break;
case 'B':
case 'b':
unescaped += '\b';
break;
case 'F':
case 'f':
unescaped += '\f';
break;
case 'N':
case 'n':
unescaped += '\n';
break;
case 'R':
case 'r':
unescaped += '\r';
break;
case 'T':
case 't':
unescaped += '\t';
break;
case 'U':
case 'u':
unescaped += EncodeEscapedUnicodeCodepoint(escaped, i);
break;
default:
debug_assert(false, "can't happen");
throw std::exception();
}
escape = false;
} else if (escaped[i] == '\\') {
escape = true;
} else {
unescaped += escaped[i];
}
}
return unescaped;
}
antlrcpp::Any CypherMainVisitor::visitBooleanLiteral(
CypherParser::BooleanLiteralContext *ctx) {
std::string s = ctx->getText();
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
if (s == kTrueString) {
return true;
} else if (s == kFalseString) {
return false;
} else {
debug_assert(false, "can't happen");
throw std::exception();
}
}
}
}

View File

@ -283,10 +283,11 @@ class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
*/
antlrcpp::Any visitAtom(CypherParser::AtomContext *ctx) override;
// antlrcpp::Any visitLiteral(CypherParser::LiteralContext *ctx) override {
// return visitChildren(ctx);
// }
//
/**
* @return Literal*
*/
antlrcpp::Any visitLiteral(CypherParser::LiteralContext *ctx) override;
// antlrcpp::Any visitBooleanLiteral(
// CypherParser::BooleanLiteralContext *ctx) override {
// return visitChildren(ctx);
@ -303,11 +304,36 @@ class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
// }
/**
* @return int64_t.
* Convert escaped string from a query to unescaped utf8 string.
*
* @return string
*/
antlrcpp::Any visitStringLiteral(const std::string &escaped);
/**
* @return bool
*/
antlrcpp::Any visitBooleanLiteral(
CypherParser::BooleanLiteralContext *ctx) override;
/**
* @return TypedValue with either double or int
*/
antlrcpp::Any visitNumberLiteral(
CypherParser::NumberLiteralContext *ctx) override;
/**
* @return int64_t
*/
antlrcpp::Any visitIntegerLiteral(
CypherParser::IntegerLiteralContext *ctx) override;
/**
* @return double
*/
antlrcpp::Any visitDoubleLiteral(
CypherParser::DoubleLiteralContext *ctx) override;
public:
Query *query() { return query_; }
const static std::string kAnonPrefix;

View File

@ -17,6 +17,7 @@ namespace {
using namespace query;
using namespace query::frontend;
using testing::UnorderedElementsAre;
using testing::Pair;
class AstGenerator {
public:
@ -45,8 +46,150 @@ TEST(CypherMainVisitorTest, SyntaxException) {
ASSERT_THROW(AstGenerator("CREATE ()-[*1...2]-()"), std::exception);
}
TEST(CypherMainVisitorTest, PropertyLookup) {
AstGenerator ast_generator("RETURN n.x");
auto *query = ast_generator.query_;
ASSERT_EQ(query->clauses_.size(), 1U);
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *property_lookup = dynamic_cast<PropertyLookup *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(property_lookup->expression_);
auto identifier = dynamic_cast<Identifier *>(property_lookup->expression_);
ASSERT_TRUE(identifier);
ASSERT_EQ(identifier->name_, "n");
ASSERT_EQ(property_lookup->property_,
ast_generator.db_accessor_->property("x"));
}
TEST(CypherMainVisitorTest, ReturnNamedIdentifier) {
AstGenerator ast_generator("RETURN var AS var5");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *named_expr = return_clause->named_expressions_[0];
ASSERT_EQ(named_expr->name_, "var5");
auto *identifier = dynamic_cast<Identifier *>(named_expr->expression_);
ASSERT_EQ(identifier->name_, "var");
}
TEST(CypherMainVisitorTest, IntegerLiteral) {
AstGenerator ast_generator("RETURN 42");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<int64_t>(), 42);
}
TEST(CypherMainVisitorTest, IntegerLiteralTooLarge) {
ASSERT_THROW(AstGenerator("RETURN 10000000000000000000000000"),
std::exception);
}
TEST(CypherMainVisitorTest, BooleanLiteralTrue) {
AstGenerator ast_generator("RETURN TrUe");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<bool>(), true);
}
TEST(CypherMainVisitorTest, BooleanLiteralFalse) {
AstGenerator ast_generator("RETURN faLSE");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<bool>(), false);
}
TEST(CypherMainVisitorTest, NullLiteral) {
AstGenerator ast_generator("RETURN nULl");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.type(), TypedValue::Type::Null);
}
TEST(CypherMainVisitorTest, StringLiteralDoubleQuotes) {
AstGenerator ast_generator("RETURN \"mi'rko\"");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<std::string>(), "mi'rko");
}
TEST(CypherMainVisitorTest, StringLiteralSingleQuotes) {
AstGenerator ast_generator("RETURN 'mi\"rko'");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<std::string>(), "mi\"rko");
}
TEST(CypherMainVisitorTest, StringLiteralEscapedChars) {
AstGenerator ast_generator(
"RETURN '\\\\\\'\\\"\\b\\B\\f\\F\\n\\N\\r\\R\\t\\T'");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<std::string>(), "\\'\"\b\b\f\f\n\n\r\r\t\t");
}
TEST(CypherMainVisitorTest, StringLiteralEscapedUtf16) {
AstGenerator ast_generator("RETURN '\\u221daaa\\U221daaa'");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<std::string>(), u8"\u221daaa\u221daaa");
}
TEST(CypherMainVisitorTest, StringLiteralEscapedUtf32) {
AstGenerator ast_generator("RETURN '\\u0001F600aaaa\\U0001F600aaaaaaaa'");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<std::string>(),
u8"\U0001F600aaaa\U0001F600aaaaaaaa");
}
TEST(CypherMainVisitorTest, DoubleLiteral) {
AstGenerator ast_generator("RETURN 3.5");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<double>(), 3.5);
}
TEST(CypherMainVisitorTest, DoubleLiteralExponent) {
AstGenerator ast_generator("RETURN 5e-1");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *literal = dynamic_cast<Literal *>(
return_clause->named_expressions_[0]->expression_);
ASSERT_TRUE(literal);
ASSERT_EQ(literal->value_.Value<double>(), 0.5);
}
TEST(CypherMainVisitorTest, NodePattern) {
AstGenerator ast_generator("MATCH (:label1:label2:label3)");
AstGenerator ast_generator("MATCH (:label1:label2:label3 {a : 5, b : 10})");
auto *query = ast_generator.query_;
ASSERT_EQ(query->clauses_.size(), 1U);
auto *match = dynamic_cast<Match *>(query->clauses_[0]);
@ -63,7 +206,17 @@ TEST(CypherMainVisitorTest, NodePattern) {
ast_generator.db_accessor_->label("label1"),
ast_generator.db_accessor_->label("label2"),
ast_generator.db_accessor_->label("label3")));
// TODO: add test for properties.
std::unordered_map<GraphDb::Property, int64_t> properties;
for (auto x : node->properties_) {
auto *literal = dynamic_cast<Literal *>(x.second);
ASSERT_TRUE(literal);
ASSERT_TRUE(literal->value_.type() == TypedValue::Type::Int);
properties[x.first] = literal->value_.Value<int64_t>();
}
ASSERT_THAT(properties,
UnorderedElementsAre(
Pair(ast_generator.db_accessor_->property("a"), 5),
Pair(ast_generator.db_accessor_->property("b"), 10)));
}
TEST(CypherMainVisitorTest, NodePatternIdentifier) {
@ -74,7 +227,7 @@ TEST(CypherMainVisitorTest, NodePatternIdentifier) {
ASSERT_TRUE(node->identifier_);
ASSERT_EQ(node->identifier_->name_, "var");
ASSERT_THAT(node->labels_, UnorderedElementsAre());
// TODO: add test for properties.
ASSERT_THAT(node->properties_, UnorderedElementsAre());
}
TEST(CypherMainVisitorTest, RelationshipPatternNoDetails) {
@ -97,7 +250,7 @@ TEST(CypherMainVisitorTest, RelationshipPatternNoDetails) {
}
TEST(CypherMainVisitorTest, RelationshipPatternDetails) {
AstGenerator ast_generator("MATCH ()<-[:type1|type2]-()");
AstGenerator ast_generator("MATCH ()<-[:type1|type2 {a : 5, b : 10}]-()");
auto *query = ast_generator.query_;
auto *match = dynamic_cast<Match *>(query->clauses_[0]);
auto *edge = dynamic_cast<EdgeAtom *>(match->patterns_[0]->atoms_[1]);
@ -106,7 +259,17 @@ TEST(CypherMainVisitorTest, RelationshipPatternDetails) {
edge->edge_types_,
UnorderedElementsAre(ast_generator.db_accessor_->edge_type("type1"),
ast_generator.db_accessor_->edge_type("type2")));
// TODO: test properties
std::unordered_map<GraphDb::Property, int64_t> properties;
for (auto x : edge->properties_) {
auto *literal = dynamic_cast<Literal *>(x.second);
ASSERT_TRUE(literal);
ASSERT_TRUE(literal->value_.type() == TypedValue::Type::Int);
properties[x.first] = literal->value_.Value<int64_t>();
}
ASSERT_THAT(properties,
UnorderedElementsAre(
Pair(ast_generator.db_accessor_->property("a"), 5),
Pair(ast_generator.db_accessor_->property("b"), 10)));
}
TEST(CypherMainVisitorTest, RelationshipPatternVariable) {
@ -232,16 +395,6 @@ TEST(CypherMainVisitorTest, ReturnUnanemdIdentifier) {
ASSERT_EQ(identifier->name_, "var");
}
TEST(CypherMainVisitorTest, ReturnNamedIdentifier) {
AstGenerator ast_generator("RETURN var AS var5");
auto *query = ast_generator.query_;
auto *return_clause = dynamic_cast<Return *>(query->clauses_[0]);
auto *named_expr = return_clause->named_expressions_[0];
ASSERT_EQ(named_expr->name_, "var5");
auto *identifier = dynamic_cast<Identifier *>(named_expr->expression_);
ASSERT_EQ(identifier->name_, "var");
}
TEST(CypherMainVisitorTest, Create) {
AstGenerator ast_generator("CREATE (n)");
auto *query = ast_generator.query_;
@ -255,6 +408,5 @@ TEST(CypherMainVisitorTest, Create) {
ASSERT_TRUE(node);
ASSERT_TRUE(node->identifier_);
ASSERT_EQ(node->identifier_->name_, "n");
// TODO: add test for properties.
}
}