2017-06-08 00:28:31 +08:00
|
|
|
#include "query/common.hpp"
|
|
|
|
|
|
|
|
#include <cctype>
|
|
|
|
#include <codecvt>
|
|
|
|
#include <locale>
|
|
|
|
#include <stdexcept>
|
|
|
|
|
2017-10-11 19:19:10 +08:00
|
|
|
#include "glog/logging.h"
|
|
|
|
|
2017-06-08 00:28:31 +08:00
|
|
|
#include "query/exceptions.hpp"
|
2018-05-16 19:48:56 +08:00
|
|
|
#include "utils/serialization.hpp"
|
2017-06-08 00:28:31 +08:00
|
|
|
#include "utils/string.hpp"
|
|
|
|
|
|
|
|
namespace query {
|
|
|
|
|
|
|
|
int64_t ParseIntegerLiteral(const std::string &s) {
|
|
|
|
try {
|
|
|
|
// Not really correct since long long can have a bigger range than int64_t.
|
|
|
|
return static_cast<int64_t>(std::stoll(s, 0, 0));
|
|
|
|
} catch (const std::out_of_range &) {
|
2017-11-20 20:33:16 +08:00
|
|
|
throw SemanticException("Integer literal exceeds 64 bits");
|
2017-06-08 00:28:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string ParseStringLiteral(const std::string &s) {
|
2017-09-09 20:08:59 +08:00
|
|
|
// These functions is declared as lambda since its semantics is highly
|
|
|
|
// specific for this conxtext and shouldn't be used elsewhere.
|
|
|
|
auto EncodeEscapedUnicodeCodepointUtf32 = [](const std::string &s, int &i) {
|
2017-06-08 00:28:31 +08:00
|
|
|
const int kLongUnicodeLength = 8;
|
2017-09-09 20:08:59 +08:00
|
|
|
int j = i + 1;
|
2017-06-26 21:42:13 +08:00
|
|
|
while (j < static_cast<int>(s.size()) - 1 &&
|
|
|
|
j < i + kLongUnicodeLength + 1 && isxdigit(s[j])) {
|
2017-06-08 00:28:31 +08:00
|
|
|
++j;
|
|
|
|
}
|
|
|
|
if (j - i == kLongUnicodeLength + 1) {
|
|
|
|
char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);
|
|
|
|
i += kLongUnicodeLength;
|
|
|
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
|
|
|
return converter.to_bytes(t);
|
2017-09-09 20:08:59 +08:00
|
|
|
}
|
|
|
|
throw SyntaxException(
|
|
|
|
"Expected 8 hex digits as unicode codepoint started with \\U. "
|
|
|
|
"Use \\u for 4 hex digits format.");
|
|
|
|
};
|
|
|
|
auto EncodeEscapedUnicodeCodepointUtf16 = [](const std::string &s, int &i) {
|
|
|
|
const int kShortUnicodeLength = 4;
|
|
|
|
int j = i + 1;
|
|
|
|
while (j < static_cast<int>(s.size()) - 1 &&
|
|
|
|
j < i + kShortUnicodeLength + 1 && isxdigit(s[j])) {
|
|
|
|
++j;
|
|
|
|
}
|
|
|
|
if (j - i >= kShortUnicodeLength + 1) {
|
2017-06-08 00:28:31 +08:00
|
|
|
char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
|
2017-09-04 22:03:17 +08:00
|
|
|
if (t >= 0xD800 && t <= 0xDBFF) {
|
|
|
|
// t is high surrogate pair. Expect one more utf16 codepoint.
|
|
|
|
j = i + kShortUnicodeLength + 1;
|
|
|
|
if (j >= static_cast<int>(s.size()) - 1 || s[j] != '\\') {
|
|
|
|
throw SemanticException("Invalid utf codepoint");
|
|
|
|
}
|
|
|
|
++j;
|
|
|
|
if (j >= static_cast<int>(s.size()) - 1 ||
|
|
|
|
(s[j] != 'u' && s[j] != 'U')) {
|
|
|
|
throw SemanticException("Invalid utf codepoint");
|
|
|
|
}
|
|
|
|
++j;
|
|
|
|
int k = j;
|
|
|
|
while (k < static_cast<int>(s.size()) - 1 &&
|
|
|
|
k < j + kShortUnicodeLength && isxdigit(s[k])) {
|
|
|
|
++k;
|
|
|
|
}
|
|
|
|
if (k != j + kShortUnicodeLength) {
|
|
|
|
throw SemanticException("Invalid utf codepoint");
|
|
|
|
}
|
|
|
|
char16_t surrogates[3] = {t,
|
|
|
|
static_cast<char16_t>(stoi(
|
|
|
|
s.substr(j, kShortUnicodeLength), 0, 16)),
|
|
|
|
0};
|
|
|
|
i += kShortUnicodeLength + 2 + kShortUnicodeLength;
|
|
|
|
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
|
|
|
|
converter;
|
|
|
|
return converter.to_bytes(surrogates);
|
|
|
|
} else {
|
|
|
|
i += kShortUnicodeLength;
|
|
|
|
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
|
|
|
|
converter;
|
|
|
|
return converter.to_bytes(t);
|
|
|
|
}
|
2017-06-08 00:28:31 +08:00
|
|
|
}
|
2017-09-09 20:08:59 +08:00
|
|
|
throw SyntaxException(
|
|
|
|
"Expected 4 hex digits as unicode codepoint started with \\u. "
|
|
|
|
"Use \\U for 8 hex digits format.");
|
2017-06-08 00:28:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
std::string unescaped;
|
|
|
|
bool escape = false;
|
|
|
|
|
|
|
|
// First and last char is quote, we don't need to look at them.
|
|
|
|
for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) {
|
|
|
|
if (escape) {
|
|
|
|
switch (s[i]) {
|
|
|
|
case '\\':
|
|
|
|
unescaped += '\\';
|
|
|
|
break;
|
|
|
|
case '\'':
|
|
|
|
unescaped += '\'';
|
|
|
|
break;
|
|
|
|
case '"':
|
|
|
|
unescaped += '"';
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
case 'b':
|
|
|
|
unescaped += '\b';
|
|
|
|
break;
|
|
|
|
case 'F':
|
|
|
|
case 'f':
|
|
|
|
unescaped += '\f';
|
|
|
|
break;
|
|
|
|
case 'N':
|
|
|
|
case 'n':
|
|
|
|
unescaped += '\n';
|
|
|
|
break;
|
|
|
|
case 'R':
|
|
|
|
case 'r':
|
|
|
|
unescaped += '\r';
|
|
|
|
break;
|
|
|
|
case 'T':
|
|
|
|
case 't':
|
|
|
|
unescaped += '\t';
|
|
|
|
break;
|
|
|
|
case 'U':
|
2017-09-09 20:08:59 +08:00
|
|
|
try {
|
|
|
|
unescaped += EncodeEscapedUnicodeCodepointUtf32(s, i);
|
|
|
|
} catch (const std::range_error &) {
|
|
|
|
throw SemanticException("Invalid utf codepoint");
|
|
|
|
}
|
|
|
|
break;
|
2017-06-08 00:28:31 +08:00
|
|
|
case 'u':
|
2017-09-04 22:03:17 +08:00
|
|
|
try {
|
2017-09-09 20:08:59 +08:00
|
|
|
unescaped += EncodeEscapedUnicodeCodepointUtf16(s, i);
|
2017-09-04 22:03:17 +08:00
|
|
|
} catch (const std::range_error &) {
|
|
|
|
throw SemanticException("Invalid utf codepoint");
|
|
|
|
}
|
2017-06-08 00:28:31 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// This should never happen, except grammar changes and we don't
|
|
|
|
// notice change in this production.
|
2017-10-11 19:19:10 +08:00
|
|
|
DLOG(FATAL) << "can't happen";
|
2017-06-08 00:28:31 +08:00
|
|
|
throw std::exception();
|
|
|
|
}
|
|
|
|
escape = false;
|
|
|
|
} else if (s[i] == '\\') {
|
|
|
|
escape = true;
|
|
|
|
} else {
|
|
|
|
unescaped += s[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return unescaped;
|
|
|
|
}
|
|
|
|
|
|
|
|
double ParseDoubleLiteral(const std::string &s) {
|
|
|
|
try {
|
|
|
|
return utils::ParseDouble(s);
|
|
|
|
} catch (const utils::BasicException &) {
|
|
|
|
throw SemanticException("Couldn't parse string to double");
|
|
|
|
}
|
|
|
|
}
|
2017-07-20 00:14:59 +08:00
|
|
|
|
|
|
|
std::string ParseParameter(const std::string &s) {
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(s[0] == '$') << "Invalid string passed as parameter name";
|
2017-07-20 00:14:59 +08:00
|
|
|
if (s[1] != '`') return s.substr(1);
|
|
|
|
// If parameter name is escaped symbolic name then symbolic name should be
|
|
|
|
// unescaped and leading and trailing backquote should be removed.
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(s.size() > 3U && s.back() == '`')
|
|
|
|
<< "Invalid string passed as parameter name";
|
2017-07-20 00:14:59 +08:00
|
|
|
std::string out;
|
|
|
|
for (int i = 2; i < static_cast<int>(s.size()) - 1; ++i) {
|
|
|
|
if (s[i] == '`') {
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
out.push_back(s[i]);
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
2018-02-08 20:27:07 +08:00
|
|
|
|
|
|
|
void ReconstructTypedValue(TypedValue &value) {
|
|
|
|
using Type = TypedValue::Type;
|
|
|
|
switch (value.type()) {
|
|
|
|
case Type::Vertex:
|
|
|
|
if (!value.ValueVertex().Reconstruct()) throw ReconstructionException();
|
|
|
|
break;
|
|
|
|
case Type::Edge:
|
|
|
|
if (!value.ValueEdge().Reconstruct()) throw ReconstructionException();
|
|
|
|
break;
|
|
|
|
case Type::List:
|
|
|
|
for (TypedValue &inner_value : value.Value<std::vector<TypedValue>>())
|
|
|
|
ReconstructTypedValue(inner_value);
|
|
|
|
break;
|
|
|
|
case Type::Map:
|
|
|
|
for (auto &kv : value.Value<std::map<std::string, TypedValue>>())
|
|
|
|
ReconstructTypedValue(kv.second);
|
|
|
|
break;
|
|
|
|
case Type::Path:
|
|
|
|
for (auto &vertex : value.ValuePath().vertices()) {
|
|
|
|
if (!vertex.Reconstruct()) throw ReconstructionException();
|
|
|
|
}
|
|
|
|
for (auto &edge : value.ValuePath().edges()) {
|
|
|
|
if (!edge.Reconstruct()) throw ReconstructionException();
|
|
|
|
}
|
|
|
|
case Type::Null:
|
|
|
|
case Type::Bool:
|
|
|
|
case Type::Int:
|
|
|
|
case Type::Double:
|
|
|
|
case Type::String:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-02-19 22:07:02 +08:00
|
|
|
|
2018-07-30 19:48:22 +08:00
|
|
|
namespace {
|
2018-02-19 22:07:02 +08:00
|
|
|
|
2018-07-30 19:48:22 +08:00
|
|
|
bool TypedValueCompare(const TypedValue &a, const TypedValue &b) {
|
2018-02-19 22:07:02 +08:00
|
|
|
// in ordering null comes after everything else
|
|
|
|
// at the same time Null is not less that null
|
|
|
|
// first deal with Null < Whatever case
|
|
|
|
if (a.IsNull()) return false;
|
|
|
|
// now deal with NotNull < Null case
|
|
|
|
if (b.IsNull()) return true;
|
|
|
|
|
|
|
|
// comparisons are from this point legal only between values of
|
|
|
|
// the same type, or int+float combinations
|
|
|
|
if ((a.type() != b.type() && !(a.IsNumeric() && b.IsNumeric())))
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Can't compare value of type {} to value of type {}", a.type(),
|
|
|
|
b.type());
|
|
|
|
|
|
|
|
switch (a.type()) {
|
|
|
|
case TypedValue::Type::Bool:
|
|
|
|
return !a.Value<bool>() && b.Value<bool>();
|
|
|
|
case TypedValue::Type::Int:
|
|
|
|
if (b.type() == TypedValue::Type::Double)
|
|
|
|
return a.Value<int64_t>() < b.Value<double>();
|
|
|
|
else
|
|
|
|
return a.Value<int64_t>() < b.Value<int64_t>();
|
|
|
|
case TypedValue::Type::Double:
|
|
|
|
if (b.type() == TypedValue::Type::Int)
|
|
|
|
return a.Value<double>() < b.Value<int64_t>();
|
|
|
|
else
|
|
|
|
return a.Value<double>() < b.Value<double>();
|
|
|
|
case TypedValue::Type::String:
|
|
|
|
return a.Value<std::string>() < b.Value<std::string>();
|
|
|
|
case TypedValue::Type::List:
|
|
|
|
case TypedValue::Type::Map:
|
|
|
|
case TypedValue::Type::Vertex:
|
|
|
|
case TypedValue::Type::Edge:
|
|
|
|
case TypedValue::Type::Path:
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Comparison is not defined for values of type {}", a.type());
|
|
|
|
default:
|
|
|
|
LOG(FATAL) << "Unhandled comparison for types";
|
|
|
|
}
|
|
|
|
}
|
2018-05-15 23:38:47 +08:00
|
|
|
|
2018-07-30 19:48:22 +08:00
|
|
|
} // namespace
|
|
|
|
|
|
|
|
bool TypedValueVectorCompare::operator()(
|
|
|
|
const std::vector<TypedValue> &c1,
|
|
|
|
const std::vector<TypedValue> &c2) const {
|
|
|
|
// ordering is invalid if there are more elements in the collections
|
|
|
|
// then there are in the ordering_ vector
|
|
|
|
DCHECK(c1.size() <= ordering_.size() && c2.size() <= ordering_.size())
|
|
|
|
<< "Collections contain more elements then there are orderings";
|
|
|
|
|
|
|
|
auto c1_it = c1.begin();
|
|
|
|
auto c2_it = c2.begin();
|
|
|
|
auto ordering_it = ordering_.begin();
|
|
|
|
for (; c1_it != c1.end() && c2_it != c2.end();
|
|
|
|
c1_it++, c2_it++, ordering_it++) {
|
|
|
|
if (TypedValueCompare(*c1_it, *c2_it)) return *ordering_it == Ordering::ASC;
|
|
|
|
if (TypedValueCompare(*c2_it, *c1_it))
|
|
|
|
return *ordering_it == Ordering::DESC;
|
|
|
|
}
|
|
|
|
|
|
|
|
// at least one collection is exhausted
|
|
|
|
// c1 is less then c2 iff c1 reached the end but c2 didn't
|
|
|
|
return (c1_it == c1.end()) && (c2_it != c2.end());
|
|
|
|
}
|
|
|
|
|
2018-05-16 19:48:56 +08:00
|
|
|
void TypedValueVectorCompare::Save(
|
|
|
|
capnp::TypedValueVectorCompare::Builder *builder) const {
|
|
|
|
auto ordering_builder = builder->initOrdering(ordering_.size());
|
|
|
|
for (size_t i = 0; i < ordering_.size(); ++i) {
|
|
|
|
ordering_builder.set(i, ordering_[i] == Ordering::ASC
|
|
|
|
? capnp::Ordering::ASC
|
|
|
|
: capnp::Ordering::DESC);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TypedValueVectorCompare::Load(
|
|
|
|
const capnp::TypedValueVectorCompare::Reader &reader) {
|
|
|
|
std::vector<Ordering> ordering;
|
|
|
|
ordering.reserve(reader.getOrdering().size());
|
|
|
|
for (auto ordering_reader : reader.getOrdering()) {
|
|
|
|
ordering.push_back(ordering_reader == capnp::Ordering::ASC
|
|
|
|
? Ordering::ASC
|
|
|
|
: Ordering::DESC);
|
|
|
|
}
|
|
|
|
ordering_ = ordering;
|
|
|
|
}
|
|
|
|
|
2018-05-15 23:38:47 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
void SwitchAccessor(TAccessor &accessor, GraphView graph_view) {
|
|
|
|
switch (graph_view) {
|
|
|
|
case GraphView::NEW:
|
|
|
|
accessor.SwitchNew();
|
|
|
|
break;
|
|
|
|
case GraphView::OLD:
|
|
|
|
accessor.SwitchOld();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template void SwitchAccessor<>(VertexAccessor &accessor, GraphView graph_view);
|
|
|
|
template void SwitchAccessor<>(EdgeAccessor &accessor, GraphView graph_view);
|
|
|
|
|
2017-10-11 19:19:10 +08:00
|
|
|
} // namespace query
|