Rework CallProcedure to validate result sets

Summary: This diff renames the `__reload__` procedure to `mg.reload`, which accepts a module name as its argument. The main `CallCustomProcedure` function is now split into multiple parts, so that there's more control over finding a procedure, type checking its arguments, and finally checking the returned result set.
Depends on D2572
Reviewers: mferencevic, ipaljak
Reviewed By: ipaljak
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D2573
2019-11-26 16:07:55 +01:00 · 2019-11-26 16:07:55 +01:00 · e31331aae4
commit e31331aae4
parent d71f1bfa35
4 changed files with 113 additions and 33 deletions
--- a/query_modules/example.c
+++ b/query_modules/example.c
@ -68,7 +68,8 @@ int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
  }
  mgp_value_destroy(null_value);
  if (!mgp_proc_add_result(proc, "result", mgp_type_string())) return 1;
-  if (!mgp_proc_add_result(proc, "args", mgp_type_list(mgp_type_any())))
+  if (!mgp_proc_add_result(proc, "args",
+                           mgp_type_list(mgp_type_nullable(mgp_type_any()))))
    return 1;
  return 0;
 }
--- a/src/query/plan/operator.cpp
+++ b/src/query/plan/operator.cpp
@ -3735,24 +3735,47 @@ std::vector<Symbol> CallProcedure::ModifiedSymbols(

 namespace {

-void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name,
-                         const std::vector<Expression *> &args,
-                         storage::View graph_view, const ExecutionContext &ctx,
-                         Frame *frame, mgp_result *result) {
-  // Use evaluation memory, as invoking a procedure is akin to a simple
-  // evaluation of an expression.
-  // TODO: This will probably need to be changed when we add support for
-  // generator like procedures which yield a new result on each invocation.
-  auto *memory = ctx.evaluation_context.memory;
-  // First try to handle special procedure invocations for (re)loading modules.
+// Return true if we handled one of the special `mg` module procedures for
+// reloading query modules.
+// @throw QueryRuntimeException in case of error during procedure invocation.
+bool HandleReloadProcedures(
+    const std::string_view &fully_qualified_procedure_name,
+    const std::vector<Expression *> &args, ExpressionEvaluator *evaluator) {
  // It would be great to simply register `reload_all_modules` as a
  // regular procedure on a `mg` module, so we don't have a special case here.
  // Unfortunately, reloading requires taking a write lock, and we would
  // acquire a read lock by getting the module.
  if (fully_qualified_procedure_name == "mg.reload_all_modules") {
+    if (!args.empty())
+      throw QueryRuntimeException(
+          "'mg.reload_all_modules' requires no arguments.");
    procedure::gModuleRegistry.ReloadAllModules();
-    return;
+    return true;
+  } else if (fully_qualified_procedure_name == "mg.reload") {
+    // This is a special case for the same reasons as `mg.reload_all_modules`.
+    if (args.size() != 1U)
+      throw QueryRuntimeException("'mg.reload' requires exactly 1 argument.");
+    const auto &arg = args.front()->Accept(*evaluator);
+    if (!arg.IsString()) {
+      throw QueryRuntimeException(
+          "'mg.reload' argument named 'module_name' at position 0 must be of "
+          "type STRING.");
    }
+    const auto &module_name = arg.ValueString();
+    procedure::gModuleRegistry.ReloadModuleNamed(module_name);
+    return true;
+  }
+  return false;
+}
+
+// Return the ModulePtr and `mgp_proc *` of the found procedure after resolving
+// `fully_qualified_procedure_name`. `memory` is used for temporary allocations
+// inside this function. ModulePtr must be kept alive to make sure it won't be
+// unloaded.
+// @throw QueryRuntimeException if unable to find the procedure.
+std::pair<procedure::ModulePtr, const mgp_proc *> FindProcedureOrThrow(
+    const std::string_view &fully_qualified_procedure_name,
+    utils::MemoryResource *memory) {
  utils::pmr::vector<std::string_view> name_parts(memory);
  utils::Split(&name_parts, fully_qualified_procedure_name, ".");
  if (name_parts.size() == 1U) {
@ -3764,27 +3787,26 @@ void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name,
  const auto &module_name =
      fully_qualified_procedure_name.substr(0, last_dot_pos);
  const auto &proc_name = name_parts.back();
-  // This is a special case for the same reasons as `mg.reload_all_modules`.
-  if (proc_name == "__reload__") {
-    procedure::gModuleRegistry.ReloadModuleNamed(module_name);
-    return;
-  }
-  const auto &module = procedure::gModuleRegistry.GetModuleNamed(module_name);
+  auto module = procedure::gModuleRegistry.GetModuleNamed(module_name);
  if (!module) throw QueryRuntimeException("'{}' isn't loaded!", module_name);
-  static_assert(std::uses_allocator_v<mgp_value, utils::Allocator<mgp_value>>,
-                "Expected mgp_value to use custom allocator and makes STL "
-                "containers aware of that");
  const auto &proc_it = module->procedures.find(proc_name);
  if (proc_it == module->procedures.end())
    throw QueryRuntimeException("'{}' does not have a procedure named '{}'",
                                module_name, proc_name);
-  const auto &proc = proc_it->second;
+  return {std::move(module), &proc_it->second};
+}
+
+void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name,
+                         const mgp_proc &proc,
+                         const std::vector<Expression *> &args,
+                         const mgp_graph &graph, ExpressionEvaluator *evaluator,
+                         utils::MemoryResource *memory, mgp_result *result) {
+  static_assert(std::uses_allocator_v<mgp_value, utils::Allocator<mgp_value>>,
+                "Expected mgp_value to use custom allocator and makes STL "
+                "containers aware of that");
  // Build and type check procedure arguments.
-  mgp_graph graph{ctx.db_accessor, graph_view};
  mgp_list proc_args(memory);
  proc_args.elems.reserve(args.size());
-  ExpressionEvaluator evaluator(frame, ctx.symbol_table, ctx.evaluation_context,
-                                ctx.db_accessor, graph_view);
  if (args.size() < proc.args.size() ||
      // Rely on `||` short circuit so we can avoid potential overflow of
      // proc.args.size() + proc.opt_args.size() by subtracting.
@ -3804,7 +3826,7 @@ void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name,
    }
  }
  for (size_t i = 0; i < args.size(); ++i) {
-    auto arg = args[i]->Accept(evaluator);
+    auto arg = args[i]->Accept(*evaluator);
    std::string_view name;
    const query::procedure::CypherType *type;
    if (proc.args.size() > i) {
@ -3833,6 +3855,7 @@ void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name,
  utils::LimitedMemoryResource limited_mem(memory,
                                           100 * 1024 * 1024 /* 100 MB */);
  mgp_memory proc_memory{&limited_mem};
+  CHECK(result->signature == &proc.results);
  // TODO: What about cross library boundary exceptions? OMG C++?!
  proc.cb(&proc_args, &graph, result, &proc_memory);
  size_t leaked_bytes = limited_mem.GetAllocatedBytes();
@ -3856,7 +3879,7 @@ class CallProcedureCursor : public Cursor {
        // result_ needs to live throughout multiple Pull evaluations, until all
        // rows are produced. Therefore, we use the memory dedicated for the
        // whole execution.
-        result_(mem) {
+        result_(nullptr, mem) {
    CHECK(self_->result_fields_.size() == self_->result_symbols_.size())
        << "Incorrectly constructed CallProcedure";
  }
@ -3866,6 +3889,7 @@ class CallProcedureCursor : public Cursor {

    if (MustAbort(context)) throw HintedAbortError();

+    size_t result_signature_size = 0;
    // We need to fetch new procedure results after pulling from input.
    // TODO: Look into openCypher's distinction between procedures returning an
    // empty result set vs procedures which return `void`. We currently don't
@ -3873,13 +3897,40 @@ class CallProcedureCursor : public Cursor {
    // This `while` loop will skip over empty results.
    while (result_row_it_ == result_.rows.end()) {
      if (!input_cursor_->Pull(frame, context)) return false;
+      result_.signature = nullptr;
      result_.rows.clear();
      result_.error_msg.reset();
      // TODO: When we add support for write and eager procedures, we will need
      // to plan this operator with Accumulate and pass in storage::View::NEW.
      auto graph_view = storage::View::OLD;
-      CallCustomProcedure(self_->procedure_name_, self_->arguments_, graph_view,
-                          context, &frame, &result_);
+      ExpressionEvaluator evaluator(&frame, context.symbol_table,
+                                    context.evaluation_context,
+                                    context.db_accessor, graph_view);
+      // First try to handle special procedures for (re)loading modules.
+      if (HandleReloadProcedures(self_->procedure_name_, self_->arguments_,
+                                 &evaluator))
+        continue;
+      // Nothing special, so find the regular procedure and invoke it.
+      // It might be a good idea to resolve the procedure name once, at the
+      // start. Unfortunately, this could deadlock if we tried to invoke a
+      // procedure from a module (read lock) and reload a module (write lock)
+      // inside the same execution thread.
+      const auto &[module, proc] = FindProcedureOrThrow(
+          self_->procedure_name_, context.evaluation_context.memory);
+      result_.signature = &proc->results;
+      // Use evaluation memory, as invoking a procedure is akin to a simple
+      // evaluation of an expression.
+      // TODO: This will probably need to be changed when we add support for
+      // generator like procedures which yield a new result on each invocation.
+      auto *memory = context.evaluation_context.memory;
+      mgp_graph graph{context.db_accessor, graph_view};
+      CallCustomProcedure(self_->procedure_name_, *proc, self_->arguments_,
+                          graph, &evaluator, memory, &result_);
+      // Reset result_.signature to nullptr, because outside of this scope we
+      // will no longer hold a lock on the `module`. If someone were to reload
+      // it, the pointer would be invalid.
+      result_signature_size = result_.signature->size();
+      result_.signature = nullptr;
      if (result_.error_msg) {
        throw QueryRuntimeException("{}: {}", self_->procedure_name_,
                                    *result_.error_msg);
@ -3887,8 +3938,17 @@ class CallProcedureCursor : public Cursor {
      result_row_it_ = result_.rows.begin();
    }

-    for (size_t i = 0; i < self_->result_fields_.size(); ++i) {
    const auto &values = result_row_it_->values;
+    // Check that the row has all fields as required by the result signature.
+    // C API guarantees that it's impossible to set fields which are not part of
+    // the result record, but it does not guarantee that none are missing. See
+    // `mgp_result_record_insert`.
+    if (values.size() != result_signature_size) {
+      throw QueryRuntimeException(
+          "Procedure '{}' did not yield all fields as required by its "
+          "signature.", self_->procedure_name_);
+    }
+    for (size_t i = 0; i < self_->result_fields_.size(); ++i) {
      std::string_view field_name(self_->result_fields_[i]);
      auto result_it = values.find(field_name);
      if (result_it == values.end()) {
--- a/src/query/procedure/mg_procedure_impl.cpp
+++ b/src/query/procedure/mg_procedure_impl.cpp
@ -792,8 +792,10 @@ int mgp_result_set_error_msg(mgp_result *res, const char *msg) {

 mgp_result_record *mgp_result_new_record(mgp_result *res) {
  auto *memory = res->rows.get_allocator().GetMemoryResource();
+  CHECK(res->signature) << "Expected to have a valid signature";
  try {
    res->rows.push_back(mgp_result_record{
+        res->signature,
        utils::pmr::map<utils::pmr::string, query::TypedValue>(memory)});
  } catch (...) {
    return nullptr;
@ -804,8 +806,12 @@ mgp_result_record *mgp_result_new_record(mgp_result *res) {
 int mgp_result_record_insert(mgp_result_record *record, const char *field_name,
                             const mgp_value *val) {
  auto *memory = record->values.get_allocator().GetMemoryResource();
-  // TODO: Result validation when we add registering procedures with result
-  // signature description.
+  // Validate field_name & val satisfy the procedure's result signature.
+  CHECK(record->signature) << "Expected to have a valid signature";
+  auto find_it = record->signature->find(field_name);
+  if (find_it == record->signature->end()) return 0;
+  const auto *type = find_it->second.first;
+  if (!type->SatisfiesType(*val)) return 0;
  try {
    record->values.emplace(field_name, ToTypedValue(*val, memory));
  } catch (...) {
--- a/src/query/procedure/mg_procedure_impl.hpp
+++ b/src/query/procedure/mg_procedure_impl.hpp
@ -327,12 +327,25 @@ struct mgp_path {
 };

 struct mgp_result_record {
+  /// Result record signature as defined for mgp_proc.
+  const utils::pmr::map<utils::pmr::string,
+                        std::pair<const query::procedure::CypherType *, bool>>
+      *signature;
  utils::pmr::map<utils::pmr::string, query::TypedValue> values;
 };

 struct mgp_result {
-  explicit mgp_result(utils::MemoryResource *mem) : rows(mem) {}
+  explicit mgp_result(
+      const utils::pmr::map<
+          utils::pmr::string,
+          std::pair<const query::procedure::CypherType *, bool>> *signature,
+      utils::MemoryResource *mem)
+      : signature(signature), rows(mem) {}

+  /// Result record signature as defined for mgp_proc.
+  const utils::pmr::map<utils::pmr::string,
+                        std::pair<const query::procedure::CypherType *, bool>>
+      *signature;
  utils::pmr::vector<mgp_result_record> rows;
  std::optional<utils::pmr::string> error_msg;
 };