From 6666eea8972c0ec0914db1d4ff15a4b979bdfc38 Mon Sep 17 00:00:00 2001
From: Jure Bajic <jure.bajic@memgraph.com>
Date: Thu, 23 Jun 2022 14:04:44 +0200
Subject: [PATCH] Add initial schema implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add initial schema implementation

* Add index to schema

* List schemas and enable multiple properties

* Implement SchemaTypes

* Apply suggestions from code review

Co-authored-by: Jeremy B <97525434+42jeremy@users.noreply.github.com>
Co-authored-by: János Benjamin Antal <antaljanosbenjamin@users.noreply.github.com>

* Address review comments

* Remove Map and List

* Apply suggestions from code review

Co-authored-by: Kostas Kyrimis  <kostaskyrim@gmail.com>

Co-authored-by: Jeremy B <97525434+42jeremy@users.noreply.github.com>
Co-authored-by: János Benjamin Antal <antaljanosbenjamin@users.noreply.github.com>
Co-authored-by: Kostas Kyrimis  <kostaskyrim@gmail.com>
---
 src/query/db_accessor.hpp     |   2 +
 src/storage/v2/CMakeLists.txt |   1 +
 src/storage/v2/schemas.cpp    |  74 ++++++++++++++++
 src/storage/v2/schemas.hpp    | 155 ++++++++++++++++++++++++++++++++++
 src/storage/v2/storage.cpp    |  11 ++-
 src/storage/v2/storage.hpp    |  21 ++++-
 6 files changed, 261 insertions(+), 3 deletions(-)
 create mode 100644 src/storage/v2/schemas.cpp
 create mode 100644 src/storage/v2/schemas.hpp

diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp
index 55514c883..1e19d0018 100644
--- a/src/query/db_accessor.hpp
+++ b/src/query/db_accessor.hpp
@@ -356,6 +356,8 @@ class DbAccessor final {
   storage::IndicesInfo ListAllIndices() const { return accessor_->ListAllIndices(); }
 
   storage::ConstraintsInfo ListAllConstraints() const { return accessor_->ListAllConstraints(); }
+
+  storage::SchemasInfo ListAllSchemas() const { return accessor_->ListAllSchemas(); }
 };
 
 }  // namespace memgraph::query
diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt
index f33a8553d..dab088c93 100644
--- a/src/storage/v2/CMakeLists.txt
+++ b/src/storage/v2/CMakeLists.txt
@@ -10,6 +10,7 @@ set(storage_v2_src_files
     indices.cpp
     property_store.cpp
     vertex_accessor.cpp
+    schemas.cpp
     storage.cpp)
 
 ##### Replication #####
diff --git a/src/storage/v2/schemas.cpp b/src/storage/v2/schemas.cpp
new file mode 100644
index 000000000..2e4fbbe3b
--- /dev/null
+++ b/src/storage/v2/schemas.cpp
@@ -0,0 +1,74 @@
+// Copyright 2022 Memgraph Ltd.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+// License, and you may not use this file except in compliance with the Business Source License.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "storage/v2/property_value.hpp"
+#include "storage/v2/schemas.hpp"
+
+namespace memgraph::storage {
+
+SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label) : status{status}, label{label} {}
+SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type)
+    : status{status}, label{label}, violated_type{violated_type} {}
+SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type,
+                                 PropertyValue violated_property_value)
+    : status{status}, label{label}, violated_type{violated_type},
+      violated_property_value{std::move(violated_property_value)} {}  // sink parameter: move instead of copying
+
+bool Schemas::CreateSchema(const LabelId primary_label, const std::vector<SchemaProperty> &schemas_types) {
+  return schemas_.try_emplace(primary_label, schemas_types).second;  // false if the label already has a schema
+}
+
+bool Schemas::DeleteSchema(const LabelId primary_label) {
+  return schemas_.erase(primary_label) != 0;  // erase() returns the number of removed entries
+}
+
+std::optional<SchemaViolation> Schemas::ValidateVertex(const LabelId primary_label, const Vertex &vertex) {
+  // TODO Check for multiple defined primary labels
+  const auto schema_it = schemas_.find(primary_label);
+  if (schema_it == schemas_.end()) {
+    return SchemaViolation(SchemaViolation::ValidationStatus::NO_SCHEMA_DEFINED_FOR_LABEL, primary_label);
+  }
+  if (!utils::Contains(vertex.labels, primary_label)) {
+    return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_HAS_NO_PRIMARY_LABEL, primary_label);
+  }
+  // Every property listed in the schema must be present on the vertex and have the declared type.
+  for (const auto &schema_type : schema_it->second) {
+    if (!vertex.properties.HasProperty(schema_type.property_id)) {
+      return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_HAS_NO_PROPERTY, primary_label, schema_type);
+    }
+    // Property type check
+    //  TODO Can this be replaced with just property id check?
+    if (auto vertex_property = vertex.properties.GetProperty(schema_type.property_id);
+        PropertyValueTypeToSchemaProperty(vertex_property) != schema_type.type) {
+      return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_PROPERTY_WRONG_TYPE, primary_label, schema_type,
+                             std::move(vertex_property));  // local is not used afterwards, so hand it over
+    }
+  }
+  // TODO after the introduction of vertex hashing introduce check for vertex
+  // primary key uniqueness
+
+  return std::nullopt;
+}
+
+Schemas::SchemasList Schemas::ListSchemas() const {
+  Schemas::SchemasList schemas_list;  // (label, properties) copies of every map entry
+  schemas_list.reserve(schemas_.size());
+  for (const auto &[label, properties] : schemas_) {
+    schemas_list.emplace_back(label, properties);
+  }
+  return schemas_list;
+}
+
+}  // namespace memgraph::storage
diff --git a/src/storage/v2/schemas.hpp b/src/storage/v2/schemas.hpp
new file mode 100644
index 000000000..113707069
--- /dev/null
+++ b/src/storage/v2/schemas.hpp
@@ -0,0 +1,155 @@
+// Copyright 2022 Memgraph Ltd.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+// License, and you may not use this file except in compliance with the Business Source License.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "storage/v2/id_types.hpp"
+#include "storage/v2/indices.hpp"
+#include "storage/v2/property_value.hpp"
+#include "storage/v2/temporal.hpp"
+#include "storage/v2/transaction.hpp"
+#include "storage/v2/vertex.hpp"
+#include "utils/result.hpp"
+
+namespace memgraph::storage {
+
+class SchemaViolationException : public utils::BasicException {
+  using utils::BasicException::BasicException;  // inherit the base-class constructors
+};
+
+struct SchemaProperty {
+  enum class Type : uint8_t { Bool, Int, Double, String, Date, LocalTime, LocalDateTime, Duration };
+
+  Type type;               // value type the property is declared with
+  PropertyId property_id;  // id of the property this entry describes
+};
+
+struct SchemaViolation {
+  enum class ValidationStatus : uint8_t {
+    VERTEX_HAS_NO_PRIMARY_LABEL,
+    VERTEX_HAS_NO_PROPERTY,
+    NO_SCHEMA_DEFINED_FOR_LABEL,
+    VERTEX_PROPERTY_WRONG_TYPE
+  };
+  /// Violation identified by the label alone; both optional members stay empty.
+  SchemaViolation(ValidationStatus status, LabelId label);
+  /// Violation caused by the schema property `violated_type`; no property value is recorded.
+  SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type);
+  /// Violation caused by `violated_type`, together with the property value that triggered it.
+  SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type,
+                  PropertyValue violated_property_value);
+
+  ValidationStatus status;
+  LabelId label;
+  std::optional<SchemaProperty> violated_type;
+  std::optional<PropertyValue> violated_property_value;
+};
+
+/// Structure that represents a collection of schemas
+/// Schema can be mapped under only one label => primary label
+class Schemas {
+ public:
+  using SchemasMap = std::unordered_map<LabelId, std::vector<SchemaProperty>>;
+  using SchemasList = std::vector<std::pair<LabelId, std::vector<SchemaProperty>>>;
+
+  Schemas() = default;
+  Schemas(const Schemas &) = delete;
+  Schemas(Schemas &&) = delete;
+  Schemas &operator=(const Schemas &) = delete;
+  Schemas &operator=(Schemas &&) = delete;
+  ~Schemas() = default;
+  /// Registers `schemas_types` under `label`; returns false if the label already has a schema.
+  [[nodiscard]] bool CreateSchema(LabelId label, const std::vector<SchemaProperty> &schemas_types);
+  /// Removes the schema stored under `label`; returns false if there was none.
+  [[nodiscard]] bool DeleteSchema(LabelId label);
+  /// Checks `vertex` against the schema of `primary_label`; std::nullopt means no violation was found.
+  [[nodiscard]] std::optional<SchemaViolation> ValidateVertex(LabelId primary_label, const Vertex &vertex);
+  /// Returns a copy of all stored schemas as (label, properties) pairs.
+  [[nodiscard]] SchemasList ListSchemas() const;
+
+ private:
+  SchemasMap schemas_;
+};
+
+/// Maps the runtime type of `property_value` to the matching `SchemaProperty::Type`.
+/// Returns std::nullopt for values that have no schema type (Null, Map, List) and for any
+/// property or temporal type not listed below, so that every path returns a value.
+inline std::optional<SchemaProperty::Type> PropertyValueTypeToSchemaProperty(const PropertyValue &property_value) {
+  switch (property_value.type()) {
+    case PropertyValue::Type::Bool: {
+      return SchemaProperty::Type::Bool;
+    }
+    case PropertyValue::Type::Int: {
+      return SchemaProperty::Type::Int;
+    }
+    case PropertyValue::Type::Double: {
+      return SchemaProperty::Type::Double;
+    }
+    case PropertyValue::Type::String: {
+      return SchemaProperty::Type::String;
+    }
+    case PropertyValue::Type::TemporalData: {
+      switch (property_value.ValueTemporalData().type) {
+        case TemporalType::Date:
+          return SchemaProperty::Type::Date;
+        case TemporalType::LocalDateTime:
+          return SchemaProperty::Type::LocalDateTime;
+        case TemporalType::LocalTime:
+          return SchemaProperty::Type::LocalTime;
+        case TemporalType::Duration:
+          return SchemaProperty::Type::Duration;
+      }
+      break;  // explicit exit: do not fall through into the Null/Map/List labels
+    }
+    case PropertyValue::Type::Null:
+    case PropertyValue::Type::Map:
+    case PropertyValue::Type::List:
+      break;
+  }
+  return std::nullopt;
+}
+
+/// Returns the human-readable name of a schema property type.
+///
+/// @param type schema property type to name
+/// @return the type name, e.g. "Integer" for SchemaProperty::Type::Int
+inline std::string SchemaPropertyToString(const SchemaProperty::Type type) {
+  switch (type) {
+    case SchemaProperty::Type::Bool:
+      return "Bool";
+    case SchemaProperty::Type::Int:
+      return "Integer";
+    case SchemaProperty::Type::Double:
+      return "Double";
+    case SchemaProperty::Type::String:
+      return "String";
+    case SchemaProperty::Type::Date:
+      return "Date";
+    case SchemaProperty::Type::LocalTime:
+      return "LocalTime";
+    case SchemaProperty::Type::LocalDateTime:
+      return "LocalDateTime";
+    case SchemaProperty::Type::Duration:
+      return "Duration";
+  }
+
+  // `type` is backed by uint8_t, so a value outside the enumerators is representable; give such a
+  // value a defined result instead of running off the end of a value-returning function.
+  return "Unknown";
+}
+
+}  // namespace memgraph::storage
diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp
index c0842dbee..4b6e46709 100644
--- a/src/storage/v2/storage.cpp
+++ b/src/storage/v2/storage.cpp
@@ -28,6 +28,7 @@
 #include "storage/v2/indices.hpp"
 #include "storage/v2/mvcc.hpp"
 #include "storage/v2/replication/config.hpp"
+#include "storage/v2/schemas.hpp"
 #include "storage/v2/transaction.hpp"
 #include "storage/v2/vertex_accessor.hpp"
 #include "utils/file.hpp"
@@ -456,12 +457,13 @@ VertexAccessor Storage::Accessor::CreateVertex() {
   OOMExceptionEnabler oom_exception;
   auto gid = storage_->vertex_id_.fetch_add(1, std::memory_order_acq_rel);
   auto acc = storage_->vertices_.access();
-  auto delta = CreateDeleteObjectDelta(&transaction_);
+  auto *delta = CreateDeleteObjectDelta(&transaction_);
   auto [it, inserted] = acc.insert(Vertex{storage::Gid::FromUint(gid), delta});
   MG_ASSERT(inserted, "The vertex must be inserted here!");
   MG_ASSERT(it != acc.end(), "Invalid Vertex accessor!");
+
   delta->prev.Set(&*it);
-  return VertexAccessor(&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_);
+  return {&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_};
 }
 
 VertexAccessor Storage::Accessor::CreateVertex(storage::Gid gid) {
@@ -1227,6 +1229,11 @@ ConstraintsInfo Storage::ListAllConstraints() const {
   return {ListExistenceConstraints(constraints_), constraints_.unique_constraints.ListConstraints()};
 }
 
+SchemasInfo Storage::ListAllSchemas() const {
+  const std::shared_lock<utils::RWLock> storage_guard_{main_lock_};  // shared lock held while the list is copied
+  return SchemasInfo{schemas_.ListSchemas()};
+}
+
 StorageInfo Storage::GetInfo() const {
   auto vertex_count = vertices_.size();
   auto edge_count = edge_count_.load(std::memory_order_acquire);
diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp
index 7b20dc20b..d935ca5f3 100644
--- a/src/storage/v2/storage.hpp
+++ b/src/storage/v2/storage.hpp
@@ -16,6 +16,7 @@
 #include <optional>
 #include <shared_mutex>
 #include <variant>
+#include <vector>
 
 #include "io/network/endpoint.hpp"
 #include "storage/v2/commit_log.hpp"
@@ -25,14 +26,18 @@
 #include "storage/v2/durability/wal.hpp"
 #include "storage/v2/edge.hpp"
 #include "storage/v2/edge_accessor.hpp"
+#include "storage/v2/id_types.hpp"
 #include "storage/v2/indices.hpp"
 #include "storage/v2/isolation_level.hpp"
 #include "storage/v2/mvcc.hpp"
 #include "storage/v2/name_id_mapper.hpp"
+#include "storage/v2/property_value.hpp"
 #include "storage/v2/result.hpp"
+#include "storage/v2/schemas.hpp"
 #include "storage/v2/transaction.hpp"
 #include "storage/v2/vertex.hpp"
 #include "storage/v2/vertex_accessor.hpp"
+#include "utils/exceptions.hpp"
 #include "utils/file_locker.hpp"
 #include "utils/on_scope_exit.hpp"
 #include "utils/rw_lock.hpp"
@@ -173,6 +178,11 @@ struct ConstraintsInfo {
   std::vector<std::pair<LabelId, std::set<PropertyId>>> unique;
 };
 
+/// Structure used to return information about existing schemas in the storage
+struct SchemasInfo {
+  Schemas::SchemasList schemas;  ///< (label, schema properties) pairs, one per stored schema
+};
+
 /// Structure used to return information about the storage.
 struct StorageInfo {
   uint64_t vertex_count;
@@ -306,6 +316,8 @@ class Storage final {
               storage_->constraints_.unique_constraints.ListConstraints()};
     }
 
+    SchemasInfo ListAllSchemas() const { return {storage_->schemas_.ListSchemas()}; }
+
     void AdvanceCommand();
 
     /// Commit returns `ConstraintViolation` if the changes made by this
@@ -364,7 +376,7 @@ class Storage final {
   IndicesInfo ListAllIndices() const;
 
   /// Creates an existence constraint. Returns true if the constraint was
-  /// successfuly added, false if it already exists and a `ConstraintViolation`
+  /// successfully added, false if it already exists and a `ConstraintViolation`
   /// if there is an existing vertex violating the constraint.
   ///
   /// @throw std::bad_alloc
@@ -402,6 +414,12 @@ class Storage final {
 
   ConstraintsInfo ListAllConstraints() const;
 
+  bool CreateSchema(LabelId primary_label, std::vector<SchemaProperty> &schemas_types);
+
+  bool DeleteSchema(LabelId primary_label);
+
+  SchemasInfo ListAllSchemas() const;
+
   StorageInfo GetInfo() const;
 
   bool LockPath();
@@ -491,6 +509,7 @@ class Storage final {
 
   Constraints constraints_;
   Indices indices_;
+  Schemas schemas_;
 
   // Transaction engine
   utils::SpinLock engine_lock_;