From 6666eea8972c0ec0914db1d4ff15a4b979bdfc38 Mon Sep 17 00:00:00 2001 From: Jure Bajic <jure.bajic@memgraph.com> Date: Thu, 23 Jun 2022 14:04:44 +0200 Subject: [PATCH] Add initial schema implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add initial schema implementation * Add index to schema * List schemas and enable multiple properties * Implement SchemaTypes * Apply suggestions from code review Co-authored-by: Jeremy B <97525434+42jeremy@users.noreply.github.com> Co-authored-by: János Benjamin Antal <antaljanosbenjamin@users.noreply.github.com> * Address review comments * Remove Map and List * Apply suggestions from code review Co-authored-by: Kostas Kyrimis <kostaskyrim@gmail.com> Co-authored-by: Jeremy B <97525434+42jeremy@users.noreply.github.com> Co-authored-by: János Benjamin Antal <antaljanosbenjamin@users.noreply.github.com> Co-authored-by: Kostas Kyrimis <kostaskyrim@gmail.com> --- src/query/db_accessor.hpp | 2 + src/storage/v2/CMakeLists.txt | 1 + src/storage/v2/schemas.cpp | 74 ++++++++++++++++ src/storage/v2/schemas.hpp | 155 ++++++++++++++++++++++++++++++++++ src/storage/v2/storage.cpp | 11 ++- src/storage/v2/storage.hpp | 21 ++++- 6 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 src/storage/v2/schemas.cpp create mode 100644 src/storage/v2/schemas.hpp diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index 55514c883..1e19d0018 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -356,6 +356,8 @@ class DbAccessor final { storage::IndicesInfo ListAllIndices() const { return accessor_->ListAllIndices(); } storage::ConstraintsInfo ListAllConstraints() const { return accessor_->ListAllConstraints(); } + + storage::SchemasInfo ListAllSchemas() const { return accessor_->ListAllSchemas(); } }; } // namespace memgraph::query diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index f33a8553d..dab088c93 100644 --- 
a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -10,6 +10,7 @@ set(storage_v2_src_files indices.cpp property_store.cpp vertex_accessor.cpp + schemas.cpp storage.cpp) ##### Replication ##### diff --git a/src/storage/v2/schemas.cpp b/src/storage/v2/schemas.cpp new file mode 100644 index 000000000..2e4fbbe3b --- /dev/null +++ b/src/storage/v2/schemas.cpp @@ -0,0 +1,74 @@ +// Copyright 2022 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include <unordered_map> +#include <utility> +#include <vector> + +#include "storage/v2/property_value.hpp" +#include "storage/v2/schemas.hpp" + +namespace memgraph::storage { + +SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label) : status{status}, label{label} {} +SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type) + : status{status}, label{label}, violated_type{violated_type} {} + +SchemaViolation::SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type, + PropertyValue violated_property_value) + : status{status}, label{label}, violated_type{violated_type}, violated_property_value{violated_property_value} {} + +bool Schemas::CreateSchema(const LabelId primary_label, const std::vector<SchemaProperty> &schemas_types) { + return schemas_.insert({primary_label, schemas_types}).second; +} + +bool Schemas::DeleteSchema(const LabelId primary_label) { + return schemas_.erase(primary_label); +} + +std::optional<SchemaViolation> 
Schemas::ValidateVertex(const LabelId primary_label, const Vertex &vertex) { + // TODO Check for multiple defined primary labels + const auto schema = schemas_.find(primary_label); + if (schema == schemas_.end()) { + return SchemaViolation(SchemaViolation::ValidationStatus::NO_SCHEMA_DEFINED_FOR_LABEL, primary_label); + } + if (!utils::Contains(vertex.labels, primary_label)) { + return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_HAS_NO_PRIMARY_LABEL, primary_label); + } + + for (const auto &schema_type : schema->second) { + if (!vertex.properties.HasProperty(schema_type.property_id)) { + return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_HAS_NO_PROPERTY, primary_label, schema_type); + } + // Property type check + // TODO Can this be replaced with just property id check? + if (auto vertex_property = vertex.properties.GetProperty(schema_type.property_id); + PropertyValueTypeToSchemaProperty(vertex_property) != schema_type.type) { + return SchemaViolation(SchemaViolation::ValidationStatus::VERTEX_PROPERTY_WRONG_TYPE, primary_label, schema_type, + vertex_property); + } + } + // TODO Once vertex hashing is introduced, add a check for vertex + // primary key uniqueness + + return std::nullopt; +} + +Schemas::SchemasList Schemas::ListSchemas() const { + Schemas::SchemasList ret; + ret.reserve(schemas_.size()); + for (const auto &[label_props, schema_property] : schemas_) { + ret.emplace_back(label_props, schema_property); + } + return ret; +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/schemas.hpp b/src/storage/v2/schemas.hpp new file mode 100644 index 000000000..113707069 --- /dev/null +++ b/src/storage/v2/schemas.hpp @@ -0,0 +1,155 @@ +// Copyright 2022 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include <memory> +#include <optional> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "storage/v2/id_types.hpp" +#include "storage/v2/indices.hpp" +#include "storage/v2/property_value.hpp" +#include "storage/v2/temporal.hpp" +#include "storage/v2/transaction.hpp" +#include "storage/v2/vertex.hpp" +#include "utils/result.hpp" + +namespace memgraph::storage { + +class SchemaViolationException : public utils::BasicException { + using utils::BasicException::BasicException; +}; + +struct SchemaProperty { + enum class Type : uint8_t { Bool, Int, Double, String, Date, LocalTime, LocalDateTime, Duration }; + + Type type; + PropertyId property_id; +}; + +struct SchemaViolation { + enum class ValidationStatus : uint8_t { + VERTEX_HAS_NO_PRIMARY_LABEL, + VERTEX_HAS_NO_PROPERTY, + NO_SCHEMA_DEFINED_FOR_LABEL, + VERTEX_PROPERTY_WRONG_TYPE + }; + + SchemaViolation(ValidationStatus status, LabelId label); + + SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type); + + SchemaViolation(ValidationStatus status, LabelId label, SchemaProperty violated_type, + PropertyValue violated_property_value); + + ValidationStatus status; + LabelId label; + std::optional<SchemaProperty> violated_type; + std::optional<PropertyValue> violated_property_value; +}; + +/// Structure that represents a collection of schemas. +/// Each schema is mapped under exactly one label, its primary label. +class Schemas { + public: + 
using SchemasMap = std::unordered_map<LabelId, std::vector<SchemaProperty>>; + using SchemasList = std::vector<std::pair<LabelId, std::vector<SchemaProperty>>>; + + Schemas() = default; + Schemas(const Schemas &) = delete; + Schemas(Schemas &&) = delete; + Schemas &operator=(const Schemas &) = delete; + Schemas &operator=(Schemas &&) = delete; + ~Schemas() = default; + + [[nodiscard]] bool CreateSchema(LabelId label, const std::vector<SchemaProperty> &schemas_types); + + [[nodiscard]] bool DeleteSchema(LabelId label); + + [[nodiscard]] std::optional<SchemaViolation> ValidateVertex(LabelId primary_label, const Vertex &vertex); + + [[nodiscard]] SchemasList ListSchemas() const; + + private: + SchemasMap schemas_; +}; + +inline std::optional<SchemaProperty::Type> PropertyValueTypeToSchemaProperty(const PropertyValue &property_value) { + switch (property_value.type()) { + case PropertyValue::Type::Bool: { + return SchemaProperty::Type::Bool; + } + case PropertyValue::Type::Int: { + return SchemaProperty::Type::Int; + } + case PropertyValue::Type::Double: { + return SchemaProperty::Type::Double; + } + case PropertyValue::Type::String: { + return SchemaProperty::Type::String; + } + case PropertyValue::Type::TemporalData: { + switch (property_value.ValueTemporalData().type) { + case TemporalType::Date: { + return SchemaProperty::Type::Date; + } + case TemporalType::LocalDateTime: { + return SchemaProperty::Type::LocalDateTime; + } + case TemporalType::LocalTime: { + return SchemaProperty::Type::LocalTime; + } + case TemporalType::Duration: { + return SchemaProperty::Type::Duration; + } + } + } + case PropertyValue::Type::Null: + case PropertyValue::Type::Map: + case PropertyValue::Type::List: { + return std::nullopt; + } + } +} + +inline std::string SchemaPropertyToString(const SchemaProperty::Type type) { + switch (type) { + case SchemaProperty::Type::Bool: { + return "Bool"; + } + case SchemaProperty::Type::Int: { + return "Integer"; + } + case 
SchemaProperty::Type::Double: { + return "Double"; + } + case SchemaProperty::Type::String: { + return "String"; + } + case SchemaProperty::Type::Date: { + return "Date"; + } + case SchemaProperty::Type::LocalTime: { + return "LocalTime"; + } + case SchemaProperty::Type::LocalDateTime: { + return "LocalDateTime"; + } + case SchemaProperty::Type::Duration: { + return "Duration"; + } + } +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index c0842dbee..4b6e46709 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -28,6 +28,7 @@ #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/replication/config.hpp" +#include "storage/v2/schemas.hpp" #include "storage/v2/transaction.hpp" #include "storage/v2/vertex_accessor.hpp" #include "utils/file.hpp" @@ -456,12 +457,13 @@ VertexAccessor Storage::Accessor::CreateVertex() { OOMExceptionEnabler oom_exception; auto gid = storage_->vertex_id_.fetch_add(1, std::memory_order_acq_rel); auto acc = storage_->vertices_.access(); - auto delta = CreateDeleteObjectDelta(&transaction_); + auto *delta = CreateDeleteObjectDelta(&transaction_); auto [it, inserted] = acc.insert(Vertex{storage::Gid::FromUint(gid), delta}); MG_ASSERT(inserted, "The vertex must be inserted here!"); MG_ASSERT(it != acc.end(), "Invalid Vertex accessor!"); + delta->prev.Set(&*it); - return VertexAccessor(&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_); + return {&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_}; } VertexAccessor Storage::Accessor::CreateVertex(storage::Gid gid) { @@ -1227,6 +1229,11 @@ ConstraintsInfo Storage::ListAllConstraints() const { return {ListExistenceConstraints(constraints_), constraints_.unique_constraints.ListConstraints()}; } +SchemasInfo Storage::ListAllSchemas() const { + std::shared_lock<utils::RWLock> storage_guard_(main_lock_); + return {schemas_.ListSchemas()}; +} 
+ StorageInfo Storage::GetInfo() const { auto vertex_count = vertices_.size(); auto edge_count = edge_count_.load(std::memory_order_acquire); diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 7b20dc20b..d935ca5f3 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -16,6 +16,7 @@ #include <optional> #include <shared_mutex> #include <variant> +#include <vector> #include "io/network/endpoint.hpp" #include "storage/v2/commit_log.hpp" @@ -25,14 +26,18 @@ #include "storage/v2/durability/wal.hpp" #include "storage/v2/edge.hpp" #include "storage/v2/edge_accessor.hpp" +#include "storage/v2/id_types.hpp" #include "storage/v2/indices.hpp" #include "storage/v2/isolation_level.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/name_id_mapper.hpp" +#include "storage/v2/property_value.hpp" #include "storage/v2/result.hpp" +#include "storage/v2/schemas.hpp" #include "storage/v2/transaction.hpp" #include "storage/v2/vertex.hpp" #include "storage/v2/vertex_accessor.hpp" +#include "utils/exceptions.hpp" #include "utils/file_locker.hpp" #include "utils/on_scope_exit.hpp" #include "utils/rw_lock.hpp" @@ -173,6 +178,11 @@ struct ConstraintsInfo { std::vector<std::pair<LabelId, std::set<PropertyId>>> unique; }; +/// Structure used to return information about existing schemas in the storage +struct SchemasInfo { + Schemas::SchemasList schemas; +}; + /// Structure used to return information about the storage. struct StorageInfo { uint64_t vertex_count; @@ -306,6 +316,8 @@ class Storage final { storage_->constraints_.unique_constraints.ListConstraints()}; } + SchemasInfo ListAllSchemas() const { return {storage_->schemas_.ListSchemas()}; } + void AdvanceCommand(); /// Commit returns `ConstraintViolation` if the changes made by this @@ -364,7 +376,7 @@ class Storage final { IndicesInfo ListAllIndices() const; /// Creates an existence constraint. 
Returns true if the constraint was - /// successfuly added, false if it already exists and a `ConstraintViolation` + /// successfully added, false if it already exists and a `ConstraintViolation` /// if there is an existing vertex violating the constraint. /// /// @throw std::bad_alloc @@ -402,6 +414,12 @@ class Storage final { ConstraintsInfo ListAllConstraints() const; + bool CreateSchema(LabelId primary_label, std::vector<SchemaProperty> &schemas_types); + + bool DeleteSchema(LabelId primary_label); + + SchemasInfo ListAllSchemas() const; + StorageInfo GetInfo() const; bool LockPath(); @@ -491,6 +509,7 @@ class Storage final { Constraints constraints_; Indices indices_; + Schemas schemas_; // Transaction engine utils::SpinLock engine_lock_;