Add initial version of properties on disk

Summary:
A simplified end-to-end implementation

POD interface set-up, still have bugs with HDDkeys

Version bug fix and first iterator implementation

Fixed out-of-scope reference in PVS iterator

Added PVS unit tests

Reviewers: buda, mferencevic, dgleich, teon.banek

Reviewed By: buda, dgleich

Subscribers: mferencevic, pullbot

Differential Revision: https://phabricator.memgraph.io/D1369
This commit is contained in:
Ivan Paljak 2018-05-10 16:33:41 +02:00
parent 797bd9e435
commit 909e42d414
21 changed files with 621 additions and 185 deletions

View File

@ -59,6 +59,7 @@ set(memgraph_src_files
storage/edge_accessor.cpp
storage/locking/record_lock.cpp
storage/property_value.cpp
storage/property_value_store.cpp
storage/record_accessor.cpp
storage/vertex_accessor.cpp
threading/sync/rwlock.cpp

View File

@ -3,6 +3,7 @@
#include "database/graph_db.hpp"
#include "storage/gid.hpp"
#include "utils/flag_validation.hpp"
#include "utils/string.hpp"
// Durability flags.
DEFINE_bool(durability_enabled, false,
@ -26,6 +27,10 @@ DEFINE_int32(query_execution_time_sec, 180,
DEFINE_int32(gc_cycle_sec, 30,
"Amount of time between starts of two cleaning cycles in seconds. "
"-1 to turn off.");
// Data location.
DEFINE_string(properties_on_disk, "",
"Property names of properties which will be stored on available "
"disk. Property names have to be separated with comma (,).");
#ifndef MG_COMMUNITY
// Distributed master/worker flags.
@ -71,7 +76,9 @@ database::Config::Config()
snapshot_on_exit{FLAGS_snapshot_on_exit},
// Misc flags.
gc_cycle_sec{FLAGS_gc_cycle_sec},
query_execution_time_sec{FLAGS_query_execution_time_sec}
query_execution_time_sec{FLAGS_query_execution_time_sec},
// Data location.
properties_on_disk(utils::Split(FLAGS_properties_on_disk, ","))
#ifndef MG_COMMUNITY
,
// Distributed flags.

View File

@ -67,7 +67,8 @@ class PrivateBase : public GraphDb {
}
void ReinitializeStorage() override {
storage_ = std::make_unique<Storage>(WorkerId());
storage_ =
std::make_unique<Storage>(WorkerId(), config_.properties_on_disk);
}
distributed::PullRpcClients &pull_clients() override {
@ -88,7 +89,7 @@ class PrivateBase : public GraphDb {
protected:
std::unique_ptr<Storage> storage_ =
std::make_unique<Storage>(config_.worker_id);
std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk);
durability::WriteAheadLog wal_{config_.worker_id,
config_.durability_directory,
config_.durability_enabled};
@ -129,7 +130,8 @@ class SingleNode : public PrivateBase {
std::unique_ptr<StorageGcSingleNode> storage_gc_ =
std::make_unique<StorageGcSingleNode>(*storage_, tx_engine_,
config_.gc_cycle_sec);
TypemapPack<SingleNodeConcurrentIdMapper> typemap_pack_;
TypemapPack<SingleNodeConcurrentIdMapper> typemap_pack_{
storage_->PropertiesOnDisk()};
database::SingleNodeCounters counters_;
std::vector<int> GetWorkerIds() const override { return {0}; }
distributed::DataRpcServer &data_server() override {

View File

@ -32,6 +32,7 @@ namespace database {
/// Database configuration. Initialized from flags, but modifiable.
struct Config {
Config();
// Durability flags.
bool durability_enabled;
std::string durability_directory;
@ -50,6 +51,9 @@ struct Config {
io::network::Endpoint master_endpoint{"0.0.0.0", 0};
io::network::Endpoint worker_endpoint{"0.0.0.0", 0};
int recovering_cluster_size{0};
// set of properties which will be stored on disk
std::vector<std::string> properties_on_disk;
};
/**

View File

@ -180,14 +180,14 @@ void StateDelta::Encode(
encoder.WriteInt(edge_id);
encoder.WriteInt(vertex_from_id);
encoder.WriteInt(vertex_to_id);
encoder.WriteInt(edge_type.storage());
encoder.WriteInt(edge_type.Id());
encoder.WriteString(edge_type_name);
break;
case Type::ADD_OUT_EDGE:
encoder.WriteInt(vertex_id);
encoder.WriteInt(vertex_to_address.raw());
encoder.WriteInt(edge_address.raw());
encoder.WriteInt(edge_type.storage());
encoder.WriteInt(edge_type.Id());
break;
case Type::REMOVE_OUT_EDGE:
encoder.WriteInt(vertex_id);
@ -197,7 +197,7 @@ void StateDelta::Encode(
encoder.WriteInt(vertex_id);
encoder.WriteInt(vertex_from_address.raw());
encoder.WriteInt(edge_address.raw());
encoder.WriteInt(edge_type.storage());
encoder.WriteInt(edge_type.Id());
break;
case Type::REMOVE_IN_EDGE:
encoder.WriteInt(vertex_id);
@ -205,20 +205,20 @@ void StateDelta::Encode(
break;
case Type::SET_PROPERTY_VERTEX:
encoder.WriteInt(vertex_id);
encoder.WriteInt(property.storage());
encoder.WriteInt(property.Id());
encoder.WriteString(property_name);
encoder.WritePropertyValue(value);
break;
case Type::SET_PROPERTY_EDGE:
encoder.WriteInt(edge_id);
encoder.WriteInt(property.storage());
encoder.WriteInt(property.Id());
encoder.WriteString(property_name);
encoder.WritePropertyValue(value);
break;
case Type::ADD_LABEL:
case Type::REMOVE_LABEL:
encoder.WriteInt(vertex_id);
encoder.WriteInt(label.storage());
encoder.WriteInt(label.Id());
encoder.WriteString(label_name);
break;
case Type::REMOVE_VERTEX:
@ -228,9 +228,9 @@ void StateDelta::Encode(
encoder.WriteInt(edge_id);
break;
case Type::BUILD_INDEX:
encoder.WriteInt(label.storage());
encoder.WriteInt(label.Id());
encoder.WriteString(label_name);
encoder.WriteInt(property.storage());
encoder.WriteInt(property.Id());
encoder.WriteString(property_name);
break;
}

View File

@ -10,6 +10,7 @@
#include "mvcc/version_list.hpp"
#include "storage/address.hpp"
#include "storage/edge.hpp"
#include "storage/kvstore_mock.hpp"
#include "storage/types.hpp"
#include "storage/vertex.hpp"
#include "transactions/type.hpp"
@ -34,10 +35,11 @@ namespace database {
/** A data structure containing the main data members of a graph database. */
class Storage {
public:
explicit Storage(int worker_id)
Storage(int worker_id, const std::vector<std::string> &properties_on_disk)
: worker_id_(worker_id),
vertex_generator_{worker_id},
edge_generator_{worker_id} {}
edge_generator_{worker_id},
properties_on_disk_{properties_on_disk} {}
public:
~Storage() {
@ -90,6 +92,9 @@ class Storage {
/// Gets the local edge address for the given gid. Fails if not present.
mvcc::VersionList<Edge> *LocalEdgeAddress(gid::Gid gid) const;
/// Gets names of properties stored on disk
std::vector<std::string> &PropertiesOnDisk() { return properties_on_disk_; }
private:
friend class GraphDbAccessor;
friend class StorageGc;
@ -110,6 +115,8 @@ class Storage {
KeyIndex<storage::Label, Vertex> labels_index_;
LabelPropertyIndex label_property_index_;
std::vector<std::string> properties_on_disk_;
// Set of transactions ids which are building indexes currently
SkipList<tx::TransactionId> index_build_tx_in_progress_;

View File

@ -32,7 +32,7 @@ template <typename TArchive>
void SaveProperties(TArchive &ar, const PropertyValueStore &props) {
ar << props.size();
for (auto &kv : props) {
ar << kv.first.storage();
ar << kv.first.Id();
utils::SaveTypedValue(ar, kv.second);
}
}
@ -54,7 +54,7 @@ void SaveVertex(TArchive &ar, const Vertex &vertex, int worker_id) {
for (auto &edge_struct : edges) {
impl::SaveAddress(ar, edge_struct.vertex, worker_id);
impl::SaveAddress(ar, edge_struct.edge, worker_id);
ar << edge_struct.edge_type.storage();
ar << edge_struct.edge_type.Id();
}
};
save_edges(vertex.out_);
@ -62,7 +62,7 @@ void SaveVertex(TArchive &ar, const Vertex &vertex, int worker_id) {
ar << vertex.labels_.size();
for (auto &label : vertex.labels_) {
ar << label.storage();
ar << label.Id();
}
impl::SaveProperties(ar, vertex.properties_);
@ -81,7 +81,7 @@ template <typename TArchive>
void SaveEdge(TArchive &ar, const Edge &edge, int worker_id) {
impl::SaveAddress(ar, edge.from_, worker_id);
impl::SaveAddress(ar, edge.to_, worker_id);
ar << edge.edge_type_.storage();
ar << edge.edge_type_.Id();
impl::SaveProperties(ar, edge.properties_);
}
@ -113,7 +113,7 @@ void LoadProperties(TArchive &ar, PropertyValueStore &store) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Property::StorageT prop;
storage::Property::IdT prop;
ar >> prop;
query::TypedValue value;
utils::LoadTypedValue(ar, value);
@ -138,7 +138,7 @@ std::unique_ptr<Vertex> LoadVertex(TArchive &ar) {
ar >> count;
for (size_t i = 0; i < count; ++i) {
auto vertex_address = impl::LoadVertexAddress(ar);
storage::EdgeType::StorageT edge_type;
storage::EdgeType::IdT edge_type;
gid::Gid edge_id;
ar >> edge_id;
int edge_worker_id;
@ -154,7 +154,7 @@ std::unique_ptr<Vertex> LoadVertex(TArchive &ar) {
size_t count;
ar >> count;
for (size_t i = 0; i < count; ++i) {
storage::Label::StorageT label;
storage::Label::IdT label;
ar >> label;
vertex->labels_.emplace_back(label);
}
@ -173,7 +173,7 @@ template <typename TArchive>
std::unique_ptr<Edge> LoadEdge(TArchive &ar) {
auto from = impl::LoadVertexAddress(ar);
auto to = impl::LoadVertexAddress(ar);
storage::EdgeType::StorageT edge_type;
storage::EdgeType::IdT edge_type;
ar >> edge_type;
auto edge = std::make_unique<Edge>(from, to, storage::EdgeType{edge_type});
impl::LoadProperties(ar, edge->properties_);

View File

@ -1,28 +1,40 @@
#pragma once
#include <mutex>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/concurrent_id_mapper.hpp"
#include "storage/types.hpp"
#include "utils/algorithm.hpp"
namespace storage {
/** SingleNode implementation of ConcurrentIdMapper. */
template <typename TId>
class SingleNodeConcurrentIdMapper : public ConcurrentIdMapper<TId> {
using StorageT = typename TId::StorageT;
using StorageT = typename TId::IdT;
public:
SingleNodeConcurrentIdMapper() = default;
explicit SingleNodeConcurrentIdMapper(
const std::vector<std::string> &properties_on_disk)
: properties_on_disk_(properties_on_disk) {}
TId value_to_id(const std::string &value) override {
auto value_to_id_acc = value_to_id_.access();
auto found = value_to_id_acc.find(value);
TId inserted_id(0);
if (found == value_to_id_acc.end()) {
StorageT new_id = id_.fetch_add(1);
DCHECK(new_id < std::numeric_limits<StorageT>::max())
<< "Number of used ids overflowed our container";
auto insert_result = value_to_id_acc.insert(value, TId(new_id));
// After we tried to insert value with our id we either got our id, or the
// id created by the thread which succesfully inserted (value, id) pair
inserted_id = insert_result.first->second;
// because that's ConcurrentMap's behaviour
if (std::is_same<TId, Property>::value)
inserted_id =
value_to_id_acc.insert(value, TId(new_id, PropertyLocation(value)))
.first->second;
else
inserted_id = value_to_id_acc.insert(value, TId(new_id)).first->second;
} else {
inserted_id = found->second;
}
@ -45,5 +57,13 @@ class SingleNodeConcurrentIdMapper : public ConcurrentIdMapper<TId> {
ConcurrentMap<std::string, TId> value_to_id_;
ConcurrentMap<TId, std::string> id_to_value_;
std::atomic<StorageT> id_{0};
std::vector<std::string> properties_on_disk_;
std::mutex mutex_;
Location PropertyLocation(const std::string &name) {
std::unique_lock<std::mutex> lock(mutex_);
if (utils::Contains(properties_on_disk_, name)) return Location::Disk;
return Location::Memory;
}
};
} // namespace storage

View File

@ -0,0 +1,40 @@
#pragma once
#include <experimental/optional>
#include <map>
#include <string>
#include <glog/logging.h>
// TODO(ipaljak): replace with the real implementation
namespace storage {
class KVStore {
public:
explicit KVStore(const std::string &) {}
bool Put(const std::string &key, const std::string &value) {
VLOG(31) << "PUT: " << key << " : " << value;
storage_[key] = value;
return true;
}
std::experimental::optional<std::string> Get(const std::string &key) const {
VLOG(31) << "GET: " << key;
auto it = storage_.find(key);
if (it == storage_.end()) return std::experimental::nullopt;
return it->second;
}
bool Delete(const std::string &key) {
VLOG(31) << "DELETE: " << key;
storage_.erase(key);
return true;
}
private:
std::map<std::string, std::string> storage_;
};
} // namespace storage

View File

@ -0,0 +1,119 @@
#include "storage/property_value_store.hpp"
using Property = storage::Property;
using Location = storage::Location;
std::atomic<uint64_t> PropertyValueStore::disk_key_cnt_ = {0};
storage::KVStore PropertyValueStore::disk_storage_("properties");
PropertyValue PropertyValueStore::at(const Property &key) const {
auto GetValue = [&key](const auto &props) {
for (const auto &kv : props)
if (kv.first == key) return kv.second;
return PropertyValue::Null;
};
if (key.Location() == Location::Memory) return GetValue(props_);
return GetValue(PropsOnDisk(std::to_string(disk_key_)));
}
void PropertyValueStore::set(const Property &key, const char *value) {
set(key, std::string(value));
}
void PropertyValueStore::set(const Property &key, const PropertyValue &value) {
if (value.type() == PropertyValue::Type::Null) {
erase(key);
return;
}
auto SetValue = [&key, &value](auto &props) {
for (auto &kv : props)
if (kv.first == key) {
kv.second = value;
return;
}
props.emplace_back(key, value);
};
if (key.Location() == Location::Memory) {
SetValue(props_);
} else {
auto props_on_disk = PropsOnDisk(std::to_string(disk_key_));
SetValue(props_on_disk);
disk_storage_.Put(std::to_string(disk_key_), SerializeProps(props_on_disk));
}
}
size_t PropertyValueStore::erase(const Property &key) {
auto EraseKey = [&key](auto &props) {
auto found = std::find_if(props.begin(), props.end(),
[&key](std::pair<Property, PropertyValue> &kv) {
return kv.first == key;
});
if (found != props.end()) {
props.erase(found);
return true;
}
return false;
};
if (key.Location() == Location::Memory) return EraseKey(props_);
auto props_on_disk = PropsOnDisk(std::to_string(disk_key_));
if (EraseKey(props_on_disk)) {
if (props_on_disk.empty())
return disk_storage_.Delete(std::to_string(disk_key_));
return disk_storage_.Put(std::to_string(disk_key_),
SerializeProps(props_on_disk));
}
return false;
}
void PropertyValueStore::clear() {
props_.clear();
disk_storage_.Delete(std::to_string(disk_key_));
}
/* TODO(ipaljak): replace serialize/deserialize with the real implementation.
* Currently supporting a only one property on disk per record and that property
* must be a string.
* */
std::string PropertyValueStore::SerializeProps(
const std::vector<std::pair<Property, PropertyValue>> &props) const {
if (props.size() > 1) throw std::runtime_error("Unsupported operation");
std::stringstream strstream;
strstream << props[0].first.Id() << "," << props[0].second;
return strstream.str();
}
std::vector<std::pair<Property, PropertyValue>> PropertyValueStore::Deserialize(
const std::string &serialized_props) const {
std::istringstream strstream(serialized_props);
std::string s;
std::getline(strstream, s, ',');
uint16_t id;
std::istringstream ss(s);
ss >> id;
Property key(id, Location::Disk);
std::getline(strstream, s, ',');
PropertyValue value(s);
std::vector<std::pair<Property, PropertyValue>> ret;
ret.emplace_back(key, value);
return ret;
}
std::vector<std::pair<Property, PropertyValue>> PropertyValueStore::PropsOnDisk(
const std::string &disk_key) const {
auto serialized = disk_storage_.Get(disk_key);
if (serialized) return Deserialize(disk_storage_.Get(disk_key).value());
return {};
}

View File

@ -1,22 +1,52 @@
#pragma once
#include <algorithm>
#include <atomic>
#include <experimental/optional>
#include <map>
#include <memory>
#include <vector>
#include "property_value.hpp"
#include "storage/types.hpp"
#include "glog/logging.h"
#include "storage/kvstore_mock.hpp"
#include "storage/property_value.hpp"
#include "storage/types.hpp"
/**
* A collection of properties accessed in a map-like way
* using a key of type Properties::Property.
* A collection of properties accessed in a map-like way using a key of type
* Properties::Property.
*
* The underlying implementation is not necessarily std::map.
* PropertyValueStore handles storage on disk or in memory. Property key defines
* where the corresponding property should be stored. Each instance of
* PropertyValueStore contains a disk_key_ member which specifies where on
* disk should the properties be stored. That key is inferred from a static
* global counter disk_key_cnt_.
*
* The underlying implementation of in-memory storage is not necessarily
* std::map.
*
* TODO(ipaljak) modify on-disk storage so that each property has its own
* key for storage. Storage key should, in essence, be an ordered pair
* (global key, property key).
*/
class PropertyValueStore {
using Property = storage::Property;
using Location = storage::Location;
public:
PropertyValueStore() = default;
PropertyValueStore(const PropertyValueStore &old) {
// We need to update disk key and disk key counter when calling a copy
// constructor due to mvcc.
props_ = old.props_;
disk_key_ = disk_key_cnt_++;
auto old_value = disk_storage_.Get(std::to_string(old.disk_key_));
if (old_value)
disk_storage_.Put(std::to_string(disk_key_), old_value.value());
}
~PropertyValueStore() { disk_storage_.Delete(std::to_string(disk_key_)); }
/**
* Returns a PropertyValue (by reference) at the given key.
* If the key does not exist, the Null property is returned.
@ -27,16 +57,13 @@ class PropertyValueStore {
* @param key The key for which a PropertyValue is sought.
* @return See above.
*/
PropertyValue at(const Property &key) const {
for (const auto &kv : props_)
if (kv.first == key) return kv.second;
return PropertyValue::Null;
}
PropertyValue at(const Property &key) const;
/**
* Sets the value for the given key. A new PropertyValue instance
* is created for the given value (which is of raw type).
* is created for the given value (which is of raw type). If the property
* is to be stored on disk then that instance does not represent an additional
* memory overhead as it goes out of scope at the end of this method.
*
* @tparam TValue Type of value. It must be one of the
* predefined possible PropertyValue values (bool, string, int...)
@ -46,15 +73,23 @@ class PropertyValueStore {
*/
template <typename TValue>
void set(const Property &key, const TValue &value) {
for (auto &kv : props_)
if (kv.first == key) {
kv.second = PropertyValue(value);
return;
}
auto SetValue = [&key, &value](auto &props) {
for (auto &kv : props)
if (kv.first == key) {
kv.second = value;
return;
}
props.emplace_back(key, value);
};
// there is no value for the given key, add new
// TODO consider vector size increment optimization
props_.emplace_back(key, value);
if (key.Location() == Location::Memory) {
SetValue(props_);
} else {
auto props_on_disk = PropsOnDisk(std::to_string(disk_key_));
SetValue(props_on_disk);
disk_storage_.Put(std::to_string(disk_key_),
SerializeProps(props_on_disk));
}
}
/**
@ -62,28 +97,13 @@ class PropertyValueStore {
* to std::string, otherwise templating might cast the pointer
* to something else (bool) and mess things up.
*/
void set(const Property &key, const char *value) {
set(key, std::string(value));
}
void set(const Property &key, const char *value);
/**
* Set overriding for PropertyValue. When setting a Null value it
* calls 'erase' instead of inserting the Null into storage.
*/
void set(const Property &key, const PropertyValue &value) {
if (value.type() == PropertyValue::Type::Null) {
erase(key);
return;
}
for (auto &kv : props_)
if (kv.first == key) {
kv.second = value;
return;
}
props_.emplace_back(key, value);
}
void set(const Property &key, const PropertyValue &value);
/**
* Removes the PropertyValue for the given key.
@ -91,26 +111,83 @@ class PropertyValueStore {
* @param key The key for which to remove the property.
* @return The number of removed properties (0 or 1).
*/
size_t erase(const Property &key) {
auto found = std::find_if(props_.begin(), props_.end(),
[&key](std::pair<Property, PropertyValue> &kv) {
return kv.first == key;
});
size_t erase(const Property &key);
if (found != props_.end()) {
props_.erase(found);
return 1;
/** Removes all the properties (both in-mem and on-disk) from this store. */
void clear();
/**
* Custom PVS iterator behaves as if all properties are stored in a single
* iterable collection of std::pair<Property, PropertyValue>.
* */
class iterator : public std::iterator<
std::input_iterator_tag, // iterator_category
std::pair<Property, PropertyValue>, // value_type
long, // difference_type
const std::pair<Property, PropertyValue> *, // pointer
const std::pair<Property, PropertyValue> & // reference
> {
public:
explicit iterator(const PropertyValueStore *init, int it)
: PVS_(init), it_(it) {}
iterator &operator++() {
++it_;
return *this;
}
return 0;
iterator operator++(int) {
iterator ret = *this;
++(*this);
return ret;
}
bool operator==(const iterator &other) const {
return PVS_ == other.PVS_ && it_ == other.it_;
}
bool operator!=(const iterator &other) const {
return PVS_ != other.PVS_ || it_ != other.it_;
}
reference operator*() {
if (it_ < static_cast<int>(PVS_->props_.size())) return PVS_->props_[it_];
auto disk_props = PVS_->PropsOnDisk(std::to_string(PVS_->disk_key_));
disk_prop_ = disk_props[it_ - PVS_->props_.size()];
return disk_prop_.value();
}
pointer operator->() { return &**this; }
private:
const PropertyValueStore *PVS_;
int32_t it_;
std::experimental::optional<std::pair<Property, PropertyValue>> disk_prop_;
};
size_t size() const {
size_t ram_size = props_.size();
size_t disk_size = PropsOnDisk(std::to_string(disk_key_)).size();
return ram_size + disk_size;
}
/** Removes all the properties from this store. */
void clear() { props_.clear(); }
size_t size() const { return props_.size(); }
auto begin() const { return props_.begin(); }
auto end() const { return props_.end(); }
auto begin() const { return iterator(this, 0); }
auto end() const { return iterator(this, size()); }
private:
static std::atomic<uint64_t> disk_key_cnt_;
uint64_t disk_key_ = disk_key_cnt_++;
static storage::KVStore disk_storage_;
std::vector<std::pair<Property, PropertyValue>> props_;
std::string SerializeProps(
const std::vector<std::pair<Property, PropertyValue>> &props) const;
std::vector<std::pair<Property, PropertyValue>> Deserialize(
const std::string &serialized_props) const;
std::vector<std::pair<Property, PropertyValue>> PropsOnDisk(
const std::string &disk_key) const;
};

View File

@ -20,8 +20,7 @@ RecordAccessor<TRecord>::RecordAccessor(AddressT address,
}
template <typename TRecord>
PropertyValue RecordAccessor<TRecord>::PropsAt(
storage::Property key) const {
PropertyValue RecordAccessor<TRecord>::PropsAt(storage::Property key) const {
return current().properties_.at(key);
}

View File

@ -32,7 +32,7 @@ class RecordAccessor : public utils::TotalOrdering<RecordAccessor<TRecord>> {
/**
* The database::GraphDbAccessor is friend to this accessor so it can
* operate on it's data (mvcc version-list and the record itself).
* This is legitemate because database::GraphDbAccessor creates
* This is legitimate because database::GraphDbAccessor creates
* RecordAccessors
* and is semantically their parent/owner. It is necessary because
* the database::GraphDbAccessor handles insertions and deletions, and these

View File

@ -2,54 +2,80 @@
#include <cstdint>
#include <functional>
#include <limits>
#include "boost/serialization/base_object.hpp"
#include "glog/logging.h"
#include "types.capnp.h"
#include "utils/total_ordering.hpp"
namespace storage {
// In case of a new location Mask value has to be updated.
enum class Location : uint16_t { Memory = 0x8000, Disk = 0x0000 };
/**
* |-------------|--------------|
* |---location--|------id------|
* |-Memory|Disk-|-----2^15-----|
*/
template <typename TSpecificType>
class Common : public utils::TotalOrdering<TSpecificType> {
public:
using StorageT = uint16_t;
using IdT = uint16_t;
Common() {}
explicit Common(const StorageT storage) : storage_(storage) {}
Common() = default;
explicit Common(const IdT id, const Location location = Location::Memory)
: id_((id & Mask) | static_cast<uint16_t>(location)) {
// TODO(ipaljak): A better way would be to throw an exception
// and send a message to the user that a new Id can't be created.
// By doing that, database instance will continue to work and user
// has a chance to make an appropriate action.
// CHECK isn't user friendly at all because it will immediately
// terminate the whole process.
// TODO implement throw and error handling
CHECK(id <= Mask) << "Number of used ids overflowed!";
}
virtual ~Common() {}
friend bool operator==(const TSpecificType &a, const TSpecificType &b) {
return a.storage_ == b.storage_;
return a.Id() == b.Id();
}
friend bool operator<(const TSpecificType &a, const TSpecificType &b) {
return a.storage_ < b.storage_;
return a.Id() < b.Id();
}
StorageT storage() const { return storage_; }
IdT Id() const { return static_cast<IdT>(id_ & Mask); }
Location Location() const {
return static_cast<enum Location>(id_ & NotMask);
}
struct Hash {
std::hash<StorageT> hash{};
size_t operator()(const TSpecificType &t) const { return hash(t.storage_); }
std::hash<IdT> hash{};
size_t operator()(const TSpecificType &t) const { return hash(t.id_); }
};
virtual void Save(capnp::Common::Builder &builder) const {
builder.setStorage(storage_);
builder.setStorage(id_);
}
virtual void Load(capnp::Common::Reader &reader) {
storage_ = reader.getStorage();
id_ = reader.getStorage();
}
private:
static constexpr IdT Mask = std::numeric_limits<IdT>::max() >> 1;
static constexpr IdT NotMask = ~Mask;
friend class boost::serialization::access;
template <class TArchive>
void serialize(TArchive &ar, const unsigned int) {
ar &storage_;
ar &id_;
}
StorageT storage_{0};
IdT id_{0};
};
class Label : public Common<Label> {

View File

@ -20,15 +20,56 @@ import subprocess
log = logging.getLogger(__name__)
class Test:
"""
Class used to store basic information about a single
test suite
@attribute name:
string, name of the test_suite (must be unique)
@attribute test_suite:
string, test_suite within tck_engine/tests which contains tck tests
@attribute memgraph_params
string, any command line arguments that should be passed to
memgraph before evaluating tests from this suite
@attribute mandatory
bool, True if this suite is obligatory for continuous integration
to pass
"""
def __init__(self, name, test_suite, memgraph_params, mandatory):
self.name = name
self.test_suite = test_suite
self.memgraph_params = memgraph_params
self.mandatory = mandatory
# constants
memgraph_suite = "memgraph_V1"
extra_suites = ["openCypher_M09"]
suites = [
Test(
name="memgraph_V1",
test_suite="memgraph_V1",
memgraph_params="",
mandatory=True
),
Test(
name="memgraph_V1_POD",
test_suite="memgraph_V1",
memgraph_params="--properties-on-disk=surname,location",
mandatory=True
),
Test(
name="openCypher_M09",
test_suite="openCypher_M09",
memgraph_params="",
mandatory=False
)
]
results_folder = os.path.join("tck_engine", "results")
suite_suffix = "memgraph-{}.json"
qa_status_path = ".quality_assurance_status"
measurements_path = ".apollo_measurements"
def get_newest_path(folder, suffix):
"""
:param folder: Scanned folder.
@ -107,16 +148,14 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
# run suites
log.info("Starting Memgraph scenarios.")
subprocess.run(["./run", "--test-suite", memgraph_suite], check = True)
for suite in extra_suites:
log.info("Starting extra suite '{}' scenarios.".format(suite))
subprocess.run(["./run", "--test-suite", suite])
# get data files (memgraph internal test + openCypher TCK test results)
memgraph_result_path = get_newest_path(results_folder,
suite_suffix.format(memgraph_suite))
log.info("Memgraph result path is {}".format(memgraph_result_path))
for suite in suites:
log.info("Starting suite '{}' scenarios.".format(suite.name))
subprocess.run(["./run",
"--test-suite", suite.test_suite,
"--name", suite.name,
# "--verbose",
"--memgraph-params", suite.memgraph_params],
check = False)
# measurements
measurements = ""
@ -124,19 +163,24 @@ if __name__ == "__main__":
# status table headers
status_data = [["Suite", "Scenarios"]]
# read internal scenarios
memgraph_status, memgraph_passed, memgraph_total = generate_status(
memgraph_suite, memgraph_result_path, required = True)
status_data.append([memgraph_suite, memgraph_status])
measurements += generate_measurements(memgraph_suite, memgraph_result_path)
# List of mandatory suites that have failed
mandatory_fails = []
# read extra scenarios
for suite in extra_suites:
result_path = get_newest_path(results_folder, suite_suffix.format(suite))
log.info("Extra suite '{}' result path is {}".format(suite, result_path))
suite_status, _, _ = generate_status(suite, result_path)
status_data.append([suite, suite_status])
measurements += generate_measurements(suite, result_path)
for suite in suites:
# get data files for test suite
suite_result_path = get_newest_path(results_folder,
suite_suffix.format(suite.name))
log.info("Memgraph result path is {}".format(suite_result_path))
# read scenarios
suite_status, suite_passed, suite_total = generate_status(
suite.name, suite_result_path, required = suite.mandatory)
if suite.mandatory and suite_passed != suite_total:
mandatory_fails.append(suite.name)
status_data.append([suite.name, suite_status])
measurements += generate_measurements(suite.name, suite_result_path)
# create status message
qa_status_message = generate_remarkup(status_data)
@ -145,13 +189,13 @@ if __name__ == "__main__":
with open(qa_status_path, "w") as f:
f.write(qa_status_message)
# create the measurements file
#create the measurements file
with open(measurements_path, "w") as f:
f.write(measurements)
log.info("Status is generated in %s" % qa_status_path)
log.info("Measurements are generated in %s" % measurements_path)
if memgraph_total != memgraph_passed:
sys.exit("There is a problem with internal scenarios! %s"
% memgraph_status)
if mandatory_fails != []:
sys.exit("Some mandatory tests have failed -- %s"
% str(mandatory_fails))

View File

@ -4,8 +4,10 @@ function print_usage_and_exit {
echo "./run --test-suite test_suite [--unstable]"
echo "Required arguments:"
echo -e " --test-suite test_suite\trun test_suite scenarios, test_suite must be test folder in tck_engine/tests."
echo -e " --name name\tunique identifer of test_suite and its parameters"
echo "Optional arguments:"
echo -e " --unstable\trun unstable scenarios"
echo -e " --memgraph-params \"param1=value1 param2=value2\"\tcommand line arguments for memgraph"
exit 1
}
@ -14,6 +16,8 @@ set -e
# read arguments
unstable=false
memgraph_params=""
loglevel=2
while [[ $# -gt 0 ]]; do
case $1 in
@ -25,6 +29,22 @@ while [[ $# -gt 0 ]]; do
loglevel=0
shift
;;
--memgraph-params)
if [ $# -eq 1 ]; then
print_usage_and_exit
fi
memgraph_params=$2
shift
shift
;;
--name)
if [ $# -eq 1 ]; then
print_usage_and_exit
fi
name=$2
shift
shift
;;
--test-suite)
if [ $# -eq 1 ]; then
print_usage_and_exit
@ -63,18 +83,19 @@ config_path="${memgraph_src_dir}/config/testing.conf"
# run scenarios
cd ${script_dir}
tck_flags="--root tck_engine/tests/$test_suite
--test-name $name
--no-side-effects --db memgraph"
if [[ $unstable = true ]]; then
tck_flags="$tck_flags --unstable"
fi
# the script has to be careful because memgraph process will be detached
set +e
# run memgraph
MEMGRAPH_CONFIG="$config_path" "$memgraph_binary" --min-log-level=$loglevel 1>&2 &
MEMGRAPH_CONFIG="$config_path"
"$memgraph_binary" --min-log-level=$loglevel "$memgraph_params" 1>&2 &
background_pid=$!
function cleanup_and_exit {

View File

@ -88,7 +88,7 @@ def after_all(context):
test_suite = get_test_suite(context)
file_name = context.config.output_folder + timestamp + \
"-" + context.config.database + "-" + test_suite + ".json"
"-" + context.config.database + "-" + context.config.test_name + ".json"
js = {
"total": test_results.num_total(), "passed": test_results.num_passed(),

View File

@ -32,6 +32,8 @@ def parse_args():
help="Pause after every scenario.")
argp.add_argument("--single-feature", action="store_true",
help="Pause after every feature.")
argp.add_argument("--test-name", default="",
help="Name of the test")
return argp.parse_args()
@ -66,7 +68,7 @@ def main():
dict(action="store_true", help="Pause after every scenario."))
add_config("--single-feature",
dict(action="store_true", help="Pause after every feature."))
add_config("--test-name", dict(help="Name of the test."))
# list with all options
# options will be passed to the cucumber engine
@ -99,10 +101,11 @@ def main():
behave_options.append("--single-feature")
behave_options.append("--output-folder")
behave_options.append(args.output_folder)
behave_options.append("--test-name")
behave_options.append(args.test_name)
# runs tests with options
return behave_main(behave_options)
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,7 +1,7 @@
class TestResults:
"""
Clas used to store test results. It has parameters total
Class used to store test results. It has parameters total
and passed.
@attribute total:

View File

@ -8,23 +8,23 @@
#include "storage/concurrent_id_mapper_single_node.hpp"
#include "storage/types.hpp"
using Id = storage::Label;
using Mapper = storage::SingleNodeConcurrentIdMapper<Id>;
using IdLabel = storage::Label;
using MapperLabel = storage::SingleNodeConcurrentIdMapper<IdLabel>;
TEST(ConcurrentIdMapper, SameValueGivesSameId) {
Mapper mapper;
MapperLabel mapper;
EXPECT_EQ(mapper.value_to_id("a"), mapper.value_to_id("a"));
}
TEST(ConcurrentIdMapper, IdToValue) {
Mapper mapper;
MapperLabel mapper;
std::string value = "a";
auto id = mapper.value_to_id(value);
EXPECT_EQ(value, mapper.id_to_value(id));
}
TEST(ConcurrentIdMapper, TwoValuesTwoIds) {
Mapper mapper;
MapperLabel mapper;
EXPECT_NE(mapper.value_to_id("a"), mapper.value_to_id("b"));
}
@ -36,8 +36,8 @@ TEST(ConcurrentIdMapper, SameIdReturnedMultipleThreads) {
// Perform the whole test a number of times since it's stochastic (we're
// trying to detect bad behavior in parallel execution).
for (int loop_ind = 0; loop_ind < 20; ++loop_ind) {
Mapper mapper;
std::vector<std::map<Id, std::string>> mappings(thread_count);
MapperLabel mapper;
std::vector<std::map<IdLabel, std::string>> mappings(thread_count);
std::vector<std::thread> threads;
for (int thread_ind = 0; thread_ind < thread_count; ++thread_ind) {
threads.emplace_back([&mapper, &mappings, &values, thread_ind] {
@ -52,3 +52,23 @@ TEST(ConcurrentIdMapper, SameIdReturnedMultipleThreads) {
for (auto &mapping : mappings) EXPECT_EQ(mapping, mappings[0]);
}
}
using IdProperty = storage::Property;
using MapperProperty = storage::SingleNodeConcurrentIdMapper<IdProperty>;
TEST(ConcurrentIdMapper, PropertyLocation) {
// TODO(ipaljak): write unit tests for storage::Common and all
// derived classes (tests/unit/storage_types.cpp)
std::string prop_on_disk_name = "test_name1";
std::string prop_in_mem_name = "test_name2";
std::vector<std::string> props_on_disk = {prop_on_disk_name};
MapperProperty mapper(props_on_disk);
auto on_disk = mapper.value_to_id(prop_on_disk_name);
ASSERT_EQ(on_disk.Id(), 0);
ASSERT_EQ(on_disk.Location(), storage::Location::Disk);
auto in_mem = mapper.value_to_id(prop_in_mem_name);
ASSERT_EQ(in_mem.Id(), 1);
ASSERT_EQ(in_mem.Location(), storage::Location::Memory);
}

View File

@ -6,110 +6,156 @@
#include "storage/property_value_store.hpp"
using std::string;
using Location = storage::Location;
class PropertyValueStoreTest : public ::testing::Test {
protected:
PropertyValueStore props_;
void Set(int key, PropertyValue value) {
props_.set(storage::Property(key), value);
void Set(int key, Location location, PropertyValue value) {
props_.set(storage::Property(key, location), value);
}
PropertyValue At(int key) { return props_.at(storage::Property(key)); }
PropertyValue At(int key, Location location) {
return props_.at(storage::Property(key, location));
}
auto Erase(int key) { return props_.erase(storage::Property(key)); }
auto Erase(int key, Location location) {
return props_.erase(storage::Property(key, location));
}
void TearDown() override { props_.clear(); }
};
TEST_F(PropertyValueStoreTest, At) {
std::string some_string = "something";
std::string other_string = "something completely different";
EXPECT_EQ(PropertyValue(At(0)).type(), PropertyValue::Type::Null);
Set(0, some_string);
EXPECT_EQ(PropertyValue(At(0)).Value<string>(), some_string);
Set(120, 42);
EXPECT_EQ(PropertyValue(At(120)).Value<int64_t>(), 42);
EXPECT_EQ(PropertyValue(At(0, Location::Memory)).type(),
PropertyValue::Type::Null);
Set(0, Location::Memory, some_string);
EXPECT_EQ(PropertyValue(At(0, Location::Memory)).Value<string>(),
some_string);
Set(120, Location::Memory, 42);
EXPECT_EQ(PropertyValue(At(120, Location::Memory)).Value<int64_t>(), 42);
Set(100, Location::Disk, other_string);
EXPECT_EQ(PropertyValue(At(100, Location::Disk)).Value<string>(),
other_string);
}
TEST_F(PropertyValueStoreTest, AtNull) {
EXPECT_EQ(At(0).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(0, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(0, Location::Disk).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100, Location::Disk).type(), PropertyValue::Type::Null);
// set one prop and test it's not null
Set(0, true);
EXPECT_NE(At(0).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100).type(), PropertyValue::Type::Null);
Set(0, Location::Memory, true);
EXPECT_NE(At(0, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100, Location::Memory).type(), PropertyValue::Type::Null);
Set(0, Location::Disk, true);
EXPECT_NE(At(0, Location::Disk).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(100, Location::Disk).type(), PropertyValue::Type::Null);
}
TEST_F(PropertyValueStoreTest, SetNull) {
Set(11, PropertyValue::Null);
Set(11, Location::Memory, PropertyValue::Null);
EXPECT_EQ(0, props_.size());
Set(100, Location::Disk, PropertyValue::Null);
EXPECT_EQ(0, props_.size());
}
TEST_F(PropertyValueStoreTest, Remove) {
// set some props
Set(11, "a");
Set(30, "b");
EXPECT_NE(At(11).type(), PropertyValue::Type::Null);
EXPECT_NE(At(30).type(), PropertyValue::Type::Null);
Set(11, Location::Memory, "a");
Set(30, Location::Memory, "b");
EXPECT_NE(At(11, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_NE(At(30, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(props_.size(), 2);
Erase(11);
Erase(11, Location::Memory);
EXPECT_EQ(props_.size(), 1);
EXPECT_EQ(At(11).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(11, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(Erase(30), 1);
EXPECT_EQ(Erase(30, Location::Memory), 1);
EXPECT_EQ(props_.size(), 0);
EXPECT_EQ(At(30).type(), PropertyValue::Type::Null);
EXPECT_EQ(At(30, Location::Memory).type(), PropertyValue::Type::Null);
EXPECT_EQ(Erase(1000), 0);
EXPECT_EQ(Erase(1000, Location::Memory), 0);
props_.clear();
Set(110, Location::Disk, "a");
EXPECT_NE(At(110, Location::Disk).type(), PropertyValue::Type::Null);
EXPECT_EQ(props_.size(), 1);
Erase(110, Location::Disk);
EXPECT_EQ(props_.size(), 0);
EXPECT_EQ(At(110, Location::Disk).type(), PropertyValue::Type::Null);
EXPECT_EQ(Erase(1000, Location::Disk), 0);
}
TEST_F(PropertyValueStoreTest, Clear) {
// set some props
EXPECT_EQ(props_.size(), 0);
props_.clear();
EXPECT_EQ(props_.size(), 0);
Set(11, "a");
Set(30, "b");
Set(11, Location::Memory, "a");
Set(30, Location::Memory, "b");
EXPECT_EQ(props_.size(), 2);
props_.clear();
EXPECT_EQ(props_.size(), 0);
Set(11, Location::Disk, "a");
EXPECT_EQ(props_.size(), 1);
props_.clear();
EXPECT_EQ(props_.size(), 0);
}
TEST_F(PropertyValueStoreTest, Replace) {
Set(10, 42);
EXPECT_EQ(At(10).Value<int64_t>(), 42);
Set(10, 0.25f);
EXPECT_EQ(At(10).type(), PropertyValue::Type::Double);
EXPECT_FLOAT_EQ(At(10).Value<double>(), 0.25);
Set(10, Location::Memory, 42);
EXPECT_EQ(At(10, Location::Memory).Value<int64_t>(), 42);
Set(10, Location::Memory, 0.25f);
EXPECT_EQ(At(10, Location::Memory).type(), PropertyValue::Type::Double);
EXPECT_FLOAT_EQ(At(10, Location::Memory).Value<double>(), 0.25);
Set(100, Location::Disk, "some text");
EXPECT_EQ(At(100, Location::Disk).Value<string>(), "some text");
Set(100, Location::Disk, "some other text");
EXPECT_EQ(At(100, Location::Disk).Value<string>(), "some other text");
}
TEST_F(PropertyValueStoreTest, Size) {
EXPECT_EQ(props_.size(), 0);
Set(0, "something");
Set(0, Location::Memory, "something");
EXPECT_EQ(props_.size(), 1);
Set(0, true);
Set(0, Location::Memory, true);
EXPECT_EQ(props_.size(), 1);
Set(1, true);
Set(1, Location::Memory, true);
EXPECT_EQ(props_.size(), 2);
for (int i = 0; i < 100; ++i) Set(i + 20, true);
for (int i = 0; i < 100; ++i) Set(i + 20, Location::Memory, true);
EXPECT_EQ(props_.size(), 102);
Erase(0);
Erase(0, Location::Memory);
EXPECT_EQ(props_.size(), 101);
Erase(0);
Erase(0, Location::Memory);
EXPECT_EQ(props_.size(), 101);
Erase(1);
Erase(1, Location::Memory);
EXPECT_EQ(props_.size(), 100);
Set(101, Location::Disk, "dalmatians");
EXPECT_EQ(props_.size(), 101);
Erase(101, Location::Disk);
EXPECT_EQ(props_.size(), 100);
}
TEST_F(PropertyValueStoreTest, InsertRetrieveList) {
Set(0, std::vector<PropertyValue>{1, true, 2.5, "something",
PropertyValue::Null});
auto p = At(0);
Set(0, Location::Memory, std::vector<PropertyValue>{1, true, 2.5, "something",
PropertyValue::Null});
auto p = At(0, Location::Memory);
EXPECT_EQ(p.type(), PropertyValue::Type::List);
auto l = p.Value<std::vector<PropertyValue>>();
@ -126,10 +172,10 @@ TEST_F(PropertyValueStoreTest, InsertRetrieveList) {
}
TEST_F(PropertyValueStoreTest, InsertRetrieveMap) {
Set(0, std::map<std::string, PropertyValue>{
{"a", 1}, {"b", true}, {"c", "something"}});
Set(0, Location::Memory, std::map<std::string, PropertyValue>{
{"a", 1}, {"b", true}, {"c", "something"}});
auto p = At(0);
auto p = At(0, Location::Memory);
EXPECT_EQ(p.type(), PropertyValue::Type::Map);
auto m = p.Value<std::map<std::string, PropertyValue>>();
EXPECT_EQ(m.size(), 3);