Add hash on mgp::Value (#1093)

This commit is contained in:
imilinovic 2023-07-28 09:08:36 +02:00 committed by GitHub
parent e489e4f3e7
commit 609b9a20f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 163 additions and 0 deletions

View File

@ -135,6 +135,13 @@ inline int64_t value_get_int(mgp_value *val) { return MgInvoke<int64_t>(mgp_valu
// Returns the result of mgp_value_get_double for `val`, invoked through MgInvoke.
inline double value_get_double(mgp_value *val) {
  return MgInvoke<double>(mgp_value_get_double, val);
}
// Returns the content of `val` as a double: an int value is converted with
// static_cast, anything else is read through value_get_double.
inline double value_get_numeric(mgp_value *val) {
  const bool holds_int = MgInvoke<int>(mgp_value_is_int, val) != 0;
  return holds_int ? static_cast<double>(value_get_int(val)) : value_get_double(val);
}
// Returns the result of mgp_value_get_string for `val`, invoked through MgInvoke.
inline const char *value_get_string(mgp_value *val) {
  return MgInvoke<const char *>(mgp_value_get_string, val);
}
// Returns the result of mgp_value_get_list for `val`, invoked through MgInvoke.
inline mgp_list *value_get_list(mgp_value *val) {
  return MgInvoke<mgp_list *>(mgp_value_get_list, val);
}
@ -171,6 +178,8 @@ inline bool value_is_int(mgp_value *val) { return MgInvoke<int>(mgp_value_is_int
// True when mgp_value_is_double reports a non-zero result for `val`.
inline bool value_is_double(mgp_value *val) {
  return MgInvoke<int>(mgp_value_is_double, val) != 0;
}
// True when `val` is an int or a double; the double check is only made when
// the int check fails.
inline bool value_is_numeric(mgp_value *val) {
  if (value_is_int(val)) {
    return true;
  }
  return value_is_double(val);
}
// True when mgp_value_is_string reports a non-zero result for `val`.
inline bool value_is_string(mgp_value *val) {
  return MgInvoke<int>(mgp_value_is_string, val) != 0;
}
// True when mgp_value_is_list reports a non-zero result for `val`.
inline bool value_is_list(mgp_value *val) {
  return MgInvoke<int>(mgp_value_is_list, val) != 0;
}

View File

@ -11,14 +11,17 @@
#pragma once
#include <cstdlib>
#include <cstring>
#include <functional>
#include <map>
#include <set>
#include <string>
#include <string_view>
#include <vector>
#include "_mgp.hpp"
#include "mg_exceptions.hpp"
#include "mg_procedure.h"
namespace mgp {
@ -1347,6 +1350,67 @@ inline void AddFunction(mgp_func_cb callback, std::string_view name, std::vector
/* #endregion */
namespace util {
/**
 * Computes the 64-bit FNV-1a hash of a string.
 *
 * https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 *
 * @param s The characters to hash; each one is consumed as a single octet.
 * @return The FNV-1a hash of `s` (the FNV offset basis when `s` is empty).
 */
inline uint64_t Fnv(const std::string_view s) {
  // fnv1a is recommended so use it as the default implementation.
  constexpr uint64_t fnv_offset_basis = 14695981039346656037ULL;
  constexpr uint64_t fnv_prime = 1099511628211ULL;

  uint64_t hash = fnv_offset_basis;
  for (const char ch : s) {
    // FNV is defined over octets. Converting through unsigned char keeps bytes
    // >= 0x80 from being sign-extended to 64 bits where plain char is signed,
    // which would otherwise make the result platform-dependent.
    hash = (hash ^ static_cast<uint64_t>(static_cast<unsigned char>(ch))) * fnv_prime;
  }
  return hash;
}
/**
 * FNV-like hashing of a whole collection. Not truly FNV: FNV consumes 8-bit
 * elements, while this implementation folds in one size_t per item (the hash
 * of that item).
 *
 * https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 *
 * The functor starts from the FNV offset basis and, for every element in
 * iteration order, multiplies the running value by the FNV prime and then
 * XORs in the element's hash. An empty collection hashes to the offset basis.
 *
 * @tparam TIterable A collection type that has begin() and end().
 * @tparam TElement Type of element in the collection.
 * @tparam THash Hash type (has an operator() that accepts a 'const TElement &'
 *               and returns size_t). Defaults to std::hash<TElement>.
 */
template <typename TIterable, typename TElement, typename THash = std::hash<TElement>>
struct FnvCollection {
  /**
   * @param iterable A collection of elements.
   * @return The hash of the whole collection.
   */
  size_t operator()(const TIterable &iterable) const {
    THash hash_one;
    uint64_t combined = kFnvOffsetBasis;
    for (const TElement &item : iterable) {
      // Multiply first, then XOR in the element hash.
      combined = (combined * kFnvPrime) ^ hash_one(item);
    }
    return combined;
  }

 private:
  static constexpr uint64_t kFnvOffsetBasis = 14695981039346656037u;
  static constexpr uint64_t kFnvPrime = 1099511628211u;
};
/**
 * Combines the hashes of two values, in the manner of FNV hashing for a
 * collection but specialized for exactly two elements to avoid iteration
 * overhead.
 *
 * The result is ((offset_basis ^ hash(a)) * prime) ^ hash(b), so swapping the
 * two arguments generally yields a different value.
 *
 * @tparam TA Type of the first value.
 * @tparam TB Type of the second value.
 * @tparam TAHash Hash functor for TA. Defaults to std::hash<TA>.
 * @tparam TBHash Hash functor for TB. Defaults to std::hash<TB>.
 */
template <typename TA, typename TB, typename TAHash = std::hash<TA>, typename TBHash = std::hash<TB>>
struct HashCombine {
  size_t operator()(const TA &a, const TB &b) const {
    constexpr size_t kOffsetBasis = 14695981039346656037UL;
    constexpr size_t kPrime = 1099511628211UL;

    // Hash the operands in a fixed order (a first, then b) before mixing.
    const size_t first_hash = TAHash()(a);
    const size_t second_hash = TBHash()(b);
    return ((kOffsetBasis ^ first_hash) * kPrime) ^ second_hash;
  }
};
// uint to int conversion in C++ is a bit tricky. Take a look here
// https://stackoverflow.com/questions/14623266/why-cant-i-reinterpret-cast-uint-to-int
// for more details.
@ -1469,6 +1533,10 @@ inline bool ValuesEqual(mgp_value *value1, mgp_value *value2) {
if (value1 == value2) {
return true;
}
// Make int and double comparable, (ex. this is true -> 1.0 == 1)
if (mgp::value_is_numeric(value1) && mgp::value_is_numeric(value2)) {
return mgp::value_get_numeric(value1) == mgp::value_get_numeric(value2);
}
if (mgp::value_get_type(value1) != mgp::value_get_type(value2)) {
return false;
}
@ -3596,6 +3664,28 @@ struct hash<mgp::Relationship> {
// Hashes a relationship by the integer form of its id.
size_t operator()(const mgp::Relationship &x) const {
  return std::hash<int64_t>{}(x.Id().AsInt());
}
};
template <>
struct hash<mgp::Path> {
  /**
   * Hashes a path by folding the hashes of its nodes and relationships, in
   * path order, into an FNV-like accumulator (multiply by the FNV prime, then
   * XOR in the element hash).
   *
   * The path is walked as Length() relationships joining Length() + 1 nodes:
   * the last node is read at index Length(). A path with Length() == 0
   * therefore hashes just its single node.
   *
   * @param x The path to hash.
   * @return The combined hash of every node and relationship on the path.
   */
  size_t operator()(const mgp::Path &x) const {
    // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
    // See mgp::util::FnvCollection
    constexpr const uint64_t fnv_prime = 1099511628211U;
    uint64_t hash = 14695981039346656037U;
    auto multiply_and_xor = [](uint64_t &accumulator, size_t element_hash) {
      accumulator *= fnv_prime;
      accumulator ^= element_hash;
    };

    // Iterate over every (node, relationship) pair. The bound must be Length()
    // itself: `Length() - 1` wraps around for an unsigned zero length and
    // otherwise leaves the last relationship and the node before it unhashed.
    const auto length = x.Length();
    for (size_t i = 0; i < length; ++i) {
      multiply_and_xor(hash, std::hash<mgp::Node>{}(x.GetNodeAt(i)));
      multiply_and_xor(hash, std::hash<mgp::Relationship>{}(x.GetRelationshipAt(i)));
    }
    // The final node has no outgoing relationship on the path.
    multiply_and_xor(hash, std::hash<mgp::Node>{}(x.GetNodeAt(length)));
    return hash;
  }
};
template <>
struct hash<mgp::Date> {
// Hashes a date by the value returned from its Timestamp().
size_t operator()(const mgp::Date &x) const {
  return std::hash<int64_t>{}(x.Timestamp());
}
@ -3620,4 +3710,59 @@ template <>
struct hash<mgp::MapItem> {
  // Only the item's key takes part in the hash; the value is not consulted.
  size_t operator()(const mgp::MapItem &x) const {
    return std::hash<std::string_view>{}(x.key);
  }
};
template <>
struct hash<mgp::Map> {
  // Hashes a map by combining the hashes of its items, in iteration order,
  // with mgp::util::FnvCollection.
  size_t operator()(const mgp::Map &x) const {
    using MapHasher = mgp::util::FnvCollection<mgp::Map, mgp::MapItem, std::hash<mgp::MapItem>>;
    return MapHasher{}(x);
  }
};
template <>
struct hash<mgp::Value> {
  /**
   * Hashes a value by dispatching on its runtime type to the hash of the
   * contained object.
   *
   * @param x The value to hash.
   * @return The hash of the contained object; Null always hashes to 31.
   * @throws mg_exception::InvalidArgumentException for Type::Any and for any
   *         type not handled by the switch.
   */
  size_t operator()(const mgp::Value &x) const {
    switch (x.Type()) {
      case mgp::Type::Any:
        // Not hashable: leave the switch and reach the throw below.
        break;
      case mgp::Type::Null:
        return 31;
      case mgp::Type::Bool:
        return std::hash<bool>{}(x.ValueBool());
      case mgp::Type::Int:
        // Ints are hashed through their double representation so that the
        // hash stays consistent with equality, where (2.0 == 2) == true.
        return std::hash<double>{}(static_cast<double>(x.ValueInt()));
      case mgp::Type::Double:
        return std::hash<double>{}(x.ValueDouble());
      case mgp::Type::String:
        return std::hash<std::string_view>{}(x.ValueString());
      case mgp::Type::List: {
        using ListHasher = mgp::util::FnvCollection<mgp::List, mgp::Value, std::hash<mgp::Value>>;
        return ListHasher{}(x.ValueList());
      }
      case mgp::Type::Map:
        return std::hash<mgp::Map>{}(x.ValueMap());
      case mgp::Type::Node:
        return std::hash<mgp::Node>{}(x.ValueNode());
      case mgp::Type::Relationship:
        return std::hash<mgp::Relationship>{}(x.ValueRelationship());
      case mgp::Type::Path:
        return std::hash<mgp::Path>{}(x.ValuePath());
      case mgp::Type::Date:
        return std::hash<mgp::Date>{}(x.ValueDate());
      case mgp::Type::LocalTime:
        return std::hash<mgp::LocalTime>{}(x.ValueLocalTime());
      case mgp::Type::LocalDateTime:
        return std::hash<mgp::LocalDateTime>{}(x.ValueLocalDateTime());
      case mgp::Type::Duration:
        return std::hash<mgp::Duration>{}(x.ValueDuration());
    }
    throw mg_exception::InvalidArgumentException();
  }
};
template <>
struct hash<mgp::List> {
  /**
   * Hashes a list by combining the hashes of its values, in iteration order,
   * with mgp::util::FnvCollection.
   *
   * The call operator is const-qualified, like the other std::hash
   * specializations in this file, so that it can be invoked on a const hasher
   * object as unordered containers do.
   *
   * @param x The list to hash.
   * @return The combined hash of all values in the list.
   */
  size_t operator()(const mgp::List &x) const {
    return mgp::util::FnvCollection<mgp::List, mgp::Value, std::hash<mgp::Value>>{}(x);
  }
};
} // namespace std

View File

@ -473,6 +473,15 @@ TYPED_TEST(CppApiTestFixture, TestNodeProperties) {
ASSERT_EQ(node_1.GetProperty("b").ValueString(), "b");
}
// Checks that mgp::Value equality compares ints and doubles by numeric value.
TYPED_TEST(CppApiTestFixture, TestNumberEquality) {
  // Same number stored as a double and as an int: must compare equal.
  mgp::Value one_as_double{1.0};
  mgp::Value one_as_int{int64_t{1}};
  ASSERT_TRUE(one_as_double == one_as_int);

  // A double slightly above the int: must compare unequal.
  mgp::Value just_above_two{2.01};
  mgp::Value two_as_int{int64_t{2}};
  ASSERT_FALSE(just_above_two == two_as_int);
}
TYPED_TEST(CppApiTestFixture, TestTypeOperatorStream) {
std::string string1 = "string";
int64_t int1 = 4;