commit before os reinstall

This commit is contained in:
Dominik Tomičević 2015-07-07 16:18:26 +02:00
parent cd9eded8df
commit 20baa9f3ea
27 changed files with 521 additions and 93 deletions

View File

@ -19,7 +19,7 @@ struct BitBlock
// mask = 11111111 >> 6 = 00000011
static constexpr block_t mask = (block_t)(-1) >> (bits - N);
- uint8_t at(size_t n)
+ block_t at(size_t n)
{
assert(n < size);

View File

@ -0,0 +1,61 @@
#ifndef MEMGRAPH_DATA_STRUCTURES_BITSET_DYNAMIC_BITSET_HPP
#define MEMGRAPH_DATA_STRUCTURES_BITSET_DYNAMIC_BITSET_HPP
#include <vector>
#include <mutex>
#include <unistd.h>
#include "sync/spinlock.hpp"
#include "bitblock.hpp"
template <class block_t,
size_t N,
class lock_t>
class DynamicBitset
{
using Block = BitBlock<block_t, N>;
public:
DynamicBitset(size_t n) : data(container_size(n)) {}
void resize(size_t n)
{
auto guard = acquire();
data.resize(container_size(n));
}
size_t size() const
{
return data.size();
}
block_t at(size_t n)
{
return data[n / Block::size].at(n % Block::size);
}
void set(size_t n, block_t value)
{
data[n / Block::size].set(n % Block::size, value);
}
private:
std::unique_lock<lock_t> acquire()
{
return std::unique_lock<lock_t>(lock);
}
size_t container_size(size_t num_elems)
{
// each BitBlock holds Block::size entries, so round up to whole blocks
return (num_elems + Block::size - 1) / Block::size;
}
std::vector<Block> data;
lock_t lock;
};
#endif
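For illustration (not part of the commit), a minimal usage sketch of the bitset above. The header path is assumed from the include guard, and BitBlock is assumed to expose the set()/at() pair the class calls; index n lands in block n / Block::size at offset n % Block::size.

#include <cstdint>
#include <iostream>

#include "data_structures/bitset/dynamic_bitset.hpp" // path assumed from the include guard
#include "sync/spinlock.hpp"

int main()
{
    // 1024 two-bit entries packed into 64-bit blocks, guarded by a SpinLock
    DynamicBitset<uint64_t, 2, SpinLock> flags(1024);

    flags.set(5, 3); // store 0b11 at index 5

    std::cout << flags.at(5) << std::endl; // 3
    std::cout << flags.at(6) << std::endl; // 0, assuming blocks start zeroed

    return 0;
}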

67
memory/memory_engine.hpp Normal file
View File

@ -0,0 +1,67 @@
#ifndef MEMGRAPH_MEMORY_MEMORY_ENGINE_HPP
#define MEMGRAPH_MEMORY_MEMORY_ENGINE_HPP
#include <atomic>
#include <cstdlib>
#include <mutex>
#include "transaction/transaction.hpp"
#include "storage/model/record.hpp"
#include "storage/model/vertex.hpp"
#include "storage/model/edge.hpp"
// TODO implement the memory engine using the allocator style allocation to
// make this class non-dependent on the memory allocation strategy
// TODO implement real recycling of vertices and edges to improve performance
template <class id_t,
class lock_t>
class MemoryEngine
{
template <class T>
using record_t = Record<T, id_t, lock_t>;
using vertex_t = Vertex<id_t, lock_t>;
using edge_t = Edge<id_t, lock_t>;
public:
template <class T,
typename... Args>
T* create(Args&&... args)
{
return new T(std::forward<Args>(args)...);
}
template<class T>
T* allocate()
{
return static_cast<T*>(malloc(sizeof(T)));
}
template <class T>
void recycle(record_t<T>* record)
{
recycle(&record->derived());
}
void recycle(vertex_t* v)
{
delete v;
}
void recycle(edge_t* e)
{
delete e;
}
private:
std::unique_lock<lock_t> acquire()
{
return std::unique_lock<lock_t>(lock);
}
lock_t lock;
};
#endif
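A hedged sketch of how the engine above might be driven, assuming the Vertex and SpinLock types from this commit and the header paths implied by the include guards; create() forwards its arguments to the constructor and recycle() currently just deletes the object.

#include <cstdint>

#include "memory/memory_engine.hpp"
#include "storage/model/vertex.hpp"
#include "sync/spinlock.hpp"

int main()
{
    MemoryEngine<uint64_t, SpinLock> memory;

    // construct a vertex with id 1 through the engine
    auto* v = memory.create<Vertex<uint64_t, SpinLock>>(1);

    // hand it back once no transaction can reach it anymore
    memory.recycle(v);

    return 0;
}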

View File

@ -1,16 +0,0 @@
#ifndef MEMGRAPH_DATA_MODEL_EDGE_HPP
#define MEMGRAPH_DATA_MODEL_EDGE_HPP
#include "json/all.hpp"
#include "record.hpp"
struct Node;
struct Edge : Record
{
Node* to;
std::string data;
};
#endif

23
storage/model/edge.hpp Normal file
View File

@ -0,0 +1,23 @@
#ifndef MEMGRAPH_DATA_MODEL_EDGE_HPP
#define MEMGRAPH_DATA_MODEL_EDGE_HPP
#include <vector>
#include "record.hpp"
template <class id_t, class lock_t>
struct Vertex;
template <class id_t,
class lock_t>
struct Edge : public Record<Edge<id_t, lock_t>, id_t, lock_t>
{
Edge(uint64_t id) : Record<Edge<id_t, lock_t>, id_t, lock_t>(id) {}
using vertex_t = Vertex<id_t, lock_t>;
// pointer to the vertex this edge points to
vertex_t* to;
};
#endif

96
storage/model/record.hpp Normal file
View File

@ -0,0 +1,96 @@
#ifndef MEMGRAPH_STORAGE_RECORD_HPP
#define MEMGRAPH_STORAGE_RECORD_HPP
#include <atomic>
#include <cstdint>
#include <mutex>
#include "sync/spinlock.hpp"
template <class Derived,
class xid_t,
class lock_t>
class Record
{
public:
Record(uint64_t id = 0) : id(id), xmin_(0), xmax_(0), cmin_(0), cmax_(0),
newer_(nullptr) {}
using record_t = Record<Derived, xid_t, lock_t>;
// every node has a unique id. 2^64 = 1.8 x 10^19. that should be enough
// for a looong time :) but keep in mind that some vacuuming would be nice
// to reuse indices for deleted nodes.
uint64_t id;
// acquire an exclusive guard on this node, used by mvcc to guard
// concurrent writes to this record (update - update, update - delete)
std::unique_lock<lock_t> guard()
{
return std::unique_lock<lock_t>(lock);
}
xid_t xmin()
{
return xmin_.load(std::memory_order_relaxed);
}
void xmin(xid_t value)
{
xmin_.store(value, std::memory_order_relaxed);
}
xid_t xmax()
{
return xmax_.load(std::memory_order_relaxed);
}
void xmax(xid_t value)
{
xmax_.store(value, std::memory_order_relaxed);
}
uint8_t cmin()
{
return cmin_.load(std::memory_order_relaxed);
}
void cmin(uint8_t value)
{
cmin_.store(value, std::memory_order_relaxed);
}
uint8_t cmax()
{
return cmax_.load(std::memory_order_relaxed);
}
void cmax(uint8_t value)
{
cmax_.store(value, std::memory_order_relaxed);
}
record_t* newer()
{
return newer_.load(std::memory_order_relaxed);
}
void newer(record_t* value)
{
newer_.store(value, std::memory_order_relaxed);
}
Derived& derived()
{
return *static_cast<Derived*>(this);
}
private:
// used by MVCC to keep track of what's visible to transactions
std::atomic<xid_t> xmin_, xmax_;
std::atomic<uint8_t> cmin_, cmax_;
std::atomic<record_t*> newer_;
lock_t lock;
};
#endif
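To make the bookkeeping above concrete, a hypothetical helper (not in this commit) that stamps a freshly inserted record and later marks it deleted, the way the MVCC layer is meant to use xmin/xmax and cmin/cmax.

#include <cstdint>

#include "storage/model/vertex.hpp"
#include "sync/spinlock.hpp"

// hypothetical helper: mark `record` as inserted by transaction `xid`
// at command `cid`
template <class T>
void stamp_insert(T& record, uint64_t xid, uint8_t cid)
{
    record.xmin(xid); // created by this transaction...
    record.cmin(cid); // ...at this command
    record.xmax(0);   // and not deleted yet
}

int main()
{
    Vertex<uint64_t, SpinLock> v(42);
    stamp_insert(v, 7, 1);

    // later, transaction 9 deletes the record by stamping xmax/cmax
    v.xmax(9);
    v.cmax(1);

    return 0;
}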

21
storage/model/vertex.hpp Normal file
View File

@ -0,0 +1,21 @@
#ifndef MEMGRAPH_STORAGE_MODEL_VERTEX_HPP
#define MEMGRAPH_STORAGE_MODEL_VERTEX_HPP
#include <vector>
#include "record.hpp"
#include "edge.hpp"
template <class id_t,
class lock_t>
struct Vertex : public Record<Vertex<id_t, lock_t>, id_t, lock_t>
{
Vertex(uint64_t id) : Record<Vertex<id_t, lock_t>, id_t, lock_t>(id) {}
using edge_t = Edge<id_t, lock_t>;
// adjacency list containing pointers to outgoing edges from this vertex
std::vector<edge_t*> out;
};
#endif
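A small sketch (same assumptions as above) of how the vertex and edge records link up into a graph: each edge stores the destination vertex and hangs off the source vertex's adjacency list.

#include <cstdint>

#include "storage/model/vertex.hpp"
#include "sync/spinlock.hpp"

int main()
{
    using vertex_t = Vertex<uint64_t, SpinLock>;
    using edge_t = Edge<uint64_t, SpinLock>;

    vertex_t a(1), b(2);
    edge_t e(1);

    e.to = &b;           // the edge points at vertex b
    a.out.push_back(&e); // and is reachable from a's adjacency list

    return 0;
}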

View File

@ -1,58 +0,0 @@
#ifndef MEMGRAPH_STORAGE_RECORD_HPP
#define MEMGRAPH_STORAGE_RECORD_HPP
#include <mutex>
#include <list>
#include "sync/spinlock.hpp"
template <class lock_type = SpinLock>
class Record
{
// every node has a unique id. 2^64 = 1.8 x 10^19. that should be enough
// for a looong time :) but keep in mind that some vacuuming would be nice
// to reuse indices for deleted nodes. also, vacuuming would enable the
// use of uint32_t which could be more memory conserving
uint64_t id;
// acquire an exclusive guard on this node, use for concurrent access
std::unique_lock<lock_type> guard()
{
return std::unique_lock<lock_type>(lock);
}
uint64_t xmin()
{
return xmin_.load(std::memory_order_acquire);
}
uint64_t xmin_inc()
{
return xmin_.fetch_add(1, std::memory_order_relaxed);
}
uint64_t xmax()
{
return xmax_.load(std::memory_order_release);
}
uint64_t xmax_inc()
{
return xmax_.fetch_add(1, std::memory_order_relaxed);
}
Record* newer()
{
return versions.load(std::memory_order_consume);
}
private:
// used by MVCC to keep track of what's visible to transactions
std::atomic<uint64_t> xmin_, xmax_;
std::atomic<Record*> versions;
lock_type lock;
};
#endif

View File

@ -0,0 +1,90 @@
#ifndef MEMGRAPH_STORAGE_STORAGE_ENGINE_HPP
#define MEMGRAPH_STORAGE_STORAGE_ENGINE_HPP
#include <atomic>
#include <mutex>
#include "transaction/transaction.hpp"
#include "storage/model/record.hpp"
#include "storage/visible.hpp"
#include "memory/memory_engine.hpp"
template <class id_t,
class lock_t>
class StorageEngine
{
template <class T>
using record_t = Record<T, id_t, lock_t>;
using memory_engine_t = MemoryEngine<id_t, lock_t>;
public:
StorageEngine(memory_engine_t& memory) : memory(memory) {}
template <class T>
bool insert(record_t<T>** record,
const Transaction<id_t>& t)
{
}
template <class T>
bool update(record_t<T>* record,
record_t<T>** updated,
const Transaction<id_t>& t)
{
// put a lock on the node to prevent other writers from modifying it
auto guard = record->guard();
// find the newest visible version of the record about to be updated
auto newest = max_visible(record, t);
if(newest == nullptr)
return false; // another transaction just deleted it!
*updated = memory.allocate<T>();
**updated = *newest; // copy the data from the newest visible version TODO memset
// link the new version into the record's version chain
newest->newer(*updated);
return true;
}
template <class T>
bool remove(record_t<T>& record,
const Transaction<id_t>& t)
{
// put a lock on the node to prevent other writers from modifying it
auto guard = record.guard();
// only mark the record as deleted if it isn't already deleted
// this prevents phantom reappearance of the deleted nodes
//
// T1 |---------- DELETE N --------| COMMIT
// T2 |----- DELETE N ---------------------------------| COMMIT
// T3 |-- SELECT N --|
//
// if xmax was overwritten by T2, T3 would see that T1 was still
// running and determined that the record hasn't been deleted yet
// even though T1 already committed before T3 even started!
if(record.xmax())
return false; // another transaction just deleted it!
record.xmax(t.id);
return true;
}
private:
std::unique_lock<lock_t> acquire()
{
return std::unique_lock<lock_t>(lock);
}
memory_engine_t& memory;
lock_t lock;
};
#endif
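The diagram in remove() is the key argument: once a transaction has stamped xmax, a later delete must not overwrite the stamp, or a reader with an older snapshot could conclude the record was never deleted. A minimal sketch of that behaviour, assuming the headers compile as shown in this diff (paths taken from the include guards).

#include <cassert>
#include <cstdint>

#include "memory/memory_engine.hpp"
#include "storage/storage_engine.hpp"
#include "storage/model/vertex.hpp"
#include "transaction/transaction.hpp"
#include "sync/spinlock.hpp"

int main()
{
    MemoryEngine<uint64_t, SpinLock> memory;
    StorageEngine<uint64_t, SpinLock> storage(memory);

    auto* v = memory.create<Vertex<uint64_t, SpinLock>>(1);

    Transaction<uint64_t> t1(1, {});
    Transaction<uint64_t> t2(2, {1});

    // the first delete stamps xmax with t1's id...
    assert(storage.remove(*v, t1) == true);

    // ...so a second delete must not overwrite the stamp, otherwise the
    // record could "reappear" to readers after t1 commits
    assert(storage.remove(*v, t2) == false);

    memory.recycle(v);
    return 0;
}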

View File

@ -1,14 +0,0 @@
#ifndef MEMGRAPH_STORAGE_VERTEX_HPP
#define MEMGRAPH_STORAGE_VERTEX_HPP
#include "record.hpp"
#include "edge.hpp"
struct Vertex : Record
{
std::list<Edge*> out;
std::string name;
};
#endif

52
storage/visible.hpp Normal file
View File

@ -0,0 +1,52 @@
#ifndef MEMGRAPH_STORAGE_VISIBLE_HPP
#define MEMGRAPH_STORAGE_VISIBLE_HPP
#include "transaction/transaction.hpp"
#include "model/record.hpp"
#include "model/vertex.hpp"
#include "model/edge.hpp"
template <class T,
class id_t,
class lock_t>
bool visible(Record<T, id_t, lock_t>& r, const Transaction<id_t>& t)
{
// Mike Olson says 17 march 1993: the tests in this routine are correct;
// if you think they're not, you're wrong, and you should think about it
// again. i know, it happened to me.
return ((r.xmin() == t.id && // inserted by the current transaction
r.cmin() < t.cid && // before this command, and
(r.xmax() == 0 || // the row has not been deleted, or
(r.xmax() == t.id && // it was deleted by the current
// transaction
r.cmax() >= t.cid))) // but not before this command,
|| // or
(t.committed(r.xmin()) && // the record was inserted by a
// committed transaction, and
(r.xmax() == 0 || // the record has not been deleted, or
(r.xmax() == t.id && // the row is being deleted by this
// transaction
r.cmax() >= t.cid) || // but it's not deleted "yet", or
(r.xmax() != t.id && // the row was deleted by another
// transaction
!t.committed(r.xmax()))))); // that has not been committed
}
// inspects the record change history and returns the record version visible
// to the current transaction if it exists, otherwise it returns nullptr
template <class T,
class id_t,
class lock_t>
T* max_visible(Record<T, id_t, lock_t>* record, const Transaction<id_t>& t)
{
// move down through the versions of the nodes until you find the first
// one visible to this transaction
while(record != nullptr && !visible(*record, t))
record = record->newer();
// if no visible nodes were found, return nullptr
return record == nullptr ? nullptr : &record->derived();
}
#endif
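A tiny check of the first branch of the rule above: a record inserted by the current transaction at an earlier command, and never deleted, should be visible. Header paths are assumed from the include guards.

#include <cstdint>
#include <iostream>

#include "storage/visible.hpp"
#include "storage/model/vertex.hpp"
#include "transaction/transaction.hpp"
#include "sync/spinlock.hpp"

int main()
{
    Transaction<uint64_t> t(5, {});
    t.cid = 2; // pretend one command has already executed

    Vertex<uint64_t, SpinLock> v(1);
    v.xmin(5); // inserted by transaction 5...
    v.cmin(1); // ...by its first command
    v.xmax(0); // and never deleted

    std::cout << visible(v, t) << std::endl; // expected to print 1

    return 0;
}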

37
sync/caslock.hpp Normal file
View File

@ -0,0 +1,37 @@
#ifndef MEMGRAPH_UTILS_SYNC_CASLOCK_HPP
#define MEMGRAPH_UTILS_SYNC_CASLOCK_HPP
#include <atomic>
#include <unistd.h>
class CasLock
{
public:
void lock()
{
bool locked = false;
while(!lock_flag.compare_exchange_weak(locked, true,
std::memory_order_acquire,
std::memory_order_relaxed))
{
// a failed exchange writes the current value (true) back into
// `locked`, so reset the expected value before retrying
locked = false;
usleep(250);
}
}
void unlock()
{
lock_flag.store(0, std::memory_order_release);
}
bool locked()
{
return lock_flag.load(std::memory_order_relaxed);
}
private:
std::atomic<bool> lock_flag {false};
};
#endif
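CasLock provides the lock()/unlock() pair required by the standard BasicLockable concept, so it can be dropped into the usual RAII guards; a small sketch with two threads bumping a shared counter.

#include <mutex>
#include <thread>

#include "sync/caslock.hpp"

CasLock lock;
int counter = 0;

void work()
{
    for (int i = 0; i < 1000; ++i)
    {
        std::lock_guard<CasLock> guard(lock); // lock() on entry, unlock() on exit
        ++counter;
    }
}

int main()
{
    std::thread a(work), b(work);
    a.join();
    b.join();

    return counter == 2000 ? 0 : 1;
}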

50
test_skip.cpp Normal file
View File

@ -0,0 +1,50 @@
#include <iostream>
#include "data_structures/skiplist/skiplist.hpp"
int main(void)
{
xorshift::init();
SkipList<int, int> skiplist;
int k1 = 1;
int i1 = 10;
int k2 = 2;
int i2 = 20;
std::cout << "find: " << k1 << " => " << (skiplist.find(&k1) == nullptr) << std::endl;
std::cout << skiplist.insert(&k1, &i1) << std::endl;
std::cout << "find: " << k1 << " => " << *skiplist.find(&k1)->item << std::endl;
std::cout << "find: " << k2 << " => " << (skiplist.find(&k2) == nullptr) << std::endl;
std::cout << skiplist.insert(&k2, &i2) << std::endl;
std::cout << skiplist.insert(&k2, &i2) << std::endl;
std::cout << skiplist.insert(&k1, &i1) << std::endl;
std::cout << skiplist.insert(&k2, &i2) << std::endl;
std::cout << "DELETE K1 " << skiplist.remove(&k1) << std::endl;
std::cout << "find: " << k1 << " => " << (skiplist.find(&k1) == nullptr) << std::endl;
std::cout << "find: " << k2 << " => " << *skiplist.find(&k2)->item << std::endl;
std::cout << skiplist.size() << std::endl;
auto* node = skiplist.header.load(std::memory_order_consume);
for(size_t i = 0; i < skiplist.size(); ++i)
{
node = node->forward(0);
if(node == nullptr)
{
std::cout << "node == nullptr" << std::endl;
break; // stop instead of dereferencing a null node
}
if(node->key == nullptr)
std::cout << "node->key == nullptr" << std::endl;
if(node->item == nullptr)
std::cout << "node->item == nullptr" << std::endl;
std::cout << *node->key << " => " << *node->item << std::endl;
}
return 0;
}

View File

@ -8,18 +8,29 @@ template <class id_t>
struct Transaction
{
Transaction(id_t id, std::vector<id_t> active)
- : id(id), active(std::move(active)) {}
+ : id(id), cid(1), active(std::move(active)) {}
// index of this transaction
id_t id;
+ // index of the current command within this transaction
+ uint8_t cid;
// the ids of the currently active transactions used by the mvcc
- // implementation for snapshot isolation
+ // implementation for snapshot transaction isolation.
+ // std::vector is much faster than std::set for a small number of items,
+ // and we don't expect the number of active transactions to get very large.
std::vector<id_t> active;
- bool operator<(const Transaction<id_t>& rhs)
+ // check whether the transaction with the given xid looks committed from
+ // the database snapshot given to this transaction
+ bool committed(id_t xid) const
{
- return id < rhs.id;
+ for(size_t i = 0; i < active.size(); ++i)
+     if(xid < active[i])
+         return false;
+ return true;
}
};
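For reference, a sketch of how such a snapshot might be built and queried; it only illustrates the API added in this diff, not the final semantics of committed().

#include <cstdint>
#include <iostream>

#include "transaction/transaction.hpp"

int main()
{
    // transaction 8 starts while transactions 5 and 7 are still running
    Transaction<uint64_t> t(8, {5, 7});

    // each statement inside the transaction would bump the command counter
    t.cid += 1;

    // ask the snapshot how other transactions look from its point of view
    std::cout << t.committed(4) << std::endl;
    std::cout << t.committed(9) << std::endl;

    return 0;
}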

View File

@ -1,5 +1,13 @@
#ifndef MEMGRAPH_TRANSACTION_TRANSACTION_LOG_HPP
#define MEMGRAPH_TRANSACTION_TRANSACTION_LOG_HPP
#include <cstdlib>
template <class id_t>
class TransactionLog
{
private:
};
#endif