Finished stack allocator.

2016-09-15 20:19:31 +01:00 · 2016-09-15 20:19:31 +01:00 · fd202b1ed7
commit fd202b1ed7
parent c67742e3fa
7 changed files with 182 additions and 55 deletions
--- a/build/compiled/cpu/hardcode/9470552549410791677.cpp
+++ b/build/compiled/cpu/hardcode/9470552549410791677.cpp
@ -5,6 +5,7 @@

 #include "query_engine/i_code_cpu.hpp"
 #include "storage/model/properties/all.hpp"
+#include "utils/memory/stack_allocator.hpp"

 using std::cout;
 using std::endl;
@ -71,6 +72,8 @@ bool vertex_filter_contained(DbAccessor &t, VertexAccessor &v, Node *before)

 void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
 {
+    StackAllocator stack;
+
    VertexPropertyType<Double> tkey = t.vertex_property_key<Double>("score");

    auto cmp = [](Node *left, Node *right) { return left->cost > right->cost; };
@ -84,7 +87,7 @@ void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
    }

    start_vr.get().fill();
-    Node *start = new Node(start_vr.take(), 0, tkey);
+    Node *start = stack.make<Node>(start_vr.take(), 0, tkey);
    queue.push(start);
    all_nodes.push_back(start);

@ -106,22 +109,24 @@ void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
            VertexAccessor va = edge.to();
            if (vertex_filter_contained(t, va, now)) {
                auto cost = 1 - va.at(tkey).get()->value();
-                Node *n = new Node(va, now->cost + cost, now, tkey);
+                Node *n = stack.make<Node>(va, now->cost + cost, now, tkey);
                queue.push(n);
                all_nodes.push_back(n);
            }
        });
    } while (!queue.empty());

-	stream.write_field("n");
+    stream.write_field("n");
    stream.write_record();
    stream.write_list_header(0);
    stream.chunk();
    stream.write_meta("r");

-    for (auto n : all_nodes) {
-        delete n;
-    }
+    // for (auto n : all_nodes) {
+    //     delete n;
+    // }
+
+    stack.free();
 }

 class CodeCPU : public ICodeCPU<STREAM>
--- a/config/memgraph.yaml
+++ b/config/memgraph.yaml
@ -2,7 +2,7 @@ compile_cpu_path: "./compiled/cpu/"
 template_cpu_cpp_path: "./template/template_code_cpu.cpp"
 barrier_template_cpu_cpp_path: "./template/barrier_template_code_cpu.cpp"
 template_cpu_hpp_path: "./template/template_code_cpu.hpp"
-snapshots_path: "./snapshots"
-cleaning_cycle_sec: "60"
-snapshot_cycle_sec: "60"
+snapshots_path: "snapshots"
+cleaning_cycle_sec: "300"
+snapshot_cycle_sec: "3600"
 max_retained_snapshots: "3"
--- a/include/query_engine/exceptions/exceptions.hpp
+++ b/include/query_engine/exceptions/exceptions.hpp
@ -4,25 +4,36 @@

 class QueryEngineException : public BasicException
 {
+public:
    using BasicException::BasicException;
 };

 class CppGeneratorException : public BasicException
 {
+public:
    using BasicException::BasicException;
 };

 class DecoderException : public BasicException
 {
+public:
    using BasicException::BasicException;
 };

 class NotYetImplemented : public BasicException
 {
+public:
    using BasicException::BasicException;
 };

 class NonExhaustiveSwitch : public BasicException
 {
+public:
+    using BasicException::BasicException;
+};
+
+class OutOfMemory : public BasicException
+{
+public:
    using BasicException::BasicException;
 };
--- a/include/utils/memory/block_allocator.hpp
+++ b/include/utils/memory/block_allocator.hpp
@ -10,17 +10,11 @@ class BlockAllocator
 {
    struct Block
    {
-        Block()
-        {
-            data = malloc(block_size);
-        }
+        Block() { data = malloc(block_size); }

-        Block(void* ptr)
-        {
-            data = ptr;
-        }
+        Block(void *ptr) { data = ptr; }

-        void* data;
+        void *data;
    };

 public:
@ -28,24 +22,30 @@ public:

    BlockAllocator(size_t capacity = 0)
    {
-        for(size_t i = 0; i < capacity; ++i)
+        for (size_t i = 0; i < capacity; ++i)
            blocks.emplace_back();
    }

-    void* acquire()
+    ~BlockAllocator()
    {
-        if(blocks.size() == 0)
-            blocks.emplace_back();
+        for (auto b : blocks) {
+            free(b.data);
+        }
+        blocks.clear();
+    }

+    // Returns nullptr on no memory.
+    void *acquire()
+    {
+        if (blocks.size() == 0) blocks.emplace_back();
+
+        auto ptr = blocks.back().data;
        Auto(blocks.pop_back());
-        return blocks.back().data;
-    }
-    
-    void release(void* ptr)
-    {
-        blocks.emplace_back(ptr);
+        return ptr;
    }

+    void release(void *ptr) { blocks.emplace_back(ptr); }
+
 private:
    std::vector<Block> blocks;
 };
--- a/include/utils/memory/stack_allocator.hpp
+++ b/include/utils/memory/stack_allocator.hpp
@ -0,0 +1,110 @@
+#pragma once
+
+#include <cmath>
+#include "query_engine/exceptions/exceptions.hpp"
+#include "utils/memory/block_allocator.hpp"
+
+// Useful for allocating memory which can be freed with one call.
+// Most performant for data which need to be present to the end.
+class StackAllocator
+{
+    static constexpr size_t page_size = 64 * 1024;
+
+public:
+    ~StackAllocator() { free(); }
+
+    // Allocates memory for object of type T.
+    // Retruns pointer to memory for it.
+    template <class T>
+    inline T *allocate()
+    {
+        // If size is bigger than pages_size then this do-whil will never end
+        // until it eats all the memory.
+        static_assert(sizeof(T) <= page_size);
+        do {
+            // Mask which has log2(alignof(T)) lower bits setted to 0 and the
+            // rest to 1.
+            // example:
+            // alignof(T)==8 => mask=0xfffffffffffffff8;
+            // This will be calculated in compile time.
+            size_t mask = ~(((size_t)alignof(T)) - 1);
+
+            // aligned contains ptr aligned to alignof(T).
+            // There are two types of head ptr:
+            // a) aligned to alignof(T)
+            // b) not aligned to alignof(T)
+            // For a) whe want for aligned to be equal to head, and for b)
+            // aligned shuold be first aligned ptr greater than head.
+            //
+            // head - 1 => turns a) into b) now whe only have to get first
+            // aligned ptr greater than (head - 1).
+            //
+            // (head - 1) & mask => will produce first smaller than head ptr
+            // aligned to alignof(T).
+            //
+            // ( (head - 1) & mask ) + alignof(T) => will produce first grater
+            // or equal than head ptr aligned to alignof(T).
+            char *aligned =
+                (char *)(((((size_t)head) - 1) & mask) + alignof(T));
+
+            // New head which points to unallocated memory points to first byte
+            // after space for object T.
+            char *new_head = aligned + sizeof(T);
+
+            // If the new_head is greater than end that means that there isn't
+            // enough space for object T in current block of memory.
+            if (LIKELY(new_head <= end)) {
+
+                // All is fine, head can become new_head
+                head = new_head;
+
+                // Returns aligned ptr with enough space for object T.
+                return (T *)aligned;
+            }
+
+            // There isn't enough free space so whe must allocate more.
+            void *alloc = blocks.acquire();
+
+            // Check if there are memory. If not throw exception rather than
+            // return nullptr.
+            if (UNLIKELY(alloc == nullptr))
+                throw new OutOfMemory("BlockAllocator returned nullptr");
+
+            // Remember that whee allocated memory so that whe can free-it
+            // after.
+            allocated_blocks.push_back(alloc);
+
+            // Set new head, the old one isn't needed anymore.
+            head = (char *)alloc;
+
+            // Update end to point to first byte after newly allocated memory.
+            end = head + page_size;
+
+            // After allocating new memory lets try again to "allocate" place
+            // for T.
+        } while (true);
+    }
+
+    template <class T, class... Args>
+    inline T *make(Args &&... args)
+    {
+        auto ptr = allocate<T>();
+        new (ptr) T(std::forward<Args>(args)...);
+        return ptr;
+    }
+
+    // Relases all memory.
+    void free()
+    {
+        while (allocated_blocks.size() > 0) {
+            blocks.release(allocated_blocks.back());
+            allocated_blocks.pop_back();
+        }
+    }
+
+private:
+    BlockAllocator<page_size> blocks;
+    std::vector<void *> allocated_blocks;
+    char *head = {nullptr};
+    char *end = {nullptr};
+};
--- a/poc/astar_query.cpp
+++ b/poc/astar_query.cpp
@ -43,27 +43,30 @@ int main(int argc, char **argv)
    Db db("astar");
    barrier::CodeCPU cp;
    int bench_n = 1000;
-    double sum = 0;
-    for (int i = 0; i < bench_n; i++) {
-        auto start_vertex_index =
-            std::rand() % db.graph.vertices.access().size();

-        auto begin = clock();
+    do {
+        double sum = 0;
+        for (int i = 0; i < bench_n; i++) {
+            auto start_vertex_index =
+                std::rand() % db.graph.vertices.access().size();

-        code_args_t args;
-        args.push_back(Property(Int64(start_vertex_index), Int64::type));
+            auto begin = clock();

-        cp.run(barrier::trans(db), args, std::cout);
+            code_args_t args;
+            args.push_back(Property(Int64(start_vertex_index), Int64::type));

-        clock_t end = clock();
+            cp.run(barrier::trans(db), args, std::cout);

-        double elapsed_ms = (double(end - begin) / CLOCKS_PER_SEC) * 1000;
-        sum += elapsed_ms;
-    }
+            clock_t end = clock();

-    std::cout << "\nSearch for best " << barrier::limit
-              << " results has runing time of:\n    avg: " << sum / bench_n
-              << " [ms]\n";
+            double elapsed_ms = (double(end - begin) / CLOCKS_PER_SEC) * 1000;
+            sum += elapsed_ms;
+        }
+
+        std::cout << "\nSearch for best " << barrier::limit
+                  << " results has runing time of:\n    avg: " << sum / bench_n
+                  << " [ms]\n";
+    } while (true);

    return 0;
 }
--- a/poc/queries/astar.cpp
+++ b/poc/queries/astar.cpp
@ -5,6 +5,7 @@

 #include "query_engine/i_code_cpu.hpp"
 #include "storage/model/properties/all.hpp"
+#include "utils/memory/stack_allocator.hpp"

 using std::cout;
 using std::endl;
@ -15,8 +16,8 @@ using std::endl;
 namespace barrier
 {

-// using STREAM = std::ostream;
-using STREAM = RecordStream<::io::Socket>;
+using STREAM = std::ostream;
+// using STREAM = RecordStream<::io::Socket>;

 constexpr size_t max_depth = 3;
 constexpr size_t limit = 10;
@ -71,22 +72,21 @@ bool vertex_filter_contained(DbAccessor &t, VertexAccessor &v, Node *before)

 void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
 {
+    StackAllocator stack;
    VertexPropertyType<Double> tkey = t.vertex_property_key<Double>("score");

    auto cmp = [](Node *left, Node *right) { return left->cost > right->cost; };
    std::priority_queue<Node *, std::vector<Node *>, decltype(cmp)> queue(cmp);
-    std::vector<Node *> all_nodes;

    auto start_vr = t.vertex_find(Id(args[0].as<Int64>().value()));
    if (!start_vr.is_present()) {
-        stream.write_failure({{}});
+        // stream.write_failure({{}});
        return;
    }

    start_vr.get().fill();
-    Node *start = new Node(start_vr.take(), 0, tkey);
+    Node *start = new (stack.allocate<Node>()) Node(start_vr.take(), 0, tkey);
    queue.push(start);
-    all_nodes.push_back(start);

    int count = 0;
    do {
@ -94,7 +94,7 @@ void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
        queue.pop();

        if (max_depth <= now->depth) {
-            stream.write_success_empty();
+            // stream.write_success_empty();
            // best.push_back(now);
            count++;
            if (count >= limit) {
@ -107,16 +107,14 @@ void astar(DbAccessor &t, code_args_t &args, STREAM &stream)
            VertexAccessor va = edge.to();
            if (vertex_filter_contained(t, va, now)) {
                auto cost = 1 - va.at(tkey).get()->value();
-                Node *n = new Node(va, now->cost + cost, now, tkey);
+                Node *n = new (stack.allocate<Node>())
+                    Node(va, now->cost + cost, now, tkey);
                queue.push(n);
-                all_nodes.push_back(n);
            }
        });
    } while (!queue.empty());

-    for (auto n : all_nodes) {
-        delete n;
-    }
+    stack.free();
 }

 class CodeCPU : public ICodeCPU<STREAM>