From ad740e4ae25402699c94e24d25463316ab7f613d Mon Sep 17 00:00:00 2001
From: Teon Banek
Date: Wed, 12 Feb 2020 10:27:59 +0100
Subject: [PATCH] Add _mgp.Graph and _mgp.VerticesIterator to embedded Python

Reviewers: llugovic, ipaljak, mferencevic

Reviewed By: ipaljak

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2669
---
 include/mgp.py                    |  44 ++++++-
 src/memgraph_init.cpp             |   2 +
 src/query/procedure/py_module.cpp | 191 ++++++++++++++++++++++++++++++
 src/query/procedure/py_module.hpp |  12 ++
 4 files changed, 246 insertions(+), 3 deletions(-)

diff --git a/include/mgp.py b/include/mgp.py
index 56988b47a..ad6d4d74b 100644
--- a/include/mgp.py
+++ b/include/mgp.py
@@ -17,6 +17,8 @@ This module provides the API for usage in custom openCypher procedures.
 from collections import namedtuple
 import typing
 
+import _mgp
+
 
 class Label:
     '''Label of a Vertex.'''
@@ -166,6 +168,9 @@ class Vertex:
     invalid Vertex instance will raise InvalidVertexError.
     '''
 
+    def __init__(self, vertex):
+        raise NotImplementedError()
+
     @property
     def id(self) -> VertexId:
         '''Raise InvalidVertexError.'''
@@ -244,14 +249,42 @@ class InvalidProcCtxError(Exception):
 
 class Vertices:
     '''Iterable over vertices in a graph.'''
+    __slots__ = ('_graph',)
+
+    def __init__(self, graph):
+        if not isinstance(graph, _mgp.Graph):
+            raise TypeError("Expected '_mgp.Graph', got '{}'".format(type(graph)))
+        self._graph = graph
+
+    def is_valid(self) -> bool:
+        '''Return True if `self` is in valid context and may be used.'''
+        return self._graph.is_valid()
 
     def __iter__(self) -> typing.Iterable[Vertex]:
         '''Raise InvalidProcCtxError if context is invalid.'''
-        pass
+        if not self.is_valid():
+            raise InvalidProcCtxError()
+        vertices_it = self._graph.iter_vertices()
+        vertex = vertices_it.get()
+        while vertex is not None:
+            yield Vertex(vertex)
+            if not self.is_valid():
+                raise InvalidProcCtxError()
+            vertex = vertices_it.next()
 
 
 class Graph:
     '''State of the graph database in current ProcCtx.'''
+    __slots__ = ('_graph',)
+
+    def __init__(self, graph):
+        if not isinstance(graph, _mgp.Graph):
+            raise TypeError("Expected '_mgp.Graph', got '{}'".format(type(graph)))
+        self._graph = graph
+
+    def is_valid(self) -> bool:
+        '''Return True if `self` is in valid context and may be used.'''
+        return self._graph.is_valid()
 
     def get_vertex_by_id(self, vertex_id: VertexId) -> Vertex:
         '''Return the Vertex corresponding to given vertex_id from the graph.
@@ -263,7 +296,10 @@ class Graph:
         Raise IndexError if unable to find the given vertex_id.
         Raise InvalidProcCtxError if context is invalid.
         '''
-        pass
+        if not self.is_valid():
+            raise InvalidProcCtxError()
+        vertex = self._graph.get_vertex_by_id(vertex_id)
+        return Vertex(vertex)
 
     @property
     def vertices(self) -> Vertices:
@@ -275,7 +311,9 @@ class Graph:
 
         Raise InvalidProcCtxError if context is invalid.
         '''
-        pass
+        if not self.is_valid():
+            raise InvalidProcCtxError()
+        return Vertices(self._graph)
 
 
 class ProcCtx:
diff --git a/src/memgraph_init.cpp b/src/memgraph_init.cpp
index 28006833b..23a697b7e 100644
--- a/src/memgraph_init.cpp
+++ b/src/memgraph_init.cpp
@@ -6,6 +6,7 @@
 #include "glue/communication.hpp"
 #include "py/py.hpp"
 #include "query/exceptions.hpp"
+#include "query/procedure/py_module.hpp"
 #include "requests/requests.hpp"
 #include "storage/v2/view.hpp"
 #include "utils/signals.hpp"
@@ -230,6 +231,7 @@ int WithInit(int argc, char **argv,
   // Set program name, so Python can find its way to runtime libraries relative
   // to executable.
   Py_SetProgramName(program_name);
+  PyImport_AppendInittab("_mgp", &query::procedure::PyInitMgpModule);
   Py_InitializeEx(0 /* = initsigs */);
   PyEval_InitThreads();
   Py_BEGIN_ALLOW_THREADS;
diff --git a/src/query/procedure/py_module.cpp b/src/query/procedure/py_module.cpp
index 40b30c680..402091377 100644
--- a/src/query/procedure/py_module.cpp
+++ b/src/query/procedure/py_module.cpp
@@ -50,4 +50,195 @@ py::Object MgpValueToPyObject(const mgp_value &value) {
   }
 }
 
+// Definitions of types wrapping C API types
+//
+// These should all be in the private `_mgp` Python module, which will be used
+// by the `mgp` to implement the user friendly Python API.
+
+// Wraps mgp_graph in a PyObject.
+//
+// Executing a `CALL python_module.procedure(...)` in openCypher should
+// instantiate exactly 1 mgp_graph instance. We will rely on this assumption in
+// order to test for validity of usage. The idea is to clear the `graph` to
+// `nullptr` after the execution completes. If a user stored a reference to
+// `_mgp.Graph` in their global Python state, then we are no longer working with
+// a valid graph so `nullptr` will catch this. `_mgp.Graph` provides `is_valid`
+// method for checking this by our higher level API in `mgp` module. Python only
+// does shallow copies by default, and we do not provide deep copy of
+// `_mgp.Graph`, so this validity concept should work fine.
+struct PyGraph {
+  PyObject_HEAD
+  const mgp_graph *graph;
+  mgp_memory *memory;
+};
+
+struct PyVerticesIterator {
+  PyObject_HEAD
+  mgp_vertices_iterator *it;
+  PyGraph *py_graph;
+};
+
+void PyVerticesIteratorDealloc(PyVerticesIterator *self) {
+  CHECK(self->it);
+  CHECK(self->py_graph);
+  // Avoid invoking `mgp_vertices_iterator_destroy` if we are not in valid
+  // execution context. The query execution should free all memory used during
+  // execution, so we may cause a double free issue.
+  if (self->py_graph->graph) mgp_vertices_iterator_destroy(self->it);
+  Py_DECREF(self->py_graph);
+  // Release the object's own storage; a custom tp_dealloc must do this.
+  Py_TYPE(self)->tp_free(self);
+}
+
+PyObject *PyVerticesIteratorGet(PyVerticesIterator *self,
+                                PyObject *Py_UNUSED(ignored)) {
+  CHECK(self->it);
+  CHECK(self->py_graph);
+  CHECK(self->py_graph->graph);
+  const auto *vertex = mgp_vertices_iterator_get(self->it);
+  if (!vertex) Py_RETURN_NONE;
+  // TODO: Wrap mgp_vertex_copy(vertex) into _mgp.Vertex and return it.
+  PyErr_SetString(PyExc_NotImplementedError, "get");
+  return nullptr;
+}
+
+PyObject *PyVerticesIteratorNext(PyVerticesIterator *self,
+                                 PyObject *Py_UNUSED(ignored)) {
+  CHECK(self->it);
+  CHECK(self->py_graph);
+  CHECK(self->py_graph->graph);
+  const auto *vertex = mgp_vertices_iterator_next(self->it);
+  if (!vertex) Py_RETURN_NONE;
+  // TODO: Wrap mgp_vertex_copy(vertex) into _mgp.Vertex and return it.
+  PyErr_SetString(PyExc_NotImplementedError, "next");
+  return nullptr;
+}
+
+static PyMethodDef PyVerticesIteratorMethods[] = {
+    {"get", reinterpret_cast<PyCFunction>(PyVerticesIteratorGet), METH_NOARGS,
+     "Get the current vertex pointed to by the iterator or return None."},
+    {"next", reinterpret_cast<PyCFunction>(PyVerticesIteratorNext), METH_NOARGS,
+     "Advance the iterator to the next vertex and return it."},
+    {nullptr},
+};
+
+static PyTypeObject PyVerticesIteratorType = {
+    PyVarObject_HEAD_INIT(nullptr, 0)
+    .tp_name = "_mgp.VerticesIterator",
+    .tp_doc = "Wraps struct mgp_vertices_iterator.",
+    .tp_basicsize = sizeof(PyVerticesIterator),
+    .tp_flags = Py_TPFLAGS_DEFAULT,
+    .tp_new = PyType_GenericNew,
+    .tp_methods = PyVerticesIteratorMethods,
+    .tp_dealloc = reinterpret_cast<destructor>(PyVerticesIteratorDealloc),
+};
+
+
+PyObject *PyGraphIsValid(PyGraph *self, PyObject *Py_UNUSED(ignored)) {
+  return PyBool_FromLong(!!self->graph);
+}
+
+PyObject *PyGraphGetVertexById(PyGraph *self, PyObject *args) {
+  CHECK(self->graph);
+  CHECK(self->memory);
+  static_assert(std::is_same_v<int64_t, long>);
+  int64_t id;
+  if (!PyArg_ParseTuple(args, "l", &id)) return nullptr;
+  auto *vertex =
+      mgp_graph_get_vertex_by_id(self->graph, mgp_vertex_id{id}, self->memory);
+  if (!vertex) {
+    PyErr_SetString(PyExc_IndexError,
+                    "Unable to find the vertex with given ID.");
+    return nullptr;
+  }
+  // TODO: Wrap into _mgp.Vertex and let it handle mgp_vertex_destroy via
+  // dealloc function.
+  mgp_vertex_destroy(vertex);
+  PyErr_SetString(PyExc_NotImplementedError, "get_vertex_by_id");
+  return nullptr;
+}
+
+PyObject *PyGraphIterVertices(PyGraph *self, PyObject *Py_UNUSED(ignored)) {
+  CHECK(self->graph);
+  CHECK(self->memory);
+  auto *vertices_it = mgp_graph_iter_vertices(self->graph, self->memory);
+  if (!vertices_it) {
+    PyErr_SetString(PyExc_MemoryError,
+                    "Unable to allocate mgp_vertices_iterator.");
+    return nullptr;
+  }
+  auto *py_vertices_it =
+      PyObject_New(PyVerticesIterator, &PyVerticesIteratorType);
+  if (!py_vertices_it) {
+    // Do not leak the C iterator when the Python wrapper cannot be created.
+    mgp_vertices_iterator_destroy(vertices_it);
+    PyErr_SetString(PyExc_MemoryError,
+                    "Unable to allocate _mgp.VerticesIterator.");
+    return nullptr;
+  }
+  py_vertices_it->it = vertices_it;
+  Py_INCREF(self);
+  py_vertices_it->py_graph = self;
+  return PyObject_Init(reinterpret_cast<PyObject *>(py_vertices_it),
+                       &PyVerticesIteratorType);
+}
+
+static PyMethodDef PyGraphMethods[] = {
+    {"is_valid", reinterpret_cast<PyCFunction>(PyGraphIsValid), METH_NOARGS,
+     "Return True if Graph is in valid context and may be used."},
+    {"get_vertex_by_id", reinterpret_cast<PyCFunction>(PyGraphGetVertexById),
+     METH_VARARGS, "Get the vertex or raise IndexError."},
+    {"iter_vertices", reinterpret_cast<PyCFunction>(PyGraphIterVertices),
+     METH_NOARGS, "Return _mgp.VerticesIterator."},
+    {nullptr},
+};
+
+static PyTypeObject PyGraphType = {
+    PyVarObject_HEAD_INIT(nullptr, 0)
+    .tp_name = "_mgp.Graph",
+    .tp_doc = "Wraps struct mgp_graph.",
+    .tp_basicsize = sizeof(PyGraph),
+    .tp_flags = Py_TPFLAGS_DEFAULT,
+    .tp_new = PyType_GenericNew,
+    .tp_methods = PyGraphMethods,
+};
+
+PyObject *MakePyGraph(const mgp_graph *graph, mgp_memory *memory) {
+  auto *py_graph = PyObject_New(PyGraph, &PyGraphType);
+  if (!py_graph) return nullptr;
+  py_graph->graph = graph;
+  py_graph->memory = memory;
+  return PyObject_Init(reinterpret_cast<PyObject *>(py_graph), &PyGraphType);
+}
+
+static PyModuleDef PyMgpModule = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "_mgp",
+    .m_doc = "Contains raw bindings to mg_procedure.h C API.",
+    .m_size = -1,
+};
+
+PyObject *PyInitMgpModule() {
+  if (PyType_Ready(&PyVerticesIteratorType) < 0) return nullptr;
+  if (PyType_Ready(&PyGraphType) < 0) return nullptr;
+  PyObject *mgp = PyModule_Create(&PyMgpModule);
+  if (!mgp) return nullptr;
+  Py_INCREF(&PyVerticesIteratorType);
+  if (PyModule_AddObject(
+          mgp, "VerticesIterator",
+          reinterpret_cast<PyObject *>(&PyVerticesIteratorType)) < 0) {
+    Py_DECREF(&PyVerticesIteratorType);
+    Py_DECREF(mgp);
+    return nullptr;
+  }
+  Py_INCREF(&PyGraphType);
+  if (PyModule_AddObject(mgp, "Graph",
+                         reinterpret_cast<PyObject *>(&PyGraphType)) < 0) {
+    Py_DECREF(&PyGraphType);
+    Py_DECREF(mgp);
+    return nullptr;
+  }
+  return mgp;
+}
+
 }  // namespace query::procedure
diff --git a/src/query/procedure/py_module.hpp b/src/query/procedure/py_module.hpp
index c39b48014..12ebb2fe9 100644
--- a/src/query/procedure/py_module.hpp
+++ b/src/query/procedure/py_module.hpp
@@ -4,10 +4,22 @@
 
 #include "py/py.hpp"
 
+struct mgp_graph;
+struct mgp_memory;
 struct mgp_value;
 
 namespace query::procedure {
 
 py::Object MgpValueToPyObject(const mgp_value &);
 
+/// Create the _mgp module for use in embedded Python.
+///
+/// The function is to be used before Py_Initialize via the following code.
+///
+///     PyImport_AppendInittab("_mgp", &query::procedure::PyInitMgpModule);
+PyObject *PyInitMgpModule();
+
+/// Create an instance of _mgp.Graph class.
+PyObject *MakePyGraph(const mgp_graph *, mgp_memory *);
+
 }  // namespace query::procedure