Implement the PageRank module
Reviewers: teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2730
This commit is contained in:
parent
b7738c64b3
commit
e5b3414335
@ -439,12 +439,13 @@ class Record:
|
||||
|
||||
class Vertices:
|
||||
'''Iterable over vertices in a graph.'''
|
||||
__slots__ = ('_graph',)
|
||||
__slots__ = ('_graph', '_len')
|
||||
|
||||
def __init__(self, graph):
    '''Wrap a low-level `_mgp.Graph` handle.

    Raise TypeError if `graph` is not an instance of `_mgp.Graph`.
    '''
    if isinstance(graph, _mgp.Graph):
        self._graph = graph
        # Cached vertex count; None until __len__ computes it.
        self._len = None
        return
    raise TypeError("Expected '_mgp.Graph', got '{}'".format(type(graph)))
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
# This is the same as the shallow copy, because we want to share the
|
||||
@ -468,6 +469,18 @@ class Vertices:
|
||||
raise InvalidContextError()
|
||||
vertex = vertices_it.next()
|
||||
|
||||
def __contains__(self, vertex):
|
||||
try:
|
||||
_ = self.graph.get_vertex_by_id(vertex.id)
|
||||
return True
|
||||
except IndexError:
|
||||
return False
|
||||
|
||||
def __len__(self):
|
||||
if not self._len:
|
||||
self._len = sum(1 for _ in self)
|
||||
return self._len
|
||||
|
||||
|
||||
class Graph:
|
||||
'''State of the graph database in current ProcCtx.'''
|
||||
|
134
query_modules/pagerank.py
Normal file
134
query_modules/pagerank.py
Normal file
@ -0,0 +1,134 @@
|
||||
import mgp
|
||||
import networkx as nx
|
||||
from itertools import chain
|
||||
|
||||
|
||||
class VertexDictionary:
    '''Read-only, dictionary-like view of one property over graph vertices.

    Keys are the vertices of `graph` that have the property `prop`; values
    are the corresponding property values. Nothing is copied: every lookup
    and iteration reads the vertex properties directly.
    '''

    def __init__(self, graph, prop):
        '''Create a view over `graph` keyed by vertices having `prop`.'''
        self.graph = graph
        self.prop = prop
        # Cached number of items; None until __len__ computes it.
        self.len = None

    def get(self, vertex, default=None):
        '''Return the property value of `vertex`, or `default` if missing.'''
        return vertex.properties.get(self.prop, default=default)

    def items(self):
        '''Yield `(vertex, value)` for every vertex that has the property.'''
        for v in self.graph.vertices:
            if self.prop in v.properties:
                yield v, v.properties[self.prop]

    def keys(self):
        '''Yield every vertex that has the property.'''
        for k, _ in self.items():
            yield k

    def values(self):
        '''Yield the property value of every vertex that has the property.'''
        for _, v in self.items():
            yield v

    def __len__(self):
        '''Return the number of vertices that have the property.

        The count is computed by a full pass over `items()` on first use
        and cached in `self.len` afterwards.
        '''
        # Compare against None rather than relying on truthiness: a cached
        # count of 0 is a valid result and must not trigger a recount.
        if self.len is None:
            self.len = sum(1 for _ in self.items())
        return self.len

    def __iter__(self):
        '''Iterate over the vertices that have the property.'''
        return self.keys()

    def __getitem__(self, vertex):
        '''Return the property value of `vertex`.

        Raise KeyError with a descriptive message if the vertex does not
        have the property.
        '''
        try:
            return vertex.properties[self.prop]
        except KeyError:
            raise KeyError(("Vertex {} doesn't have the required " +
                            "property '{}'").format(vertex.id, self.prop))

    def __contains__(self, vertex):
        '''Return True if `vertex` is in the graph and has the property.'''
        return vertex in self.graph.vertices and self.prop in vertex.properties
|
||||
|
||||
|
||||
@mgp.read_proc
def pagerank(ctx: mgp.ProcCtx,
             alpha: mgp.Number = 0.85,
             personalization: mgp.Nullable[str] = None,
             max_iter: int = 100,
             tol: mgp.Number = 1e-06,
             nstart: mgp.Nullable[str] = None,
             weight: mgp.Nullable[str] = 'weight',
             dangling: mgp.Nullable[str] = None
             ) -> mgp.Record(node=mgp.Vertex, rank=float):
    '''Run the PageRank algorithm on the whole graph.

    The available parameters are:

    - `alpha` -- Damping parameter.

    - `personalization` -- The "personalization vector". The name of the
      vertex property that supplies the personalization value of each node.
      A node without that property gets a personalization value of zero. By
      default, a uniform distribution is used.

    - `max_iter` -- Maximum number of iterations in the power method
      eigenvalue solver.

    - `tol` -- Error tolerance used to check for convergence in the power
      method eigenvalue solver.

    - `nstart` -- The name of the vertex property that supplies the starting
      value of the iteration for each node.

    - `weight` -- The name of the edge property that supplies the weight of
      each edge. If None, or if the property doesn't exist, weights are set
      to 1.

    - `dangling` -- The outedges to be assigned to any "dangling" nodes,
      i.e., nodes without any outedges. The name of the vertex property that
      supplies, for each node, the weight of the outedge pointing to that
      node. By default, dangling nodes are given outedges according to the
      personalization vector. This must be selected to result in an
      irreducible transition matrix. It may be common to have the dangling
      dictionary be the same as the personalization dictionary.

    Return a single record for every node. Each record has two fields,
    `node` and `rank`, which together give the calculated rank for the node.

    Raise TypeError if any of the looked-up vertex or edge properties is
    present but is not an `int` or a `float`.

    As an example, the following openCypher query calculates the ranks of
    all the nodes in the graph using the PageRank algorithm. The
    personalization value for every node is taken from its property named
    'personalization', while the `alpha` and `max_iter` parameters are set
    to 0.85 and 150 respectively:

    CALL pagerank.pagerank(0.85, 'personalization', 150) YIELD *;

    '''
    numeric = (int, float)
    # Only the vertex properties the caller actually named need validating.
    vertex_props = [name for name in (personalization, nstart, dangling)
                    if name is not None]

    def as_vertex_dictionary(name):
        # networkx receives either None or a dict-like view of the property.
        if name is None:
            return None
        return VertexDictionary(ctx.graph, name)

    def checked_vertices():
        # Lazily validate and yield every vertex; a missing property is
        # accepted (the default of 1 passes the numeric check).
        for vertex in ctx.graph.vertices:
            for name in vertex_props:
                value = vertex.properties.get(name, default=1)
                if not isinstance(value, numeric):
                    raise TypeError(("Property '{}' of vertex '{}' needs to " +
                                     "be a number").format(name, vertex.id))
            yield vertex

    def checked_edges():
        # Lazily validate and yield (source, target, attributes) triples.
        # Every vertex contributes both its incoming and its outgoing edges.
        for vertex in ctx.graph.vertices:
            for edge in chain(vertex.in_edges, vertex.out_edges):
                if weight is not None:
                    value = edge.properties.get(weight, default=1)
                    if not isinstance(value, numeric):
                        raise TypeError(
                            "Property '{}' of edge '{}' needs to be a number"
                            .format(weight, edge.id))
                yield edge.from_vertex, edge.to_vertex, edge.properties

    digraph = nx.DiGraph()
    digraph.add_nodes_from(checked_vertices())
    digraph.add_edges_from(checked_edges())

    ranks = nx.pagerank(
        digraph,
        alpha=alpha,
        personalization=as_vertex_dictionary(personalization),
        max_iter=max_iter,
        tol=tol,
        nstart=as_vertex_dictionary(nstart),
        weight=weight,
        dangling=as_vertex_dictionary(dangling))

    return [mgp.Record(node=vertex, rank=rank)
            for vertex, rank in ranks.items()]
|
Loading…
Reference in New Issue
Block a user