2017-08-03 18:08:39 +08:00
|
|
|
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <experimental/tuple>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

#include "graph.hpp"
|
|
|
|
|
|
|
|
namespace spinner {
|
|
|
|
// const for balancing penalty
|
|
|
|
// Used by Penalties(): each mnode's count is divided by
// c * total_count / MnodeCount(), so a larger c yields smaller penalties.
// Declared non-const, so it can be reassigned at runtime.
double c = 2.0;
|
|
|
|
|
|
|
|
/**
 * Returns the index of the maximum score in the given vector.
 * If there are multiple maximums, one is chosen at random
 * (a single call to std::rand() is made per invocation).
 *
 * @param scores - the scores to search. Must be non-empty and
 *    must contain at least one non-NaN value; both preconditions
 *    are checked with assert.
 * @return - the index (size_t) of a maximal element of scores.
 */
auto MaxRandom(const std::vector<double> &scores) {
  // An empty input has no maximum, and choosing among zero candidates
  // below would be a modulo by zero.
  assert(!scores.empty() && "MaxRandom requires at least one score");

  std::vector<size_t> best_indices;
  // Start from -infinity (not numeric_limits::lowest()) so that scores
  // which are themselves -infinity still compare equal to the running
  // maximum and are recorded as candidates.
  double current_max = -std::numeric_limits<double>::infinity();

  for (size_t ind = 0; ind < scores.size(); ind++) {
    if (scores[ind] > current_max) {
      // strictly better score found, all previous candidates are obsolete
      current_max = scores[ind];
      best_indices.clear();
    }
    if (scores[ind] == current_max) {
      best_indices.emplace_back(ind);
    }
  }

  // NaN compares false against everything, so an all-NaN input
  // produces no candidates at all.
  assert(!best_indices.empty() && "MaxRandom requires a non-NaN score");

  return best_indices[std::rand() % best_indices.size()];
}
|
|
|
|
|
|
|
|
/**
|
2017-08-17 21:36:58 +08:00
|
|
|
* Returns the index of the best (highest scored) mnode
|
|
|
|
* for the given vertex. If there are multiple mnodes with
|
|
|
|
* the best score, vertex prefers to remain on the same mnode
|
2017-08-03 18:08:39 +08:00
|
|
|
* (if among the best), or one is chosen at random.
|
|
|
|
*
|
|
|
|
* @param distributed - the distributed system.
|
2017-08-17 21:36:58 +08:00
|
|
|
* @param vertex - the vertex which is being evaluated.
|
|
|
|
* @param penalties - a vector of penalties (per mnode).
|
|
|
|
* @param current_mnode - the mnode on which the given
|
|
|
|
* vertex is currently residing.
|
2017-08-03 18:08:39 +08:00
|
|
|
* @return - std::pair<int, std::vector<double>> which is a
|
2017-08-17 21:36:58 +08:00
|
|
|
* pair of (best mnode, score_per_mnode).
|
2017-08-03 18:08:39 +08:00
|
|
|
*/
|
2017-08-17 21:36:58 +08:00
|
|
|
auto BestMnode(const Distributed &distributed, const Vertex &vertex,
|
|
|
|
const std::vector<double> &penalties, int current_mnode) {
|
|
|
|
// scores per mnode
|
|
|
|
std::vector<double> scores(distributed.MnodeCount(), 0.0);
|
2017-08-03 18:08:39 +08:00
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
for (auto &edge : vertex.edges_in()) scores[edge.cur_mnid_] += 1.0;
|
|
|
|
for (auto &edge : vertex.edges_out()) scores[edge.cur_mnid_] += 1.0;
|
2017-08-03 18:08:39 +08:00
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
for (int mnode = 0; mnode < distributed.MnodeCount(); ++mnode) {
|
|
|
|
// normalize contribution of mnode over neighbourhood size
|
|
|
|
scores[mnode] /= vertex.edges_out().size() + vertex.edges_in().size();
|
2017-08-03 18:08:39 +08:00
|
|
|
// add balancing penalty
|
2017-08-17 21:36:58 +08:00
|
|
|
scores[mnode] -= penalties[mnode];
|
2017-08-03 18:08:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// pick the best destination, but prefer to stay if you can
|
|
|
|
size_t destination = MaxRandom(scores);
|
2017-08-17 21:36:58 +08:00
|
|
|
if (scores[current_mnode] == scores[destination])
|
|
|
|
destination = current_mnode;
|
2017-08-03 18:08:39 +08:00
|
|
|
|
|
|
|
return std::make_pair(destination, scores);
|
|
|
|
}
|
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
/** Indication if Spinner mnode penality is calculated based on
|
|
|
|
* vertex or edge mnode cardinalities */
|
2017-08-03 18:08:39 +08:00
|
|
|
// Vertex: Penalties() counts each mnode's VertexCount().
// Edge: Penalties() counts, per mnode, the sum of edges_out().size() and
//       edges_in().size() over all vertices on that mnode.
enum class PenaltyType { Vertex, Edge };
|
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
/** Calcualtes Spinner penalties for mnodes in the given
|
2017-08-03 18:08:39 +08:00
|
|
|
* distributed system. */
|
|
|
|
auto Penalties(const Distributed &distributed,
|
|
|
|
PenaltyType penalty_type = PenaltyType::Edge) {
|
|
|
|
std::vector<double> penalties;
|
|
|
|
int64_t total_count{0};
|
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
for (const auto &mnode : distributed) {
|
|
|
|
int64_t mnode_count{0};
|
2017-08-03 18:08:39 +08:00
|
|
|
switch (penalty_type) {
|
|
|
|
case PenaltyType::Vertex:
|
2017-08-17 21:36:58 +08:00
|
|
|
mnode_count += mnode.VertexCount();
|
2017-08-03 18:08:39 +08:00
|
|
|
break;
|
|
|
|
case PenaltyType::Edge:
|
2017-08-17 21:36:58 +08:00
|
|
|
for (const auto &vertex_kv : mnode) {
|
|
|
|
// Spinner counts the edges on a mnode as the sum
|
|
|
|
// of degrees of vertices on that mnode. In that sense
|
2017-08-03 18:08:39 +08:00
|
|
|
// both incoming and outgoing edges are individually
|
|
|
|
// added...
|
2017-08-17 21:36:58 +08:00
|
|
|
mnode_count += vertex_kv.second.edges_out().size();
|
|
|
|
mnode_count += vertex_kv.second.edges_in().size();
|
2017-08-03 18:08:39 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2017-08-17 21:36:58 +08:00
|
|
|
total_count += mnode_count;
|
|
|
|
penalties.emplace_back(mnode_count);
|
2017-08-03 18:08:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto &penalty : penalties)
|
2017-08-17 21:36:58 +08:00
|
|
|
penalty /= c * total_count / distributed.MnodeCount();
|
2017-08-03 18:08:39 +08:00
|
|
|
|
|
|
|
return penalties;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Do one spinner step (modifying the given distributed) */
|
|
|
|
void PerformSpinnerStep(Distributed &distributed) {
|
|
|
|
auto penalties = Penalties(distributed);
|
|
|
|
|
|
|
|
// here a strategy can be injected for limiting
|
|
|
|
// the number of movements performed in one step.
|
|
|
|
// limiting could be based on (for example):
|
2017-08-17 21:36:58 +08:00
|
|
|
// - limiting the number of movements per mnode
|
2017-08-03 18:08:39 +08:00
|
|
|
// - limiting only to movements that are above
|
|
|
|
// a treshold (score improvement or something)
|
2017-08-17 21:36:58 +08:00
|
|
|
// - not executing on all the mnodes (also prevents
|
2017-08-03 18:08:39 +08:00
|
|
|
// oscilations)
|
|
|
|
//
|
|
|
|
// in the first implementation just accumulate all
|
|
|
|
// the movements and execute together.
|
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
// relocation info: contains the address of the Vertex
|
|
|
|
// that needs to relocate and it's destination mnode
|
|
|
|
std::vector<std::pair<GlobalVertAddress, int>> movements;
|
2017-08-03 18:08:39 +08:00
|
|
|
|
2017-08-17 21:36:58 +08:00
|
|
|
for (const ShardedStorage &mnode : distributed)
|
|
|
|
for (const auto &gid_vertex_pair : mnode) {
|
|
|
|
// (best destination, scores) pair for vertex
|
2017-08-03 18:08:39 +08:00
|
|
|
std::pair<int, std::vector<double>> destination_scores =
|
2017-08-17 21:36:58 +08:00
|
|
|
BestMnode(distributed, gid_vertex_pair.second, penalties, mnode.mnid_);
|
|
|
|
if (destination_scores.first != mnode.mnid_)
|
|
|
|
movements.emplace_back(GlobalVertAddress(mnode.mnid_, gid_vertex_pair.first),
|
2017-08-03 18:08:39 +08:00
|
|
|
destination_scores.first);
|
|
|
|
}
|
|
|
|
|
|
|
|
// execute movements. it is likely that in the real system
|
|
|
|
// this will need to happen as a single db transaction
|
2017-08-17 21:36:58 +08:00
|
|
|
for (const auto &m : movements) distributed.MoveVertex(m.first, m.second);
|
2017-08-03 18:08:39 +08:00
|
|
|
}
|
|
|
|
} // namespace spinner
|