Make csv_to_snapshot more user friendly

Summary:
Time csv_to_snapshot conversion and log it.
Check if writing csv_to_snapshot failed.
Extract LoadConfig from memgraph_bolt to config.hpp.
Read memgraph config in csv_to_snapshot for snapshot_directory.
Rename csv_to_snapshot to mg_import_csv.
Add tests for tools.
Run tools tests in apollo.

Reviewers: mislav.bradac, florijan, mferencevic, buda

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D931
This commit is contained in:
Teon Banek 2017-10-25 15:28:10 +02:00
parent cfb8db9e20
commit df72861b90
19 changed files with 343 additions and 110 deletions

56
src/config.hpp Normal file
View File

@ -0,0 +1,56 @@
#pragma once
#include <experimental/filesystem>
#include <string>
#include <vector>
#include <gflags/gflags.h>
#include <glog/logging.h>
/// Reads the memgraph configuration files.
///
/// Load flags in this order, the last one has the highest priority:
/// 1) /etc/memgraph/memgraph.conf
/// 2) ~/.memgraph/config
/// 3) env - MEMGRAPH_CONFIG
void LoadConfig() {
namespace fs = std::experimental::filesystem;
std::vector<fs::path> configs = {fs::path("/etc/memgraph/memgraph.conf")};
if (getenv("HOME") != nullptr)
configs.emplace_back(fs::path(getenv("HOME")) /
fs::path(".memgraph/config"));
{
auto memgraph_config = getenv("MEMGRAPH_CONFIG");
if (memgraph_config != nullptr) {
auto path = fs::path(memgraph_config);
CHECK(fs::exists(path))
<< "MEMGRAPH_CONFIG environment variable set to nonexisting path: "
<< path.generic_string();
configs.emplace_back(path);
}
}
std::vector<std::string> flagfile_arguments;
for (const auto &config : configs)
if (fs::exists(config)) {
flagfile_arguments.emplace_back(
std::string("--flag-file=" + config.generic_string()));
}
int custom_argc = static_cast<int>(flagfile_arguments.size()) + 1;
char **custom_argv = new char *[custom_argc];
custom_argv[0] = strdup(std::string("memgraph").c_str());
for (int i = 0; i < static_cast<int>(flagfile_arguments.size()); ++i) {
custom_argv[i + 1] = strdup(flagfile_arguments[i].c_str());
}
// setup flags from config flags
gflags::ParseCommandLineFlags(&custom_argc, &custom_argv, false);
// unconsumed arguments have to be freed to avoid memory leak since they are
// strdup-ed.
for (int i = 0; i < custom_argc; ++i) free(custom_argv[i]);
delete[] custom_argv;
}

View File

@ -69,7 +69,7 @@ bool Snapshooter::Encode(const fs::path &snapshot_file,
}
buffer.WriteSummary(vertex_num, edge_num);
buffer.Close();
} catch (std::ifstream::failure e) {
} catch (const std::ifstream::failure &) {
if (fs::exists(snapshot_file) && !fs::remove(snapshot_file)) {
LOG(ERROR) << "Error while removing corrupted snapshot file: "
<< snapshot_file;
@ -79,7 +79,7 @@ bool Snapshooter::Encode(const fs::path &snapshot_file,
return true;
}
fs::path Snapshooter::GetSnapshotFileName(const fs::path &snapshot_folder) {
fs::path GetSnapshotFileName(const fs::path &snapshot_folder) {
std::string date_str =
Timestamp(Timestamp::now())
.to_string("{:04d}_{:02d}_{:02d}__{:02d}_{:02d}_{:02d}_{:05d}");

View File

@ -8,6 +8,11 @@ namespace fs = std::experimental::filesystem;
class GraphDbAccessor;
/**
* Returns path to new snapshot file in format snapshot_folder/timestamp.
*/
fs::path GetSnapshotFileName(const fs::path &snapshot_folder);
/**
* Class responsible for making snapshots. Snapshots are stored in folder
* memgraph/build/$snapshot_folder/$db_name using bolt protocol.
@ -29,11 +34,6 @@ class Snapshooter {
int snapshot_max_retained);
private:
/**
* Method returns path to new snapshot file in format
* memgraph/build/$snapshot_folder/$db_name/$timestamp
*/
fs::path GetSnapshotFileName(const fs::path &snapshot_folder);
/**
* Method used to keep given number of snapshots in snapshot folder. Newest
* max_retained_files snapshots are kept, other snapshots are deleted. If

View File

@ -6,18 +6,16 @@
#include "communication/bolt/v1/session.hpp"
#include "communication/server.hpp"
#include "config.hpp"
#include "io/network/network_endpoint.hpp"
#include "io/network/network_error.hpp"
#include "io/network/socket.hpp"
#include "utils/flag_validation.hpp"
#include "utils/scheduler.hpp"
#include "utils/signals/handler.hpp"
#include "utils/stacktrace.hpp"
#include "utils/sysinfo/memory.hpp"
#include "utils/terminate_handler.hpp"
#include "version.hpp"
namespace fs = std::experimental::filesystem;
@ -42,60 +40,14 @@ DEFINE_uint64(memory_warning_threshold, 1024,
"less available RAM available it will log a warning. Set to 0 to "
"disable.");
// Load flags in this order, the last one has the highest priority:
// 1) /etc/memgraph/memgraph.conf
// 2) ~/.memgraph/config
// 3) env - MEMGRAPH_CONFIG
// 4) command line flags
void LoadConfig(int &argc, char **&argv) {
std::vector<fs::path> configs = {fs::path("/etc/memgraph/memgraph.conf")};
if (getenv("HOME") != nullptr)
configs.emplace_back(fs::path(getenv("HOME")) /
fs::path(".memgraph/config"));
{
auto memgraph_config = getenv("MEMGRAPH_CONFIG");
if (memgraph_config != nullptr) {
auto path = fs::path(memgraph_config);
CHECK(fs::exists(path))
<< "MEMGRAPH_CONFIG environment variable set to nonexisting path: "
<< path.generic_string();
configs.emplace_back(path);
}
}
std::vector<std::string> flagfile_arguments;
for (const auto &config : configs)
if (fs::exists(config)) {
flagfile_arguments.emplace_back(
std::string("--flag-file=" + config.generic_string()));
}
int custom_argc = static_cast<int>(flagfile_arguments.size()) + 1;
char **custom_argv = new char *[custom_argc];
custom_argv[0] = strdup(std::string("memgraph").c_str());
for (int i = 0; i < static_cast<int>(flagfile_arguments.size()); ++i) {
custom_argv[i + 1] = strdup(flagfile_arguments[i].c_str());
}
// setup flags from config flags
gflags::ParseCommandLineFlags(&custom_argc, &custom_argv, false);
// unconsumed arguments have to be freed to avoid memory leak since they are
// strdup-ed.
for (int i = 0; i < custom_argc; ++i) free(custom_argv[i]);
delete[] custom_argv;
// setup flags from command line
gflags::ParseCommandLineFlags(&argc, &argv, true);
}
int main(int argc, char **argv) {
google::SetUsageMessage("Memgraph database server");
gflags::SetVersionString(version_string);
LoadConfig(argc, argv);
// Load config before parsing arguments, so that flags from the command line
// overwrite the config.
LoadConfig();
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
google::SetLogDestination(google::INFO, FLAGS_log_file.c_str());

View File

@ -131,6 +131,6 @@ memgraph_snapshot_dir=${dataset_dir}/memgraph/default
mkdir -p ${memgraph_snapshot_dir}
cd ${memgraph_snapshot_dir}
echo "Converting CSV dataset to '${memgraph_snapshot_dir}/snapshot'"
${base_dir}/tools/csv_to_snapshot --out snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
${base_dir}/tools/mg_import_csv --out snapshot ${csv_dataset} --csv-delimiter "|" --array-delimiter ";"
echo "Done!"

2
tools/.gitignore vendored
View File

@ -1,2 +1,2 @@
build/
csv_to_snapshot
mg_import_csv

View File

@ -72,3 +72,5 @@ set(memgraph_src_dir ${PROJECT_SOURCE_DIR}/../src)
include_directories(${memgraph_src_dir})
add_subdirectory(src)
enable_testing()
add_subdirectory(tests)

View File

@ -19,6 +19,8 @@ cd build_release
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1000 make -j$THREADS memgraph_link_target memgraph__macro_benchmark memgraph__stress
cd ../tools/apollo
cd ../tools
./setup
cd apollo
./generate debug

View File

@ -32,6 +32,8 @@ cd build
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1000 make -j$THREADS memgraph_link_target memgraph__macro_benchmark
cd ../../memgraph/tools/apollo
cd ../../memgraph/tools
./setup
cd apollo
./generate diff

View File

@ -13,6 +13,8 @@ BASE_DIR_NAME = os.path.basename(BASE_DIR)
BUILD_DIR = os.path.join(BASE_DIR, "build")
LIBS_DIR = os.path.join(BASE_DIR, "libs")
TESTS_DIR = os.path.join(BUILD_DIR, "tests")
TOOLS_DIR = os.path.join(BASE_DIR, "tools")
TOOLS_BUILD_DIR = os.path.join(TOOLS_DIR, "build")
OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")
# output lists
@ -284,11 +286,11 @@ if mode == "release":
if mode == "release":
ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
csv_to_snapshot_path = os.path.join(BASE_DIR, "tools", "csv_to_snapshot")
mg_import_csv_path = os.path.join(BASE_DIR, "tools", "mg_import_csv")
plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools", "plot_ldbc_latency")
infile = create_archive("ldbc", [binary_release_path, ldbc_path,
binary_release_link_path, neo4j_path, config_path,
csv_to_snapshot_path, plot_ldbc_latency_path],
mg_import_csv_path, plot_ldbc_latency_path],
cwd = WORKSPACE_DIR)
cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
outfile_paths = "\./memgraph/tests/public_benchmark/ldbc/results/.+\n" \
@ -297,6 +299,18 @@ if mode == "release":
infile = infile, outfile_paths = outfile_paths,
slave_group = "remote_20c140g", enable_network = True))
# tools tests
ctest_output = run_cmd(["ctest", "-N"], TOOLS_BUILD_DIR)
tools_infile = create_archive("tools_test", [TOOLS_BUILD_DIR], cwd = WORKSPACE_DIR)
for row in ctest_output.split("\n"):
# Filter rows only containing tests.
if "Test #" not in row: continue
test_name = row.split(":")[1].strip()
test_dir = os.path.relpath(TOOLS_BUILD_DIR, WORKSPACE_DIR)
commands = "cd {}\nctest --output-on-failure -R \"^{}$\"".format(test_dir, test_name)
run = generate_run("tools_" + test_name, commands = commands, infile = tools_infile)
RUNS.append(run)
# store ARCHIVES and RUNS
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)

View File

@ -1,6 +1,6 @@
FROM debian:stretch
COPY csv_to_snapshot /usr/local/bin/csv_to_snapshot
COPY mg_import_csv /usr/local/bin/mg_import_csv
# Setup memgraph user and group.
RUN groupadd -r memgraph
@ -16,7 +16,7 @@ USER memgraph:memgraph
VOLUME /data
WORKDIR /data
ENTRYPOINT ["csv_to_snapshot"]
ENTRYPOINT ["mg_import_csv"]
# Print help and usage by default, since at least one --nodes argument is
# required.
CMD ["--help"]

View File

@ -1,9 +1,10 @@
add_executable(csv_to_snapshot
csv_to_snapshot/main.cpp
# This is just friggin terrible. csv_to_snapshot needs to depend almost on
add_executable(mg_import_csv
mg_import_csv/main.cpp
# This is just friggin terrible. mg_import_csv needs to depend almost on
# the whole memgraph, just to use TypedValue and BaseEncoder.
${memgraph_src_dir}/data_structures/concurrent/skiplist_gc.cpp
${memgraph_src_dir}/database/graph_db_accessor.cpp
${memgraph_src_dir}/durability/snapshooter.cpp
${memgraph_src_dir}/query/typed_value.cpp
${memgraph_src_dir}/storage/edge_accessor.cpp
${memgraph_src_dir}/storage/locking/record_lock.cpp
@ -16,12 +17,12 @@ add_executable(csv_to_snapshot
# Strip the executable in release build.
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
if (lower_build_type STREQUAL "release")
add_custom_command(TARGET csv_to_snapshot POST_BUILD
COMMAND strip -s csv_to_snapshot
COMMENT Stripping symbols and sections from csv_to_snapshot)
add_custom_command(TARGET mg_import_csv POST_BUILD
COMMAND strip -s mg_import_csv
COMMENT Stripping symbols and sections from mg_import_csv)
endif()
target_link_libraries(csv_to_snapshot stdc++fs Threads::Threads fmt
target_link_libraries(mg_import_csv stdc++fs Threads::Threads fmt
gflags glog cppitertools)
install(TARGETS csv_to_snapshot
install(TARGETS mg_import_csv
RUNTIME DESTINATION .)

View File

@ -9,9 +9,12 @@
#include "glog/logging.h"
#include "communication/bolt/v1/encoder/base_encoder.hpp"
#include "config.hpp"
#include "durability/file_writer_buffer.hpp"
#include "durability/snapshooter.hpp"
#include "durability/version.hpp"
#include "utils/string.hpp"
#include "utils/timer.hpp"
bool ValidateNotEmpty(const char *flagname, const std::string &value) {
if (utils::Trim(value).empty()) {
@ -21,8 +24,10 @@ bool ValidateNotEmpty(const char *flagname, const std::string &value) {
return true;
}
DEFINE_string(out, "", "Destination for the created snapshot file");
DEFINE_validator(out, &ValidateNotEmpty);
DEFINE_string(out, "",
"Destination for the created snapshot file. Without it, snapshot "
"is written inside the expected snapshots directory of Memgraph "
"installation.");
DEFINE_bool(overwrite, false, "Overwrite the output file if it exists");
DEFINE_string(array_delimiter, ";",
"Delimiter between elements of array values, default is ';'");
@ -108,6 +113,7 @@ std::vector<std::string> ReadRow(std::istream &stream) {
std::vector<char> column;
char c;
while (!stream.get(c).eof()) {
if (!stream) LOG(FATAL) << "Unable to read CSV row";
if (quoting) {
if (c == quoting)
quoting = 0;
@ -316,56 +322,95 @@ auto ConvertRelationships(
}
void Convert(const std::vector<std::string> &nodes,
const std::vector<std::string> &relationships) {
FileWriterBuffer buffer(FLAGS_out);
communication::bolt::BaseEncoder<FileWriterBuffer> encoder(buffer);
int64_t node_count = 0;
int64_t relationship_count = 0;
MemgraphNodeIdMap node_id_map;
// Snapshot file has the following contents in order:
// 1) magic number
// 2) transactional snapshot of the snapshoter. When the snapshot is
// generated it's an empty list.
// 3) list of label+property index
// 4) all nodes, sequentially, but not encoded as a list
// 5) all relationships, sequentially, but not encoded as a list
// 5) summary with node count, relationship count and hash digest
encoder.WriteRAW(durability::kMagicNumber.data(),
durability::kMagicNumber.size());
encoder.WriteTypedValue(durability::kVersion);
encoder.WriteList({}); // Transactional snapshot.
encoder.WriteList({}); // Label + property indexes.
for (const auto &nodes_file : nodes) {
node_count += ConvertNodes(nodes_file, node_id_map, encoder);
const std::vector<std::string> &relationships,
const std::string &output_path) {
try {
FileWriterBuffer buffer(output_path);
communication::bolt::BaseEncoder<FileWriterBuffer> encoder(buffer);
int64_t node_count = 0;
int64_t relationship_count = 0;
MemgraphNodeIdMap node_id_map;
// Snapshot file has the following contents in order:
// 1) magic number
// 2) transactional snapshot of the snapshoter. When the snapshot is
// generated it's an empty list.
// 3) list of label+property index
// 4) all nodes, sequentially, but not encoded as a list
// 5) all relationships, sequentially, but not encoded as a list
// 5) summary with node count, relationship count and hash digest
encoder.WriteRAW(durability::kMagicNumber.data(),
durability::kMagicNumber.size());
encoder.WriteTypedValue(durability::kVersion);
encoder.WriteList({}); // Transactional snapshot.
encoder.WriteList({}); // Label + property indexes.
for (const auto &nodes_file : nodes) {
node_count += ConvertNodes(nodes_file, node_id_map, encoder);
}
for (const auto &relationships_file : relationships) {
relationship_count +=
ConvertRelationships(relationships_file, node_id_map, encoder);
}
buffer.WriteSummary(node_count, relationship_count);
} catch (const std::ios_base::failure &) {
// Only FileWriterBuffer sets the underlying fstream to throw.
LOG(FATAL) << fmt::format("Unable to write to '{}'", output_path);
}
for (const auto &relationships_file : relationships) {
relationship_count +=
ConvertRelationships(relationships_file, node_id_map, encoder);
}
buffer.WriteSummary(node_count, relationship_count);
}
static const char *usage =
"[OPTION]... --out SNAPSHOT_FILE [--nodes=CSV_FILE]... [--edges=CSV_FILE]...\n"
"[OPTION]... [--out=SNAPSHOT_FILE] [--nodes=CSV_FILE]... "
"[--relationships=CSV_FILE]...\n"
"Create a Memgraph recovery snapshot file from CSV.\n";
// Used only to get the value from memgraph's configuration files.
DEFINE_HIDDEN_string(snapshot_directory, "", "Snapshot directory");
std::string GetOutputPath() {
// If we have the 'out' flag, use that.
if (!utils::Trim(FLAGS_out).empty()) return FLAGS_out;
// Without the 'out', fall back to reading the memgraph configuration for
// snapshot_directory. Hopefully, memgraph configuration doesn't contain other
// flags which are defined in this file.
LoadConfig();
// Without snapshot_directory, we have to require 'out' flag.
if (utils::Trim(FLAGS_snapshot_directory).empty())
LOG(FATAL) << "Unable to determine snapshot output location. Please, "
"provide the 'out' flag";
// TODO: Remove 'default' when Dbms is purged.
std::string snapshot_dir = FLAGS_snapshot_directory + "/default";
try {
if (!std::experimental::filesystem::exists(snapshot_dir) &&
!std::experimental::filesystem::create_directories(snapshot_dir)) {
LOG(FATAL) << fmt::format("Cannot create snapshot directory '{}'",
snapshot_dir);
}
} catch (const std::experimental::filesystem::filesystem_error &error) {
LOG(FATAL) << error.what();
}
return std::string(GetSnapshotFileName(snapshot_dir));
}
int main(int argc, char *argv[]) {
gflags::SetUsageMessage(usage);
auto nodes = ParseRepeatedFlag("nodes", argc, argv);
auto relationships = ParseRepeatedFlag("relationships", argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
if (std::experimental::filesystem::exists(FLAGS_out) && !FLAGS_overwrite) {
std::string output_path(GetOutputPath());
if (std::experimental::filesystem::exists(output_path) && !FLAGS_overwrite) {
LOG(FATAL) << fmt::format(
"File exists: '{}'. Pass --overwrite if you want to overwrite.",
FLAGS_out);
output_path);
}
auto iter_all_inputs = iter::chain(nodes, relationships);
std::vector<std::string> all_inputs(iter_all_inputs.begin(),
iter_all_inputs.end());
LOG(INFO) << fmt::format("Converting {} to '{}'",
utils::Join(all_inputs, ", "), FLAGS_out);
Convert(nodes, relationships);
LOG(INFO) << fmt::format("Created '{}'", FLAGS_out);
utils::Join(all_inputs, ", "), output_path);
utils::Timer conversion_timer;
Convert(nodes, relationships, output_path);
double conversion_sec = conversion_timer.Elapsed().count();
LOG(INFO) << fmt::format("Created '{}' in {:.2f} seconds", output_path,
conversion_sec);
return 0;
}

View File

@ -0,0 +1,35 @@
include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
add_executable(mg_recovery_check
mg_recovery_check.cpp
${memgraph_src_dir}/communication/bolt/v1/decoder/decoded_value.cpp
${memgraph_src_dir}/data_structures/concurrent/skiplist_gc.cpp
${memgraph_src_dir}/database/dbms.cpp
${memgraph_src_dir}/database/graph_db.cpp
${memgraph_src_dir}/database/graph_db_accessor.cpp
${memgraph_src_dir}/durability/recovery.cpp
${memgraph_src_dir}/durability/snapshooter.cpp
${memgraph_src_dir}/query/typed_value.cpp
${memgraph_src_dir}/storage/edge_accessor.cpp
${memgraph_src_dir}/storage/locking/record_lock.cpp
${memgraph_src_dir}/storage/property_value.cpp
${memgraph_src_dir}/storage/record_accessor.cpp
${memgraph_src_dir}/storage/vertex_accessor.cpp
${memgraph_src_dir}/transactions/transaction.cpp
)
target_link_libraries(mg_recovery_check stdc++fs Threads::Threads fmt glog
gflags cppitertools)
target_link_libraries(mg_recovery_check gtest gtest_main)
# Copy CSV data to CMake build dir
configure_file(csv/comment_nodes.csv csv/comment_nodes.csv COPYONLY)
configure_file(csv/forum_nodes.csv csv/forum_nodes.csv COPYONLY)
configure_file(csv/relationships.csv csv/relationships.csv COPYONLY)
# Copy the actual runner to CMake build dir
configure_file(test_mg_import_csv test_mg_import_csv COPYONLY)
add_test(NAME test_mg_import_csv
COMMAND test_mg_import_csv
--mg-import-csv ../src/mg_import_csv
--mg-recovery-check ./mg_recovery_check)

View File

@ -0,0 +1,6 @@
id:ID(COMMENT_ID)|country:string|browser:string|content:string|:LABEL
0|Croatia|Chrome|yes|Message;Comment
1|United Kingdom|Chrome|thanks|Message;Comment
2|Germany||LOL|Message;Comment
3|France|Firefox|I see|Message;Comment
4|Italy|Internet Explorer|fine|Message;Comment
1 id:ID(COMMENT_ID) country:string browser:string content:string :LABEL
2 0 Croatia Chrome yes Message;Comment
3 1 United Kingdom Chrome thanks Message;Comment
4 2 Germany LOL Message;Comment
5 3 France Firefox I see Message;Comment
6 4 Italy Internet Explorer fine Message;Comment

View File

@ -0,0 +1,6 @@
id:ID(FORUM_ID)|title:string|:LABEL
0|General|Forum
1|Support|Forum
2|Music|Forum
3|Film|Forum
4|Programming|Forum
1 id:ID(FORUM_ID) title:string :LABEL
2 0 General Forum
3 1 Support Forum
4 2 Music Forum
5 3 Film Forum
6 4 Programming Forum

View File

@ -0,0 +1,6 @@
:START_ID(COMMENT_ID)|:END_ID(FORUM_ID)|:TYPE
0|0|POSTED_ON
1|1|POSTED_ON
2|2|POSTED_ON
3|3|POSTED_ON
4|4|POSTED_ON
1 :START_ID(COMMENT_ID) :END_ID(FORUM_ID) :TYPE
2 0 0 POSTED_ON
3 1 1 POSTED_ON
4 2 2 POSTED_ON
5 3 3 POSTED_ON
6 4 4 POSTED_ON

View File

@ -0,0 +1,68 @@
#include <string>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "database/dbms.hpp"
#include "query/typed_value.hpp"
static const char *usage =
"--snapshot-dir SNAPSHOT_DIR\n"
"Check that Memgraph can recover that snapshot. This tool should be "
"invoked through 'test_mg_import' wrapper, so as to check that 'mg_import' "
"tools work correctly.\n";
DEFINE_string(snapshot_dir, "", "Path to where the snapshot is stored");
class RecoveryTest : public ::testing::Test {
protected:
void SetUp() override {
Recovery recovery;
std::string snapshot(FLAGS_snapshot_dir + "/snapshot");
recovery.Recover(snapshot, *dbms_.active());
}
Dbms dbms_;
};
TEST_F(RecoveryTest, TestVerticesRecovered) {
auto dba = dbms_.active();
EXPECT_EQ(dba->VerticesCount(), 10);
EXPECT_EQ(dba->VerticesCount(dba->Label("Comment")), 5);
for (const auto &vertex : dba->Vertices(dba->Label("Comment"), false)) {
EXPECT_TRUE(vertex.has_label(dba->Label("Message")));
}
EXPECT_EQ(dba->VerticesCount(dba->Label("Forum")), 5);
}
TEST_F(RecoveryTest, TestPropertyNull) {
auto dba = dbms_.active();
bool found = false;
for (const auto &vertex : dba->Vertices(dba->Label("Comment"), false)) {
auto id_prop = query::TypedValue(vertex.PropsAt(dba->Property("id")));
auto browser = query::TypedValue(vertex.PropsAt(dba->Property("browser")));
if (id_prop.IsString() && id_prop.Value<std::string>() == "2") {
EXPECT_FALSE(found);
found = true;
EXPECT_TRUE(browser.IsNull());
} else {
EXPECT_FALSE(browser.IsNull());
}
}
ASSERT_TRUE(found);
}
TEST_F(RecoveryTest, TestEdgesRecovered) {
auto dba = dbms_.active();
EXPECT_EQ(dba->EdgesCount(), 5);
for (const auto &edge : dba->Edges(false)) {
EXPECT_TRUE(edge.EdgeType() == dba->EdgeType("POSTED_ON"));
}
}
int main(int argc, char *argv[]) {
::testing::InitGoogleTest(&argc, argv);
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}

38
tools/tests/test_mg_import_csv Executable file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python3
'''Run mg_import_csv and test that recovery works with mg_recovery_check.'''
import argparse
import subprocess
import os
import tempfile
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
def parse_args():
argp = argparse.ArgumentParser(description=__doc__)
argp.add_argument('--mg-import-csv', required=True,
help='Path to mg_import_csv executable.')
argp.add_argument('--mg-recovery-check', required=True,
help='Path to mg_recovery_check executable.')
return argp.parse_args()
def main():
args = parse_args()
comment_nodes = os.path.join(_SCRIPT_DIR, 'csv', 'comment_nodes.csv')
forum_nodes = os.path.join(_SCRIPT_DIR, 'csv', 'forum_nodes.csv')
relationships = os.path.join(_SCRIPT_DIR, 'csv', 'relationships.csv')
with tempfile.TemporaryDirectory(suffix='-snapshots', dir=_SCRIPT_DIR) as snapshot_dir:
out_snapshot = os.path.join(snapshot_dir, 'snapshot')
mg_import_csv = [args.mg_import_csv, '--nodes', comment_nodes,
'--nodes', forum_nodes, '--relationships', relationships,
'--out', out_snapshot, '--csv-delimiter=|', '--array-delimiter=;']
subprocess.check_call(mg_import_csv)
mg_recovery_check = [args.mg_recovery_check, '--snapshot-dir', snapshot_dir]
subprocess.check_call(mg_recovery_check)
if __name__ == '__main__':
main()