Fix CSV multiple edge file import
Summary: The CSV importer used to generate non-unique edge IDs when generating edges from multiple CSV files. This is incompatible with the unique ID requirements introduced by the WAL. Tested and fixed in this diff. Reviewers: teon.banek, mferencevic Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D980
This commit is contained in:
parent
885d19401f
commit
b7188c296c
@ -300,10 +300,10 @@ void WriteRelationshipsRow(
|
||||
encoder.WriteMap(properties);
|
||||
}
|
||||
|
||||
auto ConvertRelationships(
|
||||
void ConvertRelationships(
|
||||
const std::string &relationships_path, const MemgraphNodeIdMap &node_id_map,
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder) {
|
||||
int64_t relationship_count = 0;
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder,
|
||||
int64_t &next_relationship_id) {
|
||||
std::ifstream relationships_file(relationships_path);
|
||||
CHECK(relationships_file)
|
||||
<< fmt::format("Unable to open '{}'", relationships_path);
|
||||
@ -312,13 +312,10 @@ auto ConvertRelationships(
|
||||
while (!row.empty()) {
|
||||
CHECK_EQ(row.size(), fields.size())
|
||||
<< "Expected as many values as there are header fields";
|
||||
auto relationship_id = relationship_count;
|
||||
WriteRelationshipsRow(fields, row, node_id_map, relationship_id, encoder);
|
||||
// Increase count and move to next row.
|
||||
relationship_count += 1;
|
||||
WriteRelationshipsRow(fields, row, node_id_map, next_relationship_id++,
|
||||
encoder);
|
||||
row = ReadRow(relationships_file);
|
||||
}
|
||||
return relationship_count;
|
||||
}
|
||||
|
||||
void Convert(const std::vector<std::string> &nodes,
|
||||
@ -328,7 +325,7 @@ void Convert(const std::vector<std::string> &nodes,
|
||||
HashedFileWriter buffer(output_path);
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> encoder(buffer);
|
||||
int64_t node_count = 0;
|
||||
int64_t relationship_count = 0;
|
||||
int64_t next_relationship_id = 0;
|
||||
MemgraphNodeIdMap node_id_map;
|
||||
// Snapshot file has the following contents in order:
|
||||
// 1) Magic number.
|
||||
@ -349,11 +346,11 @@ void Convert(const std::vector<std::string> &nodes,
|
||||
node_count += ConvertNodes(nodes_file, node_id_map, encoder);
|
||||
}
|
||||
for (const auto &relationships_file : relationships) {
|
||||
relationship_count +=
|
||||
ConvertRelationships(relationships_file, node_id_map, encoder);
|
||||
ConvertRelationships(relationships_file, node_id_map, encoder,
|
||||
next_relationship_id);
|
||||
}
|
||||
buffer.WriteValue(node_count);
|
||||
buffer.WriteValue(relationship_count);
|
||||
buffer.WriteValue(next_relationship_id);
|
||||
buffer.WriteValue(buffer.hash());
|
||||
} catch (const std::ios_base::failure &) {
|
||||
// Only HashedFileWriter sets the underlying fstream to throw.
|
||||
|
@ -25,7 +25,8 @@ target_link_libraries(mg_recovery_check gtest gtest_main)
|
||||
# Copy CSV data to CMake build dir
|
||||
configure_file(csv/comment_nodes.csv csv/comment_nodes.csv COPYONLY)
|
||||
configure_file(csv/forum_nodes.csv csv/forum_nodes.csv COPYONLY)
|
||||
configure_file(csv/relationships.csv csv/relationships.csv COPYONLY)
|
||||
configure_file(csv/relationships_0.csv csv/relationships_0.csv COPYONLY)
|
||||
configure_file(csv/relationships_1.csv csv/relationships_1.csv COPYONLY)
|
||||
# Copy the actual runner to CMake build dir
|
||||
configure_file(test_mg_import_csv test_mg_import_csv COPYONLY)
|
||||
|
||||
|
@ -2,5 +2,3 @@
|
||||
0|0|POSTED_ON
|
||||
1|1|POSTED_ON
|
||||
2|2|POSTED_ON
|
||||
3|3|POSTED_ON
|
||||
4|4|POSTED_ON
|
|
3
tools/tests/csv/relationships_1.csv
Normal file
3
tools/tests/csv/relationships_1.csv
Normal file
@ -0,0 +1,3 @@
|
||||
:START_ID(COMMENT_ID)|:END_ID(FORUM_ID)|:TYPE
|
||||
3|3|POSTED_ON
|
||||
4|4|POSTED_ON
|
|
@ -23,11 +23,13 @@ def main():
|
||||
args = parse_args()
|
||||
comment_nodes = os.path.join(_SCRIPT_DIR, 'csv', 'comment_nodes.csv')
|
||||
forum_nodes = os.path.join(_SCRIPT_DIR, 'csv', 'forum_nodes.csv')
|
||||
relationships = os.path.join(_SCRIPT_DIR, 'csv', 'relationships.csv')
|
||||
relationships_0 = os.path.join(_SCRIPT_DIR, 'csv', 'relationships_0.csv')
|
||||
relationships_1 = os.path.join(_SCRIPT_DIR, 'csv', 'relationships_1.csv')
|
||||
with tempfile.TemporaryDirectory(suffix='-snapshots', dir=_SCRIPT_DIR) as snapshot_dir:
|
||||
out_snapshot = os.path.join(snapshot_dir, 'snapshot')
|
||||
mg_import_csv = [args.mg_import_csv, '--nodes', comment_nodes,
|
||||
'--nodes', forum_nodes, '--relationships', relationships,
|
||||
'--nodes', forum_nodes, '--relationships', relationships_0,
|
||||
'--relationships', relationships_1,
|
||||
'--out', out_snapshot, '--csv-delimiter=|', '--array-delimiter=;']
|
||||
subprocess.check_call(mg_import_csv)
|
||||
mg_recovery_check = [args.mg_recovery_check, '--snapshot-dir', snapshot_dir]
|
||||
|
Loading…
Reference in New Issue
Block a user