Integrated C++ CSV to snapshot into LDBC.
Reviewers: mislav.bradac Reviewed By: mislav.bradac Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D826
This commit is contained in:
parent
d9503d6b65
commit
99494d77e3
@ -9,8 +9,7 @@
|
|||||||
./run_benchmark --create-index --run-db memgraph # or neo4j
|
./run_benchmark --create-index --run-db memgraph # or neo4j
|
||||||
# To run update queries pass the properties file for updates and slow down
|
# To run update queries pass `--test-type updates` and slow down
|
||||||
# the execution by setting a larger time compression ratio.
|
# the execution by setting a larger time compression ratio.
|
||||||
./run_benchmark --create-index --run-db memgraph \
|
./run_benchmark --create-index --run-db memgraph --test-type updates \
|
||||||
--properties-file ldbc-snb-impls-updates.properties \
|
|
||||||
--time-compression-ratio 1.5
|
--time-compression-ratio 1.5
|
||||||
|
|
||||||
## How to run a specific test?
|
## How to run a specific test?
|
||||||
|
@ -12,6 +12,6 @@ TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREA
|
|||||||
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
|
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
|
||||||
|
|
||||||
# run update benchmarks
|
# run update benchmarks
|
||||||
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
|
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
|
||||||
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
|
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
|
||||||
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
|
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
|
||||||
|
@ -29,6 +29,7 @@ session = driver.session()
|
|||||||
|
|
||||||
# The first program argument is the path to a file with indexes.
|
# The first program argument is the path to a file with indexes.
|
||||||
with open(args.indexfile, "r") as f:
|
with open(args.indexfile, "r") as f:
|
||||||
|
print("Starting index creation...")
|
||||||
for line in f.readlines():
|
for line in f.readlines():
|
||||||
session.run(line.strip()).consume()
|
session.run(line.strip()).consume()
|
||||||
print("%s -> DONE" % line.strip())
|
print("%s -> DONE" % line.strip())
|
||||||
|
@ -43,17 +43,17 @@ ldbc.snb.interactive.LdbcQuery14_freq=49
|
|||||||
|
|
||||||
# At least one needs to be enabled in order to cause interleaving of short
|
# At least one needs to be enabled in order to cause interleaving of short
|
||||||
# reads. To get all the reads, you should run with a large operation count.
|
# reads. To get all the reads, you should run with a large operation count.
|
||||||
ldbc.snb.interactive.LdbcQuery1_enable=false
|
ldbc.snb.interactive.LdbcQuery1_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery2_enable=false
|
ldbc.snb.interactive.LdbcQuery2_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery3_enable=false
|
ldbc.snb.interactive.LdbcQuery3_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery4_enable=false
|
ldbc.snb.interactive.LdbcQuery4_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery5_enable=false
|
ldbc.snb.interactive.LdbcQuery5_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery6_enable=false
|
ldbc.snb.interactive.LdbcQuery6_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery7_enable=false
|
ldbc.snb.interactive.LdbcQuery7_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery8_enable=true
|
ldbc.snb.interactive.LdbcQuery8_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery9_enable=false
|
ldbc.snb.interactive.LdbcQuery9_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery10_enable=false
|
ldbc.snb.interactive.LdbcQuery10_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery11_enable=false
|
ldbc.snb.interactive.LdbcQuery11_enable=true
|
||||||
ldbc.snb.interactive.LdbcQuery12_enable=false
|
ldbc.snb.interactive.LdbcQuery12_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery13_enable=false
|
ldbc.snb.interactive.LdbcQuery13_enable=false
|
||||||
ldbc.snb.interactive.LdbcQuery14_enable=false
|
ldbc.snb.interactive.LdbcQuery14_enable=false
|
@ -44,7 +44,7 @@ class Memgraph:
|
|||||||
database_args = [binary, "--num-workers", self.num_workers,
|
database_args = [binary, "--num-workers", self.num_workers,
|
||||||
"--snapshot-directory", os.path.join(self.dataset,
|
"--snapshot-directory", os.path.join(self.dataset,
|
||||||
"memgraph"),
|
"memgraph"),
|
||||||
"--recover-on-startup", "true",
|
"--snapshot-recover-on-startup", "true",
|
||||||
"--port", self.port]
|
"--port", self.port]
|
||||||
|
|
||||||
# database env
|
# database env
|
||||||
@ -119,12 +119,12 @@ def parse_args():
|
|||||||
help='Dataset scale to use for benchmarking.')
|
help='Dataset scale to use for benchmarking.')
|
||||||
argp.add_argument('--host', default='127.0.0.1', help='Database host.')
|
argp.add_argument('--host', default='127.0.0.1', help='Database host.')
|
||||||
argp.add_argument('--port', default='7687', help='Database port.')
|
argp.add_argument('--port', default='7687', help='Database port.')
|
||||||
argp.add_argument('--time-compression-ratio', type=float, default=0.01,
|
argp.add_argument('--time-compression-ratio', type=float, default=0.001,
|
||||||
help='Compress/stretch durations between operation start '
|
help='Compress/stretch durations between operation start '
|
||||||
'times to increase/decrease benchmark load. '
|
'times to increase/decrease benchmark load. '
|
||||||
'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
|
'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
|
||||||
'benchmark 10x faster. Default is 0.01.')
|
'benchmark 10x faster. Default is 0.001.')
|
||||||
argp.add_argument('--operation-count', type=int, default=200,
|
argp.add_argument('--operation-count', type=int, default=1000,
|
||||||
help='Number of operations to generate during benchmark '
|
help='Number of operations to generate during benchmark '
|
||||||
'execution.')
|
'execution.')
|
||||||
argp.add_argument('--thread-count', type=int, default=8,
|
argp.add_argument('--thread-count', type=int, default=8,
|
||||||
@ -136,10 +136,8 @@ def parse_args():
|
|||||||
help='Time unit to use for measuring performance metrics')
|
help='Time unit to use for measuring performance metrics')
|
||||||
argp.add_argument('--result-file-prefix', default='',
|
argp.add_argument('--result-file-prefix', default='',
|
||||||
help='Result file name prefix')
|
help='Result file name prefix')
|
||||||
argp.add_argument('--properties-file',
|
argp.add_argument('--test-type', choices=('reads', 'updates'),
|
||||||
default=os.path.join(
|
default='reads', help='Test queries of type')
|
||||||
SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
|
|
||||||
help='Properties file used to select queries')
|
|
||||||
argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
|
argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
|
||||||
help='Run the database before starting LDBC')
|
help='Run the database before starting LDBC')
|
||||||
argp.add_argument('--create-index', action='store_true', default=False,
|
argp.add_argument('--create-index', action='store_true', default=False,
|
||||||
@ -194,7 +192,8 @@ def main():
|
|||||||
parameters_dir = os.path.join(dataset, 'substitution_parameters')
|
parameters_dir = os.path.join(dataset, 'substitution_parameters')
|
||||||
java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
|
java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
|
||||||
'-P', LDBC_DEFAULT_PROPERTIES,
|
'-P', LDBC_DEFAULT_PROPERTIES,
|
||||||
'-P', os.path.join(os.getcwd(), args.properties_file),
|
'-P', os.path.join(SCRIPT_DIR, "ldbc-snb-impls-{}."
|
||||||
|
"properties".format(args.test_type)),
|
||||||
'-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
|
'-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
|
||||||
'-p', 'host', args.host, '-p', 'port', args.port,
|
'-p', 'host', args.host, '-p', 'port', args.port,
|
||||||
'-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
|
'-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
|
||||||
|
@ -69,6 +69,7 @@ cd ${script_dir}
|
|||||||
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
|
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
|
||||||
git clone $deps_git_url/ldbc-snb-impls.git
|
git clone $deps_git_url/ldbc-snb-impls.git
|
||||||
cd ldbc-snb-impls
|
cd ldbc-snb-impls
|
||||||
|
git checkout 4b3bc129e6991dfa9adc974bb8fc53258036e127
|
||||||
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
|
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
|
||||||
$mvn install
|
$mvn install
|
||||||
$mvn clean compile assembly:single
|
$mvn clean compile assembly:single
|
||||||
|
@ -12,6 +12,10 @@ cd build
|
|||||||
cmake -DCMAKE_BUILD_TYPE=release ..
|
cmake -DCMAKE_BUILD_TYPE=release ..
|
||||||
TIMEOUT=1000 make -j$THREADS
|
TIMEOUT=1000 make -j$THREADS
|
||||||
|
|
||||||
cd ../tools/apollo
|
cd ../tools
|
||||||
|
|
||||||
|
./setup
|
||||||
|
|
||||||
|
cd apollo
|
||||||
|
|
||||||
./generate release
|
./generate release
|
||||||
|
@ -53,8 +53,6 @@ def parse_args():
|
|||||||
default="ms", help="The time unit that should be used.")
|
default="ms", help="The time unit that should be used.")
|
||||||
argp.add_argument("--output", default="",
|
argp.add_argument("--output", default="",
|
||||||
help="Save plot to file (instead of displaying it).")
|
help="Save plot to file (instead of displaying it).")
|
||||||
argp.add_argument("--max-label-width", default=11, type=int,
|
|
||||||
help="Maximum length of the x-axis labels (-1 is unlimited)")
|
|
||||||
return argp.parse_args()
|
return argp.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@ -66,7 +64,7 @@ def autolabel(ax, rects):
|
|||||||
height = rect.get_height()
|
height = rect.get_height()
|
||||||
# TODO: adjust more vendors
|
# TODO: adjust more vendors
|
||||||
ax.text(rect.get_x() + rect.get_width()/2., 1.00*height,
|
ax.text(rect.get_x() + rect.get_width()/2., 1.00*height,
|
||||||
'%d' % int(height),
|
'{:.1f}'.format(height),
|
||||||
ha='center', va='bottom')
|
ha='center', va='bottom')
|
||||||
|
|
||||||
|
|
||||||
@ -87,6 +85,15 @@ def main():
|
|||||||
vendors.append(vendor)
|
vendors.append(vendor)
|
||||||
assert len(vendors) == 2, "The graph is tailored for only 2 vendors."
|
assert len(vendors) == 2, "The graph is tailored for only 2 vendors."
|
||||||
|
|
||||||
|
# Helper for shortening the query name.
|
||||||
|
def shorten_query_name(name):
|
||||||
|
if name.lower().startswith("ldbc"):
|
||||||
|
name = name[4:]
|
||||||
|
# Long query names on the x-axis don't look compelling.
|
||||||
|
num = "".join(filter(lambda x: x in string.digits, name))
|
||||||
|
prefix = name.split(num)[0]
|
||||||
|
return prefix + num
|
||||||
|
|
||||||
# Collect the benchmark data.
|
# Collect the benchmark data.
|
||||||
print("LDBC Latency Data")
|
print("LDBC Latency Data")
|
||||||
for vendor in vendors:
|
for vendor in vendors:
|
||||||
@ -96,13 +103,19 @@ def main():
|
|||||||
mean_runtime = (query_data["run_time"]["mean"] /
|
mean_runtime = (query_data["run_time"]["mean"] /
|
||||||
LDBC_TIME_FACTORS[results_data["unit"]] *
|
LDBC_TIME_FACTORS[results_data["unit"]] *
|
||||||
TIME_FACTORS[args.time_unit])
|
TIME_FACTORS[args.time_unit])
|
||||||
query_name = query_data['name']
|
query_name = shorten_query_name(query_data['name'])
|
||||||
vendor['results'].append((query_name, mean_runtime))
|
vendor['results'].append((query_name, mean_runtime))
|
||||||
|
|
||||||
|
# Helper for sorting the results.
|
||||||
|
def sort_key(obj):
|
||||||
|
name = obj[0]
|
||||||
|
num = int("".join(filter(lambda x: x in string.digits, name)))
|
||||||
|
prefix = name.split(str(num))[0]
|
||||||
|
return (prefix, num)
|
||||||
|
|
||||||
# Sort results.
|
# Sort results.
|
||||||
for vendor in vendors:
|
for vendor in vendors:
|
||||||
vendor['results'].sort(key=lambda item: int("".join(filter(
|
vendor['results'].sort(key=sort_key)
|
||||||
lambda x: x in string.digits, item[0]))))
|
|
||||||
|
|
||||||
# Print results.
|
# Print results.
|
||||||
for vendor in vendors:
|
for vendor in vendors:
|
||||||
@ -116,6 +129,15 @@ def main():
|
|||||||
"Queries between different vendors are different!"
|
"Queries between different vendors are different!"
|
||||||
query_names = all_query_names[0]
|
query_names = all_query_names[0]
|
||||||
|
|
||||||
|
# Font size.
|
||||||
|
plt.rc('font', size=12) # controls default text sizes
|
||||||
|
plt.rc('axes', titlesize=24) # fontsize of the axes title
|
||||||
|
plt.rc('axes', labelsize=16) # fontsize of the x and y labels
|
||||||
|
plt.rc('xtick', labelsize=12) # fontsize of the tick labels
|
||||||
|
plt.rc('ytick', labelsize=12) # fontsize of the tick labels
|
||||||
|
plt.rc('legend', fontsize=16) # legend fontsize
|
||||||
|
plt.rc('figure', titlesize=24) # fontsize of the figure title
|
||||||
|
|
||||||
# Plot.
|
# Plot.
|
||||||
ind = np.arange(len(query_names)) # the x locations for the groups
|
ind = np.arange(len(query_names)) # the x locations for the groups
|
||||||
width = 0.40 # the width of the bars
|
width = 0.40 # the width of the bars
|
||||||
@ -124,20 +146,7 @@ def main():
|
|||||||
ax.set_ylabel('Mean Latency (%s)' % (args.time_unit)) # YAxis title
|
ax.set_ylabel('Mean Latency (%s)' % (args.time_unit)) # YAxis title
|
||||||
ax.set_facecolor('#dcdcdc') # plot bg color (light gray)
|
ax.set_facecolor('#dcdcdc') # plot bg color (light gray)
|
||||||
ax.set_xticks(ind + width / len(vendors)) # TODO: adjust (more vendors)
|
ax.set_xticks(ind + width / len(vendors)) # TODO: adjust (more vendors)
|
||||||
|
ax.set_xticklabels(query_names, rotation=30)
|
||||||
def shorten_query_name(query_name):
|
|
||||||
# Long query names on the x-axis don't look compelling.
|
|
||||||
if query_name.lower().startswith('ldbc'):
|
|
||||||
query_name = query_name[4:]
|
|
||||||
if len(query_name) > args.max_label_width:
|
|
||||||
query_name = query_name[:args.max_label_width] + '\N{HORIZONTAL ELLIPSIS}'
|
|
||||||
return query_name
|
|
||||||
labels = query_names
|
|
||||||
if args.max_label_width == 0:
|
|
||||||
labels = ["Q{}".format(i) for i, _ in enumerate(query_names)]
|
|
||||||
elif args.max_label_width > 0:
|
|
||||||
labels = map(shorten_query_name, query_names)
|
|
||||||
ax.set_xticklabels(labels, rotation=30)
|
|
||||||
# set only horizontal grid lines
|
# set only horizontal grid lines
|
||||||
for line in ax.get_xgridlines():
|
for line in ax.get_xgridlines():
|
||||||
line.set_linestyle(' ')
|
line.set_linestyle(' ')
|
||||||
@ -149,11 +158,11 @@ def main():
|
|||||||
ax.set_title(args.plot_title)
|
ax.set_title(args.plot_title)
|
||||||
# Draw logo or plot title
|
# Draw logo or plot title
|
||||||
if args.logo_path != None:
|
if args.logo_path != None:
|
||||||
# TODO: improve the logo positioning
|
|
||||||
im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
|
im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
|
||||||
args.logo_path)))
|
args.logo_path)))
|
||||||
plt.gcf().subplots_adjust(top=0.85)
|
plt.gcf().subplots_adjust(top=0.85)
|
||||||
newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
|
# magic numbers for logo size - DO NOT TOUCH!
|
||||||
|
newax = fig.add_axes([0.46, 0.85, 0.126, 0.15], anchor='N')
|
||||||
newax.imshow(im)
|
newax.imshow(im)
|
||||||
newax.axis('off')
|
newax.axis('off')
|
||||||
# Draw bars
|
# Draw bars
|
||||||
|
@ -210,7 +210,7 @@ void WriteNodeRow(const std::vector<Field> &fields,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
id = node_id_map.Insert(node_id);
|
id = node_id_map.Insert(node_id);
|
||||||
properties["id"] = *id;
|
properties["id"] = node_id.id;
|
||||||
} else if (field.type == "label") {
|
} else if (field.type == "label") {
|
||||||
for (const auto &label : utils::Split(value, FLAGS_array_delimiter)) {
|
for (const auto &label : utils::Split(value, FLAGS_array_delimiter)) {
|
||||||
labels.emplace_back(utils::Trim(label));
|
labels.emplace_back(utils::Trim(label));
|
||||||
|
Loading…
Reference in New Issue
Block a user