Integrated C++ CSV to snapshot into LDBC.
Reviewers: mislav.bradac
Reviewed By: mislav.bradac
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D826
parent d9503d6b65
commit 99494d77e3
@@ -9,8 +9,7 @@
 ./run_benchmark --create-index --run-db memgraph # or neo4j
 # To run update queries pass the properties file for updates and slow down
 # the execution by setting a larger time compression ratio.
-./run_benchmark --create-index --run-db memgraph \
-    --properties-file ldbc-snb-impls-updates.properties \
+./run_benchmark --create-index --run-db memgraph --test-type updates \
     --time-compression-ratio 1.5

 ## How to run a specific test?
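The `--time-compression-ratio` value is forwarded to the LDBC driver, which uses it to stretch or compress the gaps between scheduled operation start times (a larger ratio means a slower, gentler run). A minimal sketch of that idea, assuming a plain list of start times in milliseconds; the helper name and numbers are illustrative, not the driver's actual code:

```python
# Sketch only: how a time compression ratio rescales the gaps between
# scheduled operation start times (>1.0 slows the run down, <1.0 speeds it up).
def compress_schedule(start_times_ms, time_compression_ratio):
    if not start_times_ms:
        return []
    compressed = [start_times_ms[0]]
    for prev, cur in zip(start_times_ms, start_times_ms[1:]):
        compressed.append(compressed[-1] + (cur - prev) * time_compression_ratio)
    return compressed

print(compress_schedule([0, 10, 30, 60], 1.5))    # [0, 15.0, 45.0, 90.0]
print(compress_schedule([0, 10, 30, 60], 0.001))  # gaps shrink ~1000x: operations fire almost back to back
```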
@@ -12,6 +12,6 @@ TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS
 ./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png

 # run update benchmarks
-TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
-TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
+TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
+TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
 ./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
@@ -29,6 +29,7 @@ session = driver.session()

+# The first program argument is the path to a file with indexes.
 with open(args.indexfile, "r") as f:
     print("Starting index creation...")
     for line in f.readlines():
         session.run(line.strip()).consume()
         print("%s -> DONE" % line.strip())
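For context, a self-contained version of the index-creation step shown above might look like the sketch below. It assumes the Python Bolt driver (`neo4j-driver` 1.x era, hence the `neo4j.v1` import; newer releases import from `neo4j`) and a locally running database with auth disabled; the file name `indexes.cypher` and the URI are placeholders, not the project's actual paths:

```python
# Standalone sketch of the index-creation loop (not the exact project script).
from neo4j.v1 import GraphDatabase

driver = GraphDatabase.driver("bolt://127.0.0.1:7687", auth=None)
session = driver.session()

# The first program argument is the path to a file with indexes; hard-coded here.
with open("indexes.cypher", "r") as f:
    print("Starting index creation...")
    for line in f.readlines():
        session.run(line.strip()).consume()
        print("%s -> DONE" % line.strip())

session.close()
driver.close()
```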
@@ -43,17 +43,17 @@ ldbc.snb.interactive.LdbcQuery14_freq=49

 # At least one needs to be enabled in order to cause interleaving of short
 # reads. To get all the reads, you should run with a large operation count.
-ldbc.snb.interactive.LdbcQuery1_enable=false
-ldbc.snb.interactive.LdbcQuery2_enable=false
+ldbc.snb.interactive.LdbcQuery1_enable=true
+ldbc.snb.interactive.LdbcQuery2_enable=true
 ldbc.snb.interactive.LdbcQuery3_enable=false
-ldbc.snb.interactive.LdbcQuery4_enable=false
-ldbc.snb.interactive.LdbcQuery5_enable=false
+ldbc.snb.interactive.LdbcQuery4_enable=true
+ldbc.snb.interactive.LdbcQuery5_enable=true
 ldbc.snb.interactive.LdbcQuery6_enable=false
 ldbc.snb.interactive.LdbcQuery7_enable=false
 ldbc.snb.interactive.LdbcQuery8_enable=true
 ldbc.snb.interactive.LdbcQuery9_enable=false
 ldbc.snb.interactive.LdbcQuery10_enable=false
-ldbc.snb.interactive.LdbcQuery11_enable=false
+ldbc.snb.interactive.LdbcQuery11_enable=true
 ldbc.snb.interactive.LdbcQuery12_enable=false
 ldbc.snb.interactive.LdbcQuery13_enable=false
 ldbc.snb.interactive.LdbcQuery14_enable=false
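The `*_enable` and `*_freq` keys above use plain Java properties syntax, so a few lines of Python are enough to sanity-check which interactive queries a given file turns on. This is a simplified sketch (it ignores escapes and `:`-separated pairs), and the file name is an example:

```python
# Quick inspection of LdbcQuery*_enable flags in a properties file.
def read_properties(path):
    props = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            key, _, value = line.partition("=")
            props[key.strip()] = value.strip()
    return props

props = read_properties("ldbc-snb-impls-updates.properties")
enabled = [k for k, v in props.items() if k.endswith("_enable") and v == "true"]
print(enabled)
```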
@@ -44,7 +44,7 @@ class Memgraph:
         database_args = [binary, "--num-workers", self.num_workers,
                          "--snapshot-directory", os.path.join(self.dataset,
                                                               "memgraph"),
-                         "--recover-on-startup", "true",
+                         "--snapshot-recover-on-startup", "true",
                          "--port", self.port]

         # database env
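For reference, `database_args` above is the argument list the harness passes when it spawns the database process. A stripped-down sketch of that launch step, with the surrounding class plumbing omitted; the binary path, dataset path, worker count, and port are example values:

```python
# Sketch of starting the database process with the flags from the diff above.
import os
import subprocess

binary = "memgraph/build/memgraph"   # example path
dataset = "datasets/ldbc/scale_1"    # example path
database_args = [binary, "--num-workers", "8",
                 "--snapshot-directory", os.path.join(dataset, "memgraph"),
                 "--snapshot-recover-on-startup", "true",
                 "--port", "7687"]
proc = subprocess.Popen(database_args)

# ... run the benchmark against it, then shut the database down:
proc.terminate()
proc.wait()
```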
@@ -119,12 +119,12 @@ def parse_args():
                       help='Dataset scale to use for benchmarking.')
     argp.add_argument('--host', default='127.0.0.1', help='Database host.')
     argp.add_argument('--port', default='7687', help='Database port.')
-    argp.add_argument('--time-compression-ratio', type=float, default=0.01,
+    argp.add_argument('--time-compression-ratio', type=float, default=0.001,
                       help='Compress/stretch durations between operation start '
                            'times to increase/decrease benchmark load. '
                            'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
-                           'benchmark 10x faster. Default is 0.01.')
-    argp.add_argument('--operation-count', type=int, default=200,
+                           'benchmark 10x faster. Default is 0.001.')
+    argp.add_argument('--operation-count', type=int, default=1000,
                       help='Number of operations to generate during benchmark '
                            'execution.')
     argp.add_argument('--thread-count', type=int, default=8,
@@ -136,10 +136,8 @@ def parse_args():
                       help='Time unit to use for measuring performance metrics')
     argp.add_argument('--result-file-prefix', default='',
                       help='Result file name prefix')
-    argp.add_argument('--properties-file',
-                      default=os.path.join(
-                          SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
-                      help='Properties file used to select queries')
+    argp.add_argument('--test-type', choices=('reads', 'updates'),
+                      default='reads', help='Test queries of type')
     argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
                       help='Run the database before starting LDBC')
     argp.add_argument('--create-index', action='store_true', default=False,
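Taken together, the two `parse_args` hunks above replace the `--properties-file` path with a `--test-type` switch and change the defaults for time compression and operation count. A minimal runnable excerpt covering only that subset of the real script's arguments:

```python
# Minimal excerpt of the changed argument parsing (subset of the real script).
import argparse

def parse_args():
    argp = argparse.ArgumentParser()
    argp.add_argument('--host', default='127.0.0.1', help='Database host.')
    argp.add_argument('--port', default='7687', help='Database port.')
    argp.add_argument('--time-compression-ratio', type=float, default=0.001,
                      help='Compress/stretch durations between operation start '
                           'times to increase/decrease benchmark load. '
                           'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
                           'benchmark 10x faster. Default is 0.001.')
    argp.add_argument('--operation-count', type=int, default=1000,
                      help='Number of operations to generate during benchmark '
                           'execution.')
    argp.add_argument('--test-type', choices=('reads', 'updates'),
                      default='reads', help='Test queries of type')
    return argp.parse_args()

if __name__ == '__main__':
    print(parse_args())
```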
@@ -194,7 +192,8 @@ def main():
     parameters_dir = os.path.join(dataset, 'substitution_parameters')
     java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
                 '-P', LDBC_DEFAULT_PROPERTIES,
-                '-P', os.path.join(os.getcwd(), args.properties_file),
+                '-P', os.path.join(SCRIPT_DIR, "ldbc-snb-impls-{}."
+                                   "properties".format(args.test_type)),
                 '-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
                 '-p', 'host', args.host, '-p', 'port', args.port,
                 '-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
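With `--properties-file` gone, the driver invocation now derives the properties file name from `--test-type`. A small sketch of just that derivation; the `SCRIPT_DIR` definition here is an assumption and the rest of `java_cmd` is omitted:

```python
# How the per-test-type properties path is built after this change.
import os

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))  # assumed definition
test_type = 'updates'  # would come from args.test_type
properties_path = os.path.join(SCRIPT_DIR,
                               "ldbc-snb-impls-{}.properties".format(test_type))
print(properties_path)  # .../ldbc-snb-impls-updates.properties
```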
@@ -69,6 +69,7 @@ cd ${script_dir}
 #git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
 git clone $deps_git_url/ldbc-snb-impls.git
 cd ldbc-snb-impls
+git checkout 4b3bc129e6991dfa9adc974bb8fc53258036e127
 sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
 $mvn install
 $mvn clean compile assembly:single
@@ -12,6 +12,10 @@ cd build
 cmake -DCMAKE_BUILD_TYPE=release ..
 TIMEOUT=1000 make -j$THREADS

-cd ../tools/apollo
+cd ../tools
+
+./setup
+
+cd apollo

 ./generate release
@@ -53,8 +53,6 @@ def parse_args():
                       default="ms", help="The time unit that should be used.")
     argp.add_argument("--output", default="",
                       help="Save plot to file (instead of displaying it).")
-    argp.add_argument("--max-label-width", default=11, type=int,
-                      help="Maximum length of the x-axis labels (-1 is unlimited)")
     return argp.parse_args()

@@ -66,7 +64,7 @@ def autolabel(ax, rects):
         height = rect.get_height()
         # TODO: adjust more vendors
         ax.text(rect.get_x() + rect.get_width()/2., 1.00*height,
-                '%d' % int(height),
+                '{:.1f}'.format(height),
                 ha='center', va='bottom')

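The `autolabel` change above only swaps the bar-label format: integer truncation becomes one decimal place. For example:

```python
# Old vs. new bar label formatting from the autolabel() change.
height = 3.76
print('%d' % int(height))       # "3"   (old: truncates toward zero)
print('{:.1f}'.format(height))  # "3.8" (new: one decimal place, rounded)
```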
@@ -87,6 +85,15 @@ def main():
         vendors.append(vendor)
     assert len(vendors) == 2, "The graph is tailored for only 2 vendors."

+    # Helper for shortening the query name.
+    def shorten_query_name(name):
+        if name.lower().startswith("ldbc"):
+            name = name[4:]
+        # Long query names on the x-axis don't look compelling.
+        num = "".join(filter(lambda x: x in string.digits, name))
+        prefix = name.split(num)[0]
+        return prefix + num
+
     # Collect the benchmark data.
     print("LDBC Latency Data")
     for vendor in vendors:
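The new `shorten_query_name` helper keeps only the prefix up to and including the query number. Same function as in the hunk above, with illustrative inputs:

```python
import string

def shorten_query_name(name):
    if name.lower().startswith("ldbc"):
        name = name[4:]
    # Long query names on the x-axis don't look compelling.
    num = "".join(filter(lambda x: x in string.digits, name))
    prefix = name.split(num)[0]
    return prefix + num

print(shorten_query_name("LdbcQuery14"))                   # Query14
print(shorten_query_name("LdbcShortQuery3PersonFriends"))  # ShortQuery3
```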
@@ -96,13 +103,19 @@ def main():
             mean_runtime = (query_data["run_time"]["mean"] /
                             LDBC_TIME_FACTORS[results_data["unit"]] *
                             TIME_FACTORS[args.time_unit])
-            query_name = query_data['name']
+            query_name = shorten_query_name(query_data['name'])
             vendor['results'].append((query_name, mean_runtime))

+    # Helper for sorting the results.
+    def sort_key(obj):
+        name = obj[0]
+        num = int("".join(filter(lambda x: x in string.digits, name)))
+        prefix = name.split(str(num))[0]
+        return (prefix, num)
+
     # Sort results.
     for vendor in vendors:
-        vendor['results'].sort(key=lambda item: int("".join(filter(
-            lambda x: x in string.digits, item[0]))))
+        vendor['results'].sort(key=sort_key)

     # Print results.
     for vendor in vendors:
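With `sort_key`, results are grouped by name prefix and then ordered numerically by query number, so `Query2` sorts before `Query10`. The latency values below are placeholders:

```python
import string

def sort_key(obj):
    name = obj[0]
    num = int("".join(filter(lambda x: x in string.digits, name)))
    prefix = name.split(str(num))[0]
    return (prefix, num)

results = [("Query10", 4.2), ("Query2", 1.3), ("ShortQuery1", 0.2)]
results.sort(key=sort_key)
print(results)  # [('Query2', 1.3), ('Query10', 4.2), ('ShortQuery1', 0.2)]
```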
@@ -116,6 +129,15 @@ def main():
         "Queries between different vendors are different!"
     query_names = all_query_names[0]

+    # Font size.
+    plt.rc('font', size=12)         # controls default text sizes
+    plt.rc('axes', titlesize=24)    # fontsize of the axes title
+    plt.rc('axes', labelsize=16)    # fontsize of the x and y labels
+    plt.rc('xtick', labelsize=12)   # fontsize of the tick labels
+    plt.rc('ytick', labelsize=12)   # fontsize of the tick labels
+    plt.rc('legend', fontsize=16)   # legend fontsize
+    plt.rc('figure', titlesize=24)  # fontsize of the figure title
+
     # Plot.
     ind = np.arange(len(query_names))  # the x locations for the groups
     width = 0.40  # the width of the bars
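A side note on the new `plt.rc` calls: they mutate matplotlib's process-global `rcParams`, which is fine for a one-shot plotting script. If the same sizes were ever wanted for a single figure only, matplotlib's `rc_context` scopes them; a standalone sketch, not part of the actual script:

```python
# Scoped alternative to global plt.rc(...) calls.
import matplotlib
matplotlib.use('Agg')  # render off-screen
import matplotlib.pyplot as plt

with plt.rc_context({'font.size': 12, 'axes.titlesize': 24,
                     'axes.labelsize': 16, 'legend.fontsize': 16}):
    fig, ax = plt.subplots()
    ax.set_title('Read queries, scale 1')
    fig.savefig('example.png')
```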
@@ -124,20 +146,7 @@ def main():
     ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
     ax.set_facecolor('#dcdcdc')  # plot bg color (light gray)
     ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
-
-    def shorten_query_name(query_name):
-        # Long query names on the x-axis don't look compelling.
-        if query_name.lower().startswith('ldbc'):
-            query_name = query_name[4:]
-        if len(query_name) > args.max_label_width:
-            query_name = query_name[:args.max_label_width] + '\N{HORIZONTAL ELLIPSIS}'
-        return query_name
-    labels = query_names
-    if args.max_label_width == 0:
-        labels = ["Q{}".format(i) for i, _ in enumerate(query_names)]
-    elif args.max_label_width > 0:
-        labels = map(shorten_query_name, query_names)
-    ax.set_xticklabels(labels, rotation=30)
+    ax.set_xticklabels(query_names, rotation=30)
     # set only horizontal grid lines
     for line in ax.get_xgridlines():
         line.set_linestyle(' ')
@@ -149,11 +158,11 @@ def main():
     ax.set_title(args.plot_title)
     # Draw logo or plot title
     if args.logo_path != None:
-        # TODO: improve the logo positioning
         im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
                                                      args.logo_path)))
         plt.gcf().subplots_adjust(top=0.85)
-        newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
+        # magic numbers for logo size - DO NOT TOUCH!
+        newax = fig.add_axes([0.46, 0.85, 0.126, 0.15], anchor='N')
        newax.imshow(im)
        newax.axis('off')
     # Draw bars
@@ -210,7 +210,7 @@ void WriteNodeRow(const std::vector<Field> &fields,
         }
       }
       id = node_id_map.Insert(node_id);
-      properties["id"] = *id;
+      properties["id"] = node_id.id;
     } else if (field.type == "label") {
       for (const auto &label : utils::Split(value, FLAGS_array_delimiter)) {
         labels.emplace_back(utils::Trim(label));