diff --git a/tests/public_benchmark/ldbc/README.md b/tests/public_benchmark/ldbc/README.md
index ac9d06e91..aa93139a6 100644
--- a/tests/public_benchmark/ldbc/README.md
+++ b/tests/public_benchmark/ldbc/README.md
@@ -9,8 +9,7 @@
     ./run_benchmark --create-index --run-db memgraph # or neo4j
     # To run update queries pass the properties file for updates and slow down
     # the execution by setting a larger time compression ratio.
-    ./run_benchmark --create-index --run-db memgraph \
-        --properties-file ldbc-snb-impls-updates.properties \
+    ./run_benchmark --create-index --run-db memgraph --test-type updates \
         --time-compression-ratio 1.5
 
 ## How to run a specific test?
diff --git a/tests/public_benchmark/ldbc/continuous_integration b/tests/public_benchmark/ldbc/continuous_integration
index 0b761128f..12cb12611 100644
--- a/tests/public_benchmark/ldbc/continuous_integration
+++ b/tests/public_benchmark/ldbc/continuous_integration
@@ -12,6 +12,6 @@ TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREA
 ./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
 
 # run update benchmarks
-TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
-TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
+TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
+TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
 ./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
diff --git a/tests/public_benchmark/ldbc/index_creation.py b/tests/public_benchmark/ldbc/index_creation.py
index 3f5df6844..c63324c08 100755
--- a/tests/public_benchmark/ldbc/index_creation.py
+++ b/tests/public_benchmark/ldbc/index_creation.py
@@ -29,6 +29,7 @@ session = driver.session()
 
 # The fist program argument is path to a file with indexes.
 with open(args.indexfile, "r") as f:
+    print("Starting index creation...")
     for line in f.readlines():
         session.run(line.strip()).consume()
         print("%s -> DONE" % line.strip())
diff --git a/tests/public_benchmark/ldbc/ldbc-snb-impls-short-reads.properties b/tests/public_benchmark/ldbc/ldbc-snb-impls-reads.properties
similarity index 92%
rename from tests/public_benchmark/ldbc/ldbc-snb-impls-short-reads.properties
rename to tests/public_benchmark/ldbc/ldbc-snb-impls-reads.properties
index 367736f22..cfe70d92d 100644
--- a/tests/public_benchmark/ldbc/ldbc-snb-impls-short-reads.properties
+++ b/tests/public_benchmark/ldbc/ldbc-snb-impls-reads.properties
@@ -43,17 +43,17 @@ ldbc.snb.interactive.LdbcQuery14_freq=49
 
 # At least one needs to be enabled in order to cause interleaving of short
 # reads. To get all the reads, you should run with a large operation count.
-ldbc.snb.interactive.LdbcQuery1_enable=false
-ldbc.snb.interactive.LdbcQuery2_enable=false
+ldbc.snb.interactive.LdbcQuery1_enable=true
+ldbc.snb.interactive.LdbcQuery2_enable=true
 ldbc.snb.interactive.LdbcQuery3_enable=false
-ldbc.snb.interactive.LdbcQuery4_enable=false
-ldbc.snb.interactive.LdbcQuery5_enable=false
+ldbc.snb.interactive.LdbcQuery4_enable=true
+ldbc.snb.interactive.LdbcQuery5_enable=true
 ldbc.snb.interactive.LdbcQuery6_enable=false
 ldbc.snb.interactive.LdbcQuery7_enable=false
 ldbc.snb.interactive.LdbcQuery8_enable=true
 ldbc.snb.interactive.LdbcQuery9_enable=false
 ldbc.snb.interactive.LdbcQuery10_enable=false
-ldbc.snb.interactive.LdbcQuery11_enable=false
+ldbc.snb.interactive.LdbcQuery11_enable=true
 ldbc.snb.interactive.LdbcQuery12_enable=false
 ldbc.snb.interactive.LdbcQuery13_enable=false
 ldbc.snb.interactive.LdbcQuery14_enable=false
diff --git a/tests/public_benchmark/ldbc/run_benchmark b/tests/public_benchmark/ldbc/run_benchmark
index 445598d73..06faacb3f 100755
--- a/tests/public_benchmark/ldbc/run_benchmark
+++ b/tests/public_benchmark/ldbc/run_benchmark
@@ -44,7 +44,7 @@ class Memgraph:
         database_args = [binary,
                          "--num-workers", self.num_workers,
                          "--snapshot-directory", os.path.join(self.dataset, "memgraph"),
-                         "--recover-on-startup", "true",
+                         "--snapshot-recover-on-startup", "true",
                          "--port", self.port]
 
         # database env
@@ -119,12 +119,12 @@
                       help='Dataset scale to use for benchmarking.')
     argp.add_argument('--host', default='127.0.0.1', help='Database host.')
     argp.add_argument('--port', default='7687', help='Database port.')
-    argp.add_argument('--time-compression-ratio', type=float, default=0.01,
+    argp.add_argument('--time-compression-ratio', type=float, default=0.001,
                       help='Compress/stretch durations between operation start '
                            'times to increase/decrease benchmark load. '
                            'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
-                           'benchmark 10x faster. Default is 0.01.')
-    argp.add_argument('--operation-count', type=int, default=200,
+                           'benchmark 10x faster. Default is 0.001.')
+    argp.add_argument('--operation-count', type=int, default=1000,
                       help='Number of operations to generate during benchmark '
                            'execution.')
     argp.add_argument('--thread-count', type=int, default=8,
@@ -136,10 +136,8 @@
                       help='Time unit to use for measuring performance metrics')
    argp.add_argument('--result-file-prefix', default='',
                       help='Result file name prefix')
-    argp.add_argument('--properties-file',
-                      default=os.path.join(
-                          SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
-                      help='Properties file used to select queries')
+    argp.add_argument('--test-type', choices=('reads', 'updates'),
+                      default='reads', help='Test queries of type')
     argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
                       help='Run the database before starting LDBC')
     argp.add_argument('--create-index', action='store_true', default=False,
@@ -194,7 +192,8 @@ def main():
     parameters_dir = os.path.join(dataset, 'substitution_parameters')
     java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
                 '-P', LDBC_DEFAULT_PROPERTIES,
-                '-P', os.path.join(os.getcwd(), args.properties_file),
+                '-P', os.path.join(SCRIPT_DIR, "ldbc-snb-impls-{}."
+                                   "properties".format(args.test_type)),
                 '-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
                 '-p', 'host', args.host, '-p', 'port', args.port,
                 '-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',
diff --git a/tests/public_benchmark/ldbc/setup b/tests/public_benchmark/ldbc/setup
index 485b2bbd8..3e7ec1b3f 100755
--- a/tests/public_benchmark/ldbc/setup
+++ b/tests/public_benchmark/ldbc/setup
@@ -69,6 +69,7 @@ cd ${script_dir}
 #git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
 git clone $deps_git_url/ldbc-snb-impls.git
 cd ldbc-snb-impls
+git checkout 4b3bc129e6991dfa9adc974bb8fc53258036e127
 sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
 $mvn install
 $mvn clean compile assembly:single
diff --git a/tools/apollo/build_release b/tools/apollo/build_release
index 558d673fe..de33e8009 100644
--- a/tools/apollo/build_release
+++ b/tools/apollo/build_release
@@ -12,6 +12,10 @@ cd build
 cmake -DCMAKE_BUILD_TYPE=release ..
 TIMEOUT=1000 make -j$THREADS
 
-cd ../tools/apollo
+cd ../tools
+
+./setup
+
+cd apollo
 
 ./generate release
diff --git a/tools/plot_ldbc_latency b/tools/plot_ldbc_latency
index 421aa517e..0f1006abe 100755
--- a/tools/plot_ldbc_latency
+++ b/tools/plot_ldbc_latency
@@ -53,8 +53,6 @@ def parse_args():
                       default="ms", help="The time unit that should be used.")
     argp.add_argument("--output", default="",
                       help="Save plot to file (instead of displaying it).")
-    argp.add_argument("--max-label-width", default=11, type=int,
-                      help="Maximum length of the x-axis labels (-1 is unlimited)")
     return argp.parse_args()
 
 
@@ -66,7 +64,7 @@
         height = rect.get_height()
         # TODO: adjust more vendors
        ax.text(rect.get_x() + rect.get_width()/2., 1.00*height,
-                '%d' % int(height),
+                '{:.1f}'.format(height),
                 ha='center', va='bottom')
 
 
@@ -87,6 +85,15 @@
         vendors.append(vendor)
     assert len(vendors) == 2, "The graph is tailored for only 2 vendors."
 
+    # Helper for shortening the query name.
+    def shorten_query_name(name):
+        if name.lower().startswith("ldbc"):
+            name = name[4:]
+        # Long query names on the x-axis don't look compelling.
+        num = "".join(filter(lambda x: x in string.digits, name))
+        prefix = name.split(num)[0]
+        return prefix + num
+
     # Collect the benchmark data.
     print("LDBC Latency Data")
     for vendor in vendors:
@@ -96,13 +103,19 @@
             mean_runtime = (query_data["run_time"]["mean"] /
                             LDBC_TIME_FACTORS[results_data["unit"]] *
                             TIME_FACTORS[args.time_unit])
-            query_name = query_data['name']
+            query_name = shorten_query_name(query_data['name'])
             vendor['results'].append((query_name, mean_runtime))
 
+    # Helper for sorting the results.
+    def sort_key(obj):
+        name = obj[0]
+        num = int("".join(filter(lambda x: x in string.digits, name)))
+        prefix = name.split(str(num))[0]
+        return (prefix, num)
+
     # Sort results.
     for vendor in vendors:
-        vendor['results'].sort(key=lambda item: int("".join(filter(
-            lambda x: x in string.digits, item[0]))))
+        vendor['results'].sort(key=sort_key)
 
     # Print results.
     for vendor in vendors:
@@ -116,6 +129,15 @@
         "Queries between different vendors are different!"
     query_names = all_query_names[0]
 
+    # Font size.
+    plt.rc('font', size=12)          # controls default text sizes
+    plt.rc('axes', titlesize=24)     # fontsize of the axes title
+    plt.rc('axes', labelsize=16)     # fontsize of the x and y labels
+    plt.rc('xtick', labelsize=12)    # fontsize of the tick labels
+    plt.rc('ytick', labelsize=12)    # fontsize of the tick labels
+    plt.rc('legend', fontsize=16)    # legend fontsize
+    plt.rc('figure', titlesize=24)   # fontsize of the figure title
+
     # Plot.
     ind = np.arange(len(query_names))  # the x locations for the groups
     width = 0.40  # the width of the bars
@@ -124,20 +146,7 @@
     ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
     ax.set_facecolor('#dcdcdc')  # plot bg color (light gray)
     ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
-
-    def shorten_query_name(query_name):
-        # Long query names on the x-axis don't look compelling.
-        if query_name.lower().startswith('ldbc'):
-            query_name = query_name[4:]
-        if len(query_name) > args.max_label_width:
-            query_name = query_name[:args.max_label_width] + '\N{HORIZONTAL ELLIPSIS}'
-        return query_name
-    labels = query_names
-    if args.max_label_width == 0:
-        labels = ["Q{}".format(i) for i, _ in enumerate(query_names)]
-    elif args.max_label_width > 0:
-        labels = map(shorten_query_name, query_names)
-    ax.set_xticklabels(labels, rotation=30)
+    ax.set_xticklabels(query_names, rotation=30)
     # set only horizontal grid lines
     for line in ax.get_xgridlines():
         line.set_linestyle(' ')
@@ -149,11 +158,11 @@
     ax.set_title(args.plot_title)
     # Draw logo or plot title
     if args.logo_path != None:
-        # TODO: improve the logo positioning
         im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
                                                      args.logo_path)))
         plt.gcf().subplots_adjust(top=0.85)
-        newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
+        # magic numbers for logo size - DO NOT TOUCH!
+        newax = fig.add_axes([0.46, 0.85, 0.126, 0.15], anchor='N')
         newax.imshow(im)
         newax.axis('off')
     # Draw bars
diff --git a/tools/src/csv_to_snapshot/main.cpp b/tools/src/csv_to_snapshot/main.cpp
index 41e696e2a..3c734180f 100644
--- a/tools/src/csv_to_snapshot/main.cpp
+++ b/tools/src/csv_to_snapshot/main.cpp
@@ -210,7 +210,7 @@ void WriteNodeRow(const std::vector<Field> &fields,
       }
     }
     id = node_id_map.Insert(node_id);
-    properties["id"] = *id;
+    properties["id"] = node_id.id;
   } else if (field.type == "label") {
     for (const auto &label : utils::Split(value, FLAGS_array_delimiter)) {
       labels.emplace_back(utils::Trim(label));
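A quick, illustrative sketch of what the new run_benchmark options mean in practice (not part of the patch; SCRIPT_DIR, the properties_file helper, and the gap values are made up for illustration): --test-type picks one of the two ldbc-snb-impls-*.properties files next to the script, and --time-compression-ratio scales the gaps between recorded operation start times, as described in the help text above.

import os

SCRIPT_DIR = "tests/public_benchmark/ldbc"  # stand-in for the script's directory

def properties_file(test_type):
    # Hypothetical helper mirroring the --test-type handling: 'reads' or
    # 'updates' selects the matching driver properties file.
    assert test_type in ("reads", "updates")
    return os.path.join(SCRIPT_DIR,
                        "ldbc-snb-impls-{}.properties".format(test_type))

print(properties_file("reads"))    # ends with ldbc-snb-impls-reads.properties
print(properties_file("updates"))  # ends with ldbc-snb-impls-updates.properties

# The ratio compresses/stretches the schedule: 0.001 (the new read default)
# replays it 1000x faster, while 1.5 (used for updates) replays it 1.5x slower.
recorded_gaps_ms = [100, 250, 400]  # made-up gaps between operation starts
for ratio in (0.001, 1.5):
    print(ratio, [gap * ratio for gap in recorded_gaps_ms])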
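And a minimal standalone check of the two helpers added to tools/plot_ldbc_latency, exercised on a few made-up query names (the exact names in the results JSON may differ). It shows why the sort key is the (prefix, number) pair rather than the bare number: short reads and complex reads keep their own groups on the x-axis while still being ordered numerically within each group.

import string

def shorten_query_name(name):
    # Same logic as the helper in plot_ldbc_latency: drop the "Ldbc" prefix
    # and everything after the query number.
    if name.lower().startswith("ldbc"):
        name = name[4:]
    num = "".join(filter(lambda x: x in string.digits, name))
    prefix = name.split(num)[0]
    return prefix + num

def sort_key(obj):
    # Same logic as the sort helper: group by prefix, then order numerically.
    name = obj[0]
    num = int("".join(filter(lambda x: x in string.digits, name)))
    prefix = name.split(str(num))[0]
    return (prefix, num)

names = ["LdbcQuery10", "LdbcQuery2", "LdbcShortQuery3PersonFriends",
         "LdbcShortQuery1PersonProfile"]  # made-up sample of result names
results = [(shorten_query_name(n), 0.0) for n in names]
results.sort(key=sort_key)
print([name for name, _ in results])
# -> ['Query2', 'Query10', 'ShortQuery1', 'ShortQuery3']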