Integrated C++ CSV to snapshot into LDBC.

Reviewers: mislav.bradac

Reviewed By: mislav.bradac

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D826
This commit is contained in:
Matej Ferencevic 2017-09-23 22:43:58 +02:00
parent d9503d6b65
commit 99494d77e3
9 changed files with 55 additions and 42 deletions

View File

@ -9,8 +9,7 @@
./run_benchmark --create-index --run-db memgraph # or neo4j
# To run update queries pass the properties file for updates and slow down
# the execution by setting a larger time compression ratio.
./run_benchmark --create-index --run-db memgraph \
--properties-file ldbc-snb-impls-updates.properties \
./run_benchmark --create-index --run-db memgraph --test-type updates \
--time-compression-ratio 1.5
## How to run a specific test?

View File

@ -12,6 +12,6 @@ TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREA
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
# run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --time-compression-ratio 1.5 --properties-file ldbc-snb-impls-updates.properties
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png

View File

@ -29,6 +29,7 @@ session = driver.session()
# The fist program argument is path to a file with indexes.
with open(args.indexfile, "r") as f:
print("Starting index creation...")
for line in f.readlines():
session.run(line.strip()).consume()
print("%s -> DONE" % line.strip())

View File

@ -43,17 +43,17 @@ ldbc.snb.interactive.LdbcQuery14_freq=49
# At least one needs to be enabled in order to cause interleaving of short
# reads. To get all the reads, you should run with a large operation count.
ldbc.snb.interactive.LdbcQuery1_enable=false
ldbc.snb.interactive.LdbcQuery2_enable=false
ldbc.snb.interactive.LdbcQuery1_enable=true
ldbc.snb.interactive.LdbcQuery2_enable=true
ldbc.snb.interactive.LdbcQuery3_enable=false
ldbc.snb.interactive.LdbcQuery4_enable=false
ldbc.snb.interactive.LdbcQuery5_enable=false
ldbc.snb.interactive.LdbcQuery4_enable=true
ldbc.snb.interactive.LdbcQuery5_enable=true
ldbc.snb.interactive.LdbcQuery6_enable=false
ldbc.snb.interactive.LdbcQuery7_enable=false
ldbc.snb.interactive.LdbcQuery8_enable=true
ldbc.snb.interactive.LdbcQuery9_enable=false
ldbc.snb.interactive.LdbcQuery10_enable=false
ldbc.snb.interactive.LdbcQuery11_enable=false
ldbc.snb.interactive.LdbcQuery11_enable=true
ldbc.snb.interactive.LdbcQuery12_enable=false
ldbc.snb.interactive.LdbcQuery13_enable=false
ldbc.snb.interactive.LdbcQuery14_enable=false

View File

@ -44,7 +44,7 @@ class Memgraph:
database_args = [binary, "--num-workers", self.num_workers,
"--snapshot-directory", os.path.join(self.dataset,
"memgraph"),
"--recover-on-startup", "true",
"--snapshot-recover-on-startup", "true",
"--port", self.port]
# database env
@ -119,12 +119,12 @@ def parse_args():
help='Dataset scale to use for benchmarking.')
argp.add_argument('--host', default='127.0.0.1', help='Database host.')
argp.add_argument('--port', default='7687', help='Database port.')
argp.add_argument('--time-compression-ratio', type=float, default=0.01,
argp.add_argument('--time-compression-ratio', type=float, default=0.001,
help='Compress/stretch durations between operation start '
'times to increase/decrease benchmark load. '
'E.g. 2.0 = run benchmark 2x slower, 0.1 = run '
'benchmark 10x faster. Default is 0.01.')
argp.add_argument('--operation-count', type=int, default=200,
'benchmark 10x faster. Default is 0.001.')
argp.add_argument('--operation-count', type=int, default=1000,
help='Number of operations to generate during benchmark '
'execution.')
argp.add_argument('--thread-count', type=int, default=8,
@ -136,10 +136,8 @@ def parse_args():
help='Time unit to use for measuring performance metrics')
argp.add_argument('--result-file-prefix', default='',
help='Result file name prefix')
argp.add_argument('--properties-file',
default=os.path.join(
SCRIPT_DIR, 'ldbc-snb-impls-short-reads.properties'),
help='Properties file used to select queries')
argp.add_argument('--test-type', choices=('reads', 'updates'),
default='reads', help='Test queries of type')
argp.add_argument('--run-db', choices=('memgraph', 'neo4j'),
help='Run the database before starting LDBC')
argp.add_argument('--create-index', action='store_true', default=False,
@ -194,7 +192,8 @@ def main():
parameters_dir = os.path.join(dataset, 'substitution_parameters')
java_cmd = ('java', '-cp', cp, 'com.ldbc.driver.Client',
'-P', LDBC_DEFAULT_PROPERTIES,
'-P', os.path.join(os.getcwd(), args.properties_file),
'-P', os.path.join(SCRIPT_DIR, "ldbc-snb-impls-{}."
"properties".format(args.test_type)),
'-p', 'ldbc.snb.interactive.updates_dir', updates_dir,
'-p', 'host', args.host, '-p', 'port', args.port,
'-db', 'net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb',

View File

@ -69,6 +69,7 @@ cd ${script_dir}
#git clone https://phabricator.memgraph.io/source/ldbc-snb-impls.git
git clone $deps_git_url/ldbc-snb-impls.git
cd ldbc-snb-impls
git checkout 4b3bc129e6991dfa9adc974bb8fc53258036e127
sed -r '/(snb-interactive-tools|snb-interactive-titan|snb-interactive-torc)/s@^( +)(.+)$@\1<!--\2-->@' -i pom.xml
$mvn install
$mvn clean compile assembly:single

View File

@ -12,6 +12,10 @@ cd build
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1000 make -j$THREADS
cd ../tools/apollo
cd ../tools
./setup
cd apollo
./generate release

View File

@ -53,8 +53,6 @@ def parse_args():
default="ms", help="The time unit that should be used.")
argp.add_argument("--output", default="",
help="Save plot to file (instead of displaying it).")
argp.add_argument("--max-label-width", default=11, type=int,
help="Maximum length of the x-axis labels (-1 is unlimited)")
return argp.parse_args()
@ -66,7 +64,7 @@ def autolabel(ax, rects):
height = rect.get_height()
# TODO: adjust more vendors
ax.text(rect.get_x() + rect.get_width()/2., 1.00*height,
'%d' % int(height),
'{:.1f}'.format(height),
ha='center', va='bottom')
@ -87,6 +85,15 @@ def main():
vendors.append(vendor)
assert len(vendors) == 2, "The graph is tailored for only 2 vendors."
# Helper for shortening the query name.
def shorten_query_name(name):
if name.lower().startswith("ldbc"):
name = name[4:]
# Long query names on the x-axis don't look compelling.
num = "".join(filter(lambda x: x in string.digits, name))
prefix = name.split(num)[0]
return prefix + num
# Collect the benchmark data.
print("LDBC Latency Data")
for vendor in vendors:
@ -96,13 +103,19 @@ def main():
mean_runtime = (query_data["run_time"]["mean"] /
LDBC_TIME_FACTORS[results_data["unit"]] *
TIME_FACTORS[args.time_unit])
query_name = query_data['name']
query_name = shorten_query_name(query_data['name'])
vendor['results'].append((query_name, mean_runtime))
# Helper for sorting the results.
def sort_key(obj):
name = obj[0]
num = int("".join(filter(lambda x: x in string.digits, name)))
prefix = name.split(str(num))[0]
return (prefix, num)
# Sort results.
for vendor in vendors:
vendor['results'].sort(key=lambda item: int("".join(filter(
lambda x: x in string.digits, item[0]))))
vendor['results'].sort(key=sort_key)
# Print results.
for vendor in vendors:
@ -116,6 +129,15 @@ def main():
"Queries between different vendors are different!"
query_names = all_query_names[0]
# Font size.
plt.rc('font', size=12) # controls default text sizes
plt.rc('axes', titlesize=24) # fontsize of the axes title
plt.rc('axes', labelsize=16) # fontsize of the x and y labels
plt.rc('xtick', labelsize=12) # fontsize of the tick labels
plt.rc('ytick', labelsize=12) # fontsize of the tick labels
plt.rc('legend', fontsize=16) # legend fontsize
plt.rc('figure', titlesize=24) # fontsize of the figure title
# Plot.
ind = np.arange(len(query_names)) # the x locations for the groups
width = 0.40 # the width of the bars
@ -124,20 +146,7 @@ def main():
ax.set_ylabel('Mean Latency (%s)' % (args.time_unit)) # YAxis title
ax.set_facecolor('#dcdcdc') # plot bg color (light gray)
ax.set_xticks(ind + width / len(vendors)) # TODO: adjust (more vendors)
def shorten_query_name(query_name):
# Long query names on the x-axis don't look compelling.
if query_name.lower().startswith('ldbc'):
query_name = query_name[4:]
if len(query_name) > args.max_label_width:
query_name = query_name[:args.max_label_width] + '\N{HORIZONTAL ELLIPSIS}'
return query_name
labels = query_names
if args.max_label_width == 0:
labels = ["Q{}".format(i) for i, _ in enumerate(query_names)]
elif args.max_label_width > 0:
labels = map(shorten_query_name, query_names)
ax.set_xticklabels(labels, rotation=30)
ax.set_xticklabels(query_names, rotation=30)
# set only horizontal grid lines
for line in ax.get_xgridlines():
line.set_linestyle(' ')
@ -149,11 +158,11 @@ def main():
ax.set_title(args.plot_title)
# Draw logo or plot title
if args.logo_path != None:
# TODO: improve the logo positioning
im = plt.imread(get_sample_data(os.path.join(os.getcwd(),
args.logo_path)))
plt.gcf().subplots_adjust(top=0.85)
newax = fig.add_axes([0.46, 0.85, 0.12, 0.15], anchor='N')
# magic numbers for logo size - DO NOT TOUCH!
newax = fig.add_axes([0.46, 0.85, 0.126, 0.15], anchor='N')
newax.imshow(im)
newax.axis('off')
# Draw bars

View File

@ -210,7 +210,7 @@ void WriteNodeRow(const std::vector<Field> &fields,
}
}
id = node_id_map.Insert(node_id);
properties["id"] = *id;
properties["id"] = node_id.id;
} else if (field.type == "label") {
for (const auto &label : utils::Split(value, FLAGS_array_delimiter)) {
labels.emplace_back(utils::Trim(label));