From d68106051c185df59cbefb176b14c8aefc753742 Mon Sep 17 00:00:00 2001 From: Matej Ferencevic Date: Mon, 9 Dec 2019 11:31:27 +0100 Subject: [PATCH] Replace hardcoded configuration with generator Summary: The configuration file that was used for the Debian/CentOS package was manually written. This diff adds a configuration file generator that extracts all of the necessary information about the flags directly from the built binary and uses that information to generate the configuration file for the packages. Reviewers: teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2589 --- config/community.conf | 119 --------------------- config/flags.yaml | 97 +++++++++++++++++ config/generate.py | 124 ++++++++++++++++++++++ init | 1 + src/CMakeLists.txt | 18 +++- src/memgraph.cpp | 9 +- src/memgraph_ha.cpp | 4 +- src/query/plan/variable_start_planner.cpp | 2 +- 8 files changed, 243 insertions(+), 131 deletions(-) delete mode 100644 config/community.conf create mode 100644 config/flags.yaml create mode 100755 config/generate.py diff --git a/config/community.conf b/config/community.conf deleted file mode 100644 index e0b6fc7ff..000000000 --- a/config/community.conf +++ /dev/null @@ -1,119 +0,0 @@ -# Default Memgraph Configuration -# -# This is the default configuration for memgraph. Settings from this file will -# be overridden by a configuration file in '$HOME/.memgraph/config', so you can -# keep this file intact. Additional configuration can be specified in a file -# pointed to by 'MEMGRAPH_CONFIG' environment variable or by passing arguments -# on the command line. -# -# Each configuration setting is of the form: '--setting-name=value'. - -## Database - -# IP address the server should listen on. ---interface=0.0.0.0 - -# Port the server should listen on. ---port=7687 - -# Path to a SSL certificate file that should be used. ---cert-file=/etc/memgraph/ssl/cert.pem - -# Path to a SSL key file that should be used. 
---key-file=/etc/memgraph/ssl/key.pem - -# Number of workers used by the Memgraph server. By default, this will be the -# number of processing units available on the machine. -# --num-workers=8 - -# Interval, in seconds, when the garbage collection (GC) should run. GC is used -# for releasing memory that is no longer needed. For example, deleted graph -# elements which cannot be seen by any running or new transactions. If set to -# -1 the GC will never run (use with caution, memory will never get released). ---gc-cycle-sec=30 - -# If Memgraph detects there is less available RAM than the given number in MB, -# it will log a warning. ---memory-warning-threshold=1024 - -# The telemetry collects data about the machine that is executing the database -# (CPU, Memory, OS and Kernel Information) and data about the database runtime -# (CPU usage, Memory usage, Vertices and Edges count). It is used to provide a -# better product, easy to disable and does not collect any sensitive data. ---telemetry-enabled=true - -# Memgraph offers an option to store a certain amount of data on a disk. More -# precisely, the user can pass a list of properties they wish to keep stored on -# a disk. The property names have to be separated with a comma. An example -# would be --properties-on-disk=biography,summary. -#--properties-on-disk= - -## Query -# -# Various settings related to openCypher query execution. - -# Maximum allowed query execution time, in seconds. Any queries exceeding this -# limit will be aborted. Setting to -1 removes the limit. ---query-execution-time-sec=30 - -# Cache generated query execution plans. This speeds up planning repeated -# queries which produce multiple complex execution plans. The downside is that -# some executions may use inferior plans if the database state changed. To -# disable caching, set to false. -#--query-plan-cache=false - -# Time to live for cached query plans, in seconds. 
This tries to minimize the -# downside of caching by evicting old plans after the given time. -#--query-plan-cache-ttl=60 - -## Durability -# -# Memgraph can store database state to persistent storage. Two mechanisms -# are used: snapshots store the total current database state while write-ahead -# logs store small changes incrementally. They are used in tandem to provide -# fast and storage-efficient persistence. Some aspects of snapshot taking -# and write-ahead logging are configurable. ---durability-enabled=true - -# Path to the directory where snapshots and write-ahead log files will be stored. ---durability-directory=/var/lib/memgraph/durability - -# Recover the database on startup. ---db-recover-on-startup=true - -# Interval of taking snapshots, in seconds. If set to -1, the snapshot feature -# will be turned off. ---snapshot-cycle-sec=300 - -# Create a snapshot when closing Memgraph. ---snapshot-on-exit=true - -# Maximum number of kept snapshots. Old snapshots will be deleted to make room -# for new ones. If set to -1, the number of kept snapshots is unlimited. ---snapshot-max-retained=3 - -# Specifies whether WAL updates should be written on disk immediately after a -# transaction finishes. Setting this parameter to false does introduce risk of -# database inconsistency because an operating system or hardware crash might -# lead to missing transactions in the write-ahead log, but the database will -# handle this as if those transactions never happened. Turning -# synchronous-commit off can be a useful trade-off between exact durability and -# performance. ---synchronous-commit=false - -## Logging - -# Path to where the log should be stored. ---log-file=/var/log/memgraph/memgraph.log - -# If true, log messages will go to stderr in addition to logfiles. -#--also-log-to-stderr=true - -## Additional Configuration Inclusion - -# Include additional configuration from this file. Settings with the same name -# will override previously read values. 
Note, that reading the configuration, -# which called '--flag-file' will continue after inclusion. Therefore, settings -# after '--flag-file' may override the included ones. -#--flag-file=another.conf - diff --git a/config/flags.yaml b/config/flags.yaml new file mode 100644 index 000000000..37a57ab93 --- /dev/null +++ b/config/flags.yaml @@ -0,0 +1,97 @@ +header: >- + Memgraph Configuration + + This is the main configuration file for Memgraph. You can modify this file to + suit your specific needs. Additional configuration can be specified by + including another configuration file, in a file pointed to by the + 'MEMGRAPH_CONFIG' environment variable or by passing arguments on the command + line. + + Each configuration setting is in the form: '--setting-name=value'. + +footer: >- + Additional Configuration Inclusion + + You can include additional configuration files from this file. Additional + files are processed after this file. Settings that are set in the additional + files will override previously set values. Additional configuration files are + specified with the '--flag-file' flag. 
+ + Example: + + --flag-file=another.conf + +modifications: + + # Each modification should consist of the following parameters: + # * name: the name of the flag that should be modified (with underscores) + # [string] + # * value: the value that should be set instead of the binary provided + # default value [string] + # * override: set to `true` to uncomment the config option by default + # [boolean] + + - name: "data_directory" + value: "/var/lib/memgraph" + override: true + + - name: "log_file" + value: "/var/log/memgraph/memgraph.log" + override: true + + - name: "bolt_cert_file" + value: "/etc/memgraph/ssl/cert.pem" + override: true + + - name: "bolt_key_file" + value: "/etc/memgraph/ssl/key.pem" + override: true + + - name: "bolt_num_workers" + value: "" + override: false + + - name: "storage_recover_on_startup" + value: "true" + override: true + + - name: "storage_snapshot_interval_sec" + value: "300" + override: true + + - name: "storage_snapshot_on_exit" + value: "true" + override: true + + - name: "storage_snapshot_retention_count" + value: "3" + override: true + + - name: "storage_wal_enabled" + value: "true" + override: true + + - name: "telemetry_enabled" + value: "true" + override: true + + - name: "query_modules_directory" + value: "/usr/lib/memgraph/query_modules" + override: true + + - name: "auth_module_executable" + value: "/usr/lib/memgraph/auth_module/example.py" + override: false + +undocumented: + - "flag_file" + - "log_file_mode" + - "log_link_basename" + - "log_prefix" + - "max_log_size" + - "min_log_level" + - "help" + - "help_xml" + - "stderr_threshold" + - "stop_logging_if_full_disk" + - "version" diff --git a/config/generate.py b/config/generate.py new file mode 100755 index 000000000..863c0017e --- /dev/null +++ b/config/generate.py @@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""Generate the Memgraph package configuration file.
+
+Flag descriptions are read from the XML that the Memgraph binary prints when
+run with `--help-xml`, adjusted according to a generator configuration file
+(`flags.yaml` next to this script by default) and rendered as a commented
+configuration file.
+"""
+import argparse
+import copy
+import os
+import subprocess
+import sys
+import textwrap
+
+import xml.etree.ElementTree as ET
+
+import yaml
+
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+CONFIG_FILE = os.path.join(SCRIPT_DIR, "flags.yaml")
+# Maximum width of the generated comment lines.
+WIDTH = 80
+
+
+def wrap_text(s, initial_indent="# "):
+    """Format `s` as `#` comment lines wrapped at WIDTH columns.
+
+    Each newline separated part of `s` is wrapped on its own. The first line
+    of a part starts with `initial_indent` and the following ones with "# ".
+    The wrapped parts are joined with an empty comment line ("#").
+    """
+    return "\n#\n".join(
+        textwrap.fill(part, WIDTH, initial_indent=initial_indent,
+                      subsequent_indent="# ")
+        for part in s.split("\n"))
+
+
+def extract_flags(binary_path):
+    """Return the flags of the binary as a dict keyed by flag name.
+
+    Runs `binary_path --help-xml` and parses its standard output as XML.
+    Every `flag` element becomes a dict that maps the tags of its children
+    to their text ("" when there is none) and has "override" set to False.
+
+    Raises an Exception if the text of the `usage` element contains "warning"
+    (case-insensitive).
+    """
+    ret = {}
+    # Only the captured output is used, the exit status isn't inspected.
+    data = subprocess.run([binary_path, "--help-xml"],
+                          stdout=subprocess.PIPE).stdout.decode("utf-8")
+    root = ET.fromstring(data)
+    for child in root:
+        # `text` is None for an empty element, treat that as no text.
+        if child.tag == "usage" and "warning" in (child.text or "").lower():
+            raise Exception("You should set the usage message!")
+        if child.tag == "flag":
+            flag = {}
+            for elem in child:
+                flag[elem.tag] = elem.text if elem.text is not None else ""
+            flag["override"] = False
+            ret[flag["name"]] = flag
+    return ret
+
+
+def apply_config_to_flags(config, flags):
+    """Return a copy of `flags` adjusted by the generator configuration.
+
+    Flags named in `config["undocumented"]` are removed. Each entry of
+    `config["modifications"]` sets the "default" and "override" values of the
+    flag it names; if the flag doesn't exist a warning is printed to stderr
+    and the entry is skipped. The `flags` argument itself isn't modified.
+    """
+    flags = copy.deepcopy(flags)
+    # `or []` handles keys that are missing or left empty in the YAML file.
+    for name in config.get("undocumented") or []:
+        # A hidden flag that the binary doesn't have needs no hiding.
+        flags.pop(name, None)
+    for modification in config.get("modifications") or []:
+        name = modification["name"]
+        if name not in flags:
+            print("WARNING: Flag '" + name + "' missing from binary!",
+                  file=sys.stderr)
+            continue
+        flags[name]["default"] = modification["value"]
+        flags[name]["override"] = modification["override"]
+    return flags
+
+
+def extract_sections(flags):
+    """Group the flag names into sections.
+
+    The section of a flag is the part of its name before the first "_". The
+    names are visited in sorted order; a section shared by at least two
+    consecutive names is kept, every other name is moved to the "other"
+    section which comes last (and is left out when it would be empty).
+
+    Returns a list of `(section, [flag names])` tuples.
+    """
+    sections = []
+    other = []
+
+    def close_section(section, names):
+        # A single flag doesn't get a section of its own.
+        if len(names) < 2:
+            other.extend(names)
+        else:
+            sections.append((section, names))
+
+    current_section = ""
+    current_flags = []
+    for name in sorted(flags.keys()):
+        section = name.split("_")[0]
+        if section != current_section:
+            close_section(current_section, current_flags)
+            current_section = section
+            current_flags = []
+        current_flags.append(name)
+    close_section(current_section, current_flags)
+    if other:
+        sections.append(("other", other))
+    grouped = set(name for _, names in sections for name in names)
+    assert grouped == set(flags.keys()), \
+        "The section extraction algorithm lost some flags!"
+    return sections
+
+
+def generate_config_file(sections, flags, config=None):
+    """Return the contents of the configuration file as a string.
+
+    The file consists of `config["header"]`, one block per section and
+    `config["footer"]`. For each flag its help text and type are written as a
+    comment followed by `--flag-name=default`; the setting is commented out
+    unless the "override" value of the flag is true.
+
+    `sections` is the list returned by `extract_sections` and `flags` the dict
+    returned by `apply_config_to_flags`. When `config` is None the module
+    level `config` is used, which is how this function used to get it.
+    """
+    if config is None:
+        # Fall back to the global set by the script entry point below.
+        config = globals()["config"]
+    ret = wrap_text(config["header"]) + "\n\n\n"
+    for section, section_flags in sections:
+        ret += wrap_text(section.capitalize(), initial_indent="## ") + "\n\n"
+        for name in section_flags:
+            flag = flags[name]
+            helpstr = flag["meaning"] + " [" + flag["type"] + "]"
+            ret += wrap_text(helpstr) + "\n"
+            prefix = "# " if not flag["override"] else ""
+            ret += prefix + "--" + flag["name"].replace("_", "-") + \
+                "=" + flag["default"] + "\n\n"
+        ret += "\n"
+    ret += wrap_text(config["footer"])
+    return ret.strip() + "\n"
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("memgraph_binary",
+                        help="path to Memgraph binary")
+    parser.add_argument("output_file",
+                        help="path where to store the generated Memgraph "
+                        "configuration file")
+    parser.add_argument("--config-file", default=CONFIG_FILE,
+                        help="path to generator configuration file")
+
+    args = parser.parse_args()
+    flags = extract_flags(args.memgraph_binary)
+
+    with open(args.config_file) as f:
+        config = yaml.safe_load(f)
+
+    flags = apply_config_to_flags(config, flags)
+    sections = extract_sections(flags)
+    data = generate_config_file(sections, flags, config)
+
+    dirname = os.path.dirname(args.output_file)
+    if dirname:
+        # `exist_ok` makes this safe if the directory already exists.
+        os.makedirs(dirname, exist_ok=True)
+
+    with open(args.output_file, "w") as f:
+        f.write(data)
diff --git a/init b/init index 7bbde1d31..e0f41d36b 100755 --- a/init +++ b/init @@ -8,6 +8,7 @@ required_pkgs=(git arcanist # source code control libssl-dev libseccomp-dev python3 python-virtualenv python3-pip # for qa, macro_benchmark and stress tests + python3-yaml # for the configuration generator uuid-dev # mg-utils libcurl4-openssl-dev # mg-requests sbcl # for custom Lisp C++ preprocessing diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 616058004..2999a606d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -290,15 +290,25 @@ set_target_properties(memgraph
PROPERTIES add_custom_command(TARGET memgraph POST_BUILD COMMAND ${CMAKE_COMMAND} -E create_symlink $ ${CMAKE_BINARY_DIR}/memgraph BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph - COMMENT Creating symlink to memgraph executable) + COMMENT "Creating symlink to memgraph executable") # Strip the executable in release build. if (lower_build_type STREQUAL "release") add_custom_command(TARGET memgraph POST_BUILD COMMAND strip -s $ - COMMENT Stripping symbols and sections from memgraph) + COMMENT "Stripping symbols and sections from memgraph") endif() +# Generate the configuration file. +add_custom_command(TARGET memgraph POST_BUILD + COMMAND ${CMAKE_SOURCE_DIR}/config/generate.py + ${CMAKE_BINARY_DIR}/memgraph + ${CMAKE_BINARY_DIR}/config/memgraph.conf + DEPENDS ${CMAKE_SOURCE_DIR}/config/generate.py + ${CMAKE_SOURCE_DIR}/config/flags.yaml + BYPRODUCTS ${CMAKE_BINARY_DIR}/config/memgraph.conf + COMMENT "Generating memgraph configuration file") + # Everything here is under "memgraph" install component. set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "memgraph") @@ -315,7 +325,7 @@ install(PROGRAMS $ install(FILES ${CMAKE_SOURCE_DIR}/include/mg_procedure.h DESTINATION include/memgraph) # Install the config file (must use absolute path). -install(FILES ${CMAKE_SOURCE_DIR}/config/community.conf +install(FILES ${CMAKE_BINARY_DIR}/config/memgraph.conf DESTINATION /etc/memgraph RENAME memgraph.conf) # Install logrotate configuration (must use absolute path). 
install(FILES ${CMAKE_SOURCE_DIR}/release/logrotate.conf @@ -355,4 +365,4 @@ set_target_properties(memgraph_ha PROPERTIES add_custom_command(TARGET memgraph_ha POST_BUILD COMMAND ${CMAKE_COMMAND} -E create_symlink $ ${CMAKE_BINARY_DIR}/memgraph_ha BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph_ha - COMMENT Creating symlink to memgraph single node high availability executable) + COMMENT "Creating symlink to memgraph single node high availability executable") diff --git a/src/memgraph.cpp b/src/memgraph.cpp index b08c3b059..0b3cca741 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -104,7 +104,7 @@ DEFINE_uint64(query_execution_timeout_sec, 180, DEFINE_VALIDATED_string( query_modules_directory, "", - "Directory where modules with custom query procedures are stored", { + "Directory where modules with custom query procedures are stored.", { if (value.empty()) return true; if (utils::DirExists(value)) return true; std::cout << "Expected --" << flagname << " to point to a directory." @@ -116,8 +116,6 @@ using ServerT = communication::Server; using communication::ServerContext; void SingleNodeMain() { - google::SetUsageMessage("Memgraph single-node database server"); - // All enterprise features should be constructed before the main database // storage. This will cause them to be destructed *after* the main database // storage. 
That way any errors that happen during enterprise features @@ -247,4 +245,7 @@ void SingleNodeMain() { query::procedure::gModuleRegistry.UnloadAllModules(); } -int main(int argc, char **argv) { return WithInit(argc, argv, SingleNodeMain); } +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph database server"); + return WithInit(argc, argv, SingleNodeMain); +} diff --git a/src/memgraph_ha.cpp b/src/memgraph_ha.cpp index 06aedc30b..70faf8f62 100644 --- a/src/memgraph_ha.cpp +++ b/src/memgraph_ha.cpp @@ -40,9 +40,6 @@ using ServerT = communication::Server; using communication::ServerContext; void SingleNodeHAMain() { - google::SetUsageMessage( - "Memgraph high availability single-node database server"); - auto durability_directory = std::filesystem::path(FLAGS_durability_directory); database::GraphDb db; @@ -77,5 +74,6 @@ void SingleNodeHAMain() { } int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph high availability database server"); return WithInit(argc, argv, SingleNodeHAMain); } diff --git a/src/query/plan/variable_start_planner.cpp b/src/query/plan/variable_start_planner.cpp index b85cfef9b..3350cd552 100644 --- a/src/query/plan/variable_start_planner.cpp +++ b/src/query/plan/variable_start_planner.cpp @@ -8,7 +8,7 @@ #include "utils/flag_validation.hpp" DEFINE_VALIDATED_HIDDEN_uint64( - query_max_plans, 1000U, "Maximum number of generated plans for a query", + query_max_plans, 1000U, "Maximum number of generated plans for a query.", FLAG_IN_RANGE(1, std::numeric_limits::max())); namespace query::plan::impl {