From 00a6a5fa3c1964737ef233168568cd437599bc5b Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:15:55 +0100 Subject: [PATCH 01/11] Added other_engines folder to src for non-qlever engine scripts and some Configfiles --- pyproject.toml | 3 +- src/other_engines/Configfiles/Configfile.dblp | 20 +++++++++++++ .../Configfiles/Configfile.default | 29 +++++++++++++++++++ src/other_engines/Configfiles/Configfile.imdb | 20 +++++++++++++ .../Configfiles/Configfile.olympics | 16 ++++++++++ src/other_engines/__init__.py | 0 src/other_engines/engines/__init__.py | 0 7 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 src/other_engines/Configfiles/Configfile.dblp create mode 100644 src/other_engines/Configfiles/Configfile.default create mode 100644 src/other_engines/Configfiles/Configfile.imdb create mode 100644 src/other_engines/Configfiles/Configfile.olympics create mode 100644 src/other_engines/__init__.py create mode 100644 src/other_engines/engines/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 87238c2c..6c65e507 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,10 +28,11 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" "qlever-old" = "qlever.qlever_old:main" +"qoxigraph" = "other_engines.main:main" [tool.setuptools] license-files = ["LICENSE"] -package-data = { "qlever" = ["Qleverfiles/*"] } +package-data = { "qlever" = ["Qleverfiles/*"], "qoxigraph" = ["Configfiles/*"] } [tool.pytest.ini_options] pythonpath = ["src"] diff --git a/src/other_engines/Configfiles/Configfile.dblp b/src/other_engines/Configfiles/Configfile.dblp new file mode 100644 index 00000000..bdf40982 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.dblp @@ -0,0 +1,20 @@ +# Configfile for DBLP, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = dblp +DATA_TARFILE = dblp_KG_with_associated_data.tar +GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE} 
+GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE} +VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE") +DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION}) +FORMAT = ttl + +[index] +INPUT_FILES = *.gz + +[server] +PORT = 7015 + +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.default b/src/other_engines/Configfiles/Configfile.default new file mode 100644 index 00000000..2e622560 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.default @@ -0,0 +1,29 @@ +# Default Configfile, use with https://github.com/ad-freiburg/qlever-control +# +# If you have never seen a Configfile before, we recommend that you first look +# at the example Configfiles on http://qlever.cs.uni-freiburg.de/qlever-control/ +# src/other_engines/Configfiles . Or execute `q{engine_name} setup-config <config-name>` on the +# command line to obtain the example Configfiles for <config-name>. + +# As a minimum, each dataset needs a name. If you want `q{engine_name} get-data` to do +# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to +# generate (or download or copy from somewhere) the input files yourself. Each +# dataset should have a short DESCRIPTION, ideally with a date. +[data] +NAME = +GET_DATA_CMD = +DESCRIPTION = + +# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all +# input files. +[index] +INPUT_FILES = *.ttl + +# The server listens on PORT. 
+[server] +PORT = 8888 + +# Use SYSTEM = docker to run inside a docker container +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.imdb b/src/other_engines/Configfiles/Configfile.imdb new file mode 100644 index 00000000..acb1ae39 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.imdb @@ -0,0 +1,20 @@ +# Configfile for IMDB, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = imdb +IMDB_DATA_URL = https://datasets.imdbws.com +GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> ." +GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE} +GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE} +GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl +DESCRIPTION = RDF data derived from ${IMDB_DATA_URL} + +[index] +INPUT_FILES = ${data:NAME}.ttl + +[server] +PORT = 7029 + +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.olympics b/src/other_engines/Configfiles/Configfile.olympics new file mode 100644 index 00000000..8b80d06f --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.olympics @@ -0,0 +1,16 @@ +# Configfile for Olympics, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = olympics +BASE_URL = https://github.com/wallscope/olympics-rdf +GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip +DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL} + +[index] +INPUT_FILES = olympics.nt + +[server] +PORT = 7887 + +[runtime] +SYSTEM = docker 
diff --git a/src/other_engines/__init__.py b/src/other_engines/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/other_engines/engines/__init__.py b/src/other_engines/engines/__init__.py new file mode 100644 index 00000000..e69de29b From 6cb8cc93f93ecaeb8e0e33d8b13bc53f4e1ceabd Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:17:26 +0100 Subject: [PATCH 02/11] main function to call the correct command for the specified engine --- src/other_engines/main.py | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 src/other_engines/main.py diff --git a/src/other_engines/main.py b/src/other_engines/main.py new file mode 100644 index 00000000..1c95931e --- /dev/null +++ b/src/other_engines/main.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# PYTHON_ARGCOMPLETE_OK + +# Copyright 2024, University of Freiburg, +# Chair of Algorithms and Data Structures +# Author: Tanmay Garg + +from __future__ import annotations + +import sys +import traceback +from importlib import import_module +from pathlib import Path + +from termcolor import colored + +from other_engines.config import ArgumentsManager +from qlever.config import ConfigException +from qlever.log import log, log_levels + + +def main(): + selected_engine = Path(sys.argv[0]).stem[1:] + engine_class_name = selected_engine.capitalize() + module_path = f"other_engines.engines.{selected_engine}" + try: + module = import_module(module_path) + except ImportError as e: + raise Exception( + f"Could not import module {module_path} " + f"for engine {selected_engine}: {e}" + ) + + engine_class = getattr(module, engine_class_name)() + + # Parse the command line arguments and read the Configfile + try: + engine_config = ArgumentsManager(engine=engine_class) + args = engine_config.parse_args() + except ConfigException as e: + log.error(e) + log.info("") + log.info(traceback.format_exc()) + exit(1) + + # Execute the command. 
+ command = f"{args.command.replace('-', '_')}_command" + log.setLevel(log_levels[args.log_level]) + try: + log.info("") + log.info(colored(f"Command: {command}", attrs=["bold"])) + log.info("") + commandWasSuccesful = getattr(engine_class, command)(args) + log.info("") + if not commandWasSuccesful: + exit(1) + except KeyboardInterrupt: + log.info("") + log.info("Ctrl-C pressed, exiting ...") + log.info("") + exit(1) + except Exception as e: + log.error(f"An unexpected error occurred: {e}") + log.info("") + log.info(traceback.format_exc()) + exit(1) From e3466430ad1f8a3553182a0041a6208dcd0fcec0 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:18:07 +0100 Subject: [PATCH 03/11] ArgumentsManager class to handle Configfile and command line arguments for specified engine and command --- src/other_engines/config.py | 232 ++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 src/other_engines/config.py diff --git a/src/other_engines/config.py b/src/other_engines/config.py new file mode 100644 index 00000000..fd5080ca --- /dev/null +++ b/src/other_engines/config.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +import argparse +import os +from pathlib import Path + +import argcomplete +from termcolor import colored + +from other_engines.engine import SparqlEngine +from qlever.containerize import Containerize +from qlever.log import log, log_levels +from qlever.qleverfile import Qleverfile + + +def all_arguments(): + """ + Take all existing arguments from Qleverfile + and add/replace the ones that are new/different from QLever + """ + + def arg(*args, **kwargs): + return (args, kwargs) + + all_args = Qleverfile.all_arguments() + all_args["runtime"]["system"] = arg( + "--system", + type=str, + choices=Containerize.supported_systems(), + default="docker", + help=( + "Which system to use to run commands like `index` " + "or `start` in a container" + ), + ) + all_args["runtime"]["index_container"] = arg( + 
"--index-container", + type=str, + help="The name of the container used by the index command", + ) + all_args["runtime"]["server_container"] = arg( + "--server-container", + type=str, + help="The name of the container used by the start command", + ) + return all_args + + +class ArgumentsManager: + SPECIAL_ARGS = ["image", "index_container", "server_container"] + + def __init__(self, engine: SparqlEngine) -> None: + self.engine = engine + self.engine_name = engine.engine_name + self.commands = engine.commands + + def get_default_config_value(self, arg_name: str, config): + """ + Get default values for SPECIAL_ARGS + """ + name = config["data"]["name"] + if arg_name == "image": + return self.engine.image + if arg_name == "index_container": + return f"{self.engine_name.lower()}.index.{name}" + if arg_name == "server_container": + return f"{self.engine_name.lower()}.server.{name}" + + def add_subparser_for_command( + self, + subparsers, + command_name: str, + description: str, + config=None, + ) -> None: + """ + Add subparser for the given command. Take the arguments from + `self.engine.get_config_arguments()` and report an error if + one of them is not contained in `all_arguments`. Overwrite the + default values with the values from `config` if specified. + """ + + arg_names = self.engine.get_config_arguments(command_name) + all_configfile_args = all_arguments() + + def argument_error(prefix: str): + log.info("") + log.error( + f"{prefix} in `other_engines.configfile.all_arguments()` " + f"for command `{command_name}`" + ) + log.info("") + log.info( + f"Value of `get_config_arguments_for_command` " + f"`{command_name}`:" + ) + log.info("") + log.info(f"{arg_names}") + log.info("") + exit(1) + + # Add the subparser. + subparser = subparsers.add_parser( + command_name, description=description, help=description + ) + + # Add the arguments relevant for the command. 
+ for section in arg_names: + if section not in all_configfile_args: + argument_error(f"Section `{section}` not found") + for arg_name in arg_names[section]: + if arg_name not in all_configfile_args[section]: + argument_error( + f"Argument `{arg_name}` of section " + f"`{section}` not found" + ) + args, kwargs = all_configfile_args[section][arg_name] + kwargs_copy = kwargs.copy() + # If `configfile_config` is given, add info about default + # values to the help string. + if config is not None: + default_value = kwargs.get("default", None) + config_value = ( + config.get(section, arg_name, fallback=None) + if arg_name not in self.SPECIAL_ARGS + else self.get_default_config_value(arg_name, config) + ) + if config_value is not None: + kwargs_copy["default"] = config_value + kwargs_copy["required"] = False + kwargs_copy["help"] += ( + f" [default, from {self.engine_name}file: " + f"{config_value}]" + ) + else: + kwargs_copy["help"] += f" [default: {default_value}]" + subparser.add_argument(*args, **kwargs_copy) + + # Additional arguments that are shared by all commands. + self.engine.additional_arguments(command_name, subparser) + subparser.add_argument( + "--show", + action="store_true", + default=False, + help="Only show what would be executed, but don't execute it", + ) + subparser.add_argument( + "--log-level", + choices=log_levels.keys(), + default="INFO", + help="Set the log level", + ) + + def parse_args(self): + # Determine whether we are in autocomplete mode or not. + autocomplete_mode = "COMP_LINE" in os.environ + + # Check if the user has registered this script for argcomplete. 
+ argcomplete_check_off = os.environ.get( + f"{self.engine_name.upper()}_ARGCOMPLETE_CHECK_OFF" + ) + argcomplete_enabled = os.environ.get( + f"{self.engine_name.upper()}_ARGCOMPLETE_ENABLED" + ) + if not argcomplete_enabled and not argcomplete_check_off: + log.info("") + log.warn( + f"To enable autocompletion, run the following command, " + f"and consider adding it to your `.bashrc` or `.zshrc`:" + f"\n\n" + f'eval "$(register-python-argcomplete q{self.engine_name.lower()})"' + f" && export {self.engine_name.upper()}_ARGCOMPLETE_ENABLED=1" + ) + log.info("") + + configfile_path = Path(f"{self.engine_name}file") + configfile_exists = configfile_path.is_file() + + if configfile_exists and not autocomplete_mode: + try: + config = Qleverfile.read_qleverfile(configfile_path) + except Exception as e: + log.info("") + log.error( + f"Error parsing {self.engine_name}file `{configfile_path}`" + f": {e}" + ) + log.info("") + exit(1) + else: + config = None + + parser = argparse.ArgumentParser( + description=colored( + f"This is the q{self.engine_name.lower()} command line tool, " + f"it's all you need to work with {self.engine_name} in a " + f"{' or '.join(Containerize.supported_systems())} " + "container environment", + attrs=["bold"], + ) + ) + subparsers = parser.add_subparsers(dest="command") + subparsers.required = True + for command_name, description in self.commands.items(): + self.add_subparser_for_command( + subparsers=subparsers, + command_name=command_name, + description=description, + config=config, + ) + + argcomplete.autocomplete(parser, always_complete_options="long") + + # If called without arguments, show the help message. + if len(os.sys.argv) == 1: + parser.print_help() + exit(0) + + args = parser.parse_args() + + # If the command says that we should have a Qleverfile, but we don't, + # issue a warning. 
+ if self.engine.command_should_have_configfile(args.command): + if not configfile_exists: + log.warning( + f"Invoking command `{args.command}` without a " + f"{self.engine_name}file. You have to specify all " + "required arguments on the command line. " + "This is possible, but not recommended." + ) + + return args From 707f60d5d6867b8a7285d27d9ca3549404b21a4f Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:18:33 +0100 Subject: [PATCH 04/11] Base class with commands for SparqlEngine --- src/other_engines/engine.py | 321 ++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 src/other_engines/engine.py diff --git a/src/other_engines/engine.py b/src/other_engines/engine.py new file mode 100644 index 00000000..658e6fec --- /dev/null +++ b/src/other_engines/engine.py @@ -0,0 +1,321 @@ +from __future__ import annotations + +import inspect +import re +import subprocess +from abc import ABC, abstractmethod +from pathlib import Path + +from termcolor import colored + +from qlever.commands.example_queries import ExampleQueriesCommand +from qlever.commands.get_data import GetDataCommand +from qlever.commands.query import QueryCommand +from qlever.commands.stop import stop_container +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import get_random_string + + +class SparqlEngine(ABC): + """ + A base class for SparqlEngine != QLever + The class holds engine_name and a dict {command_name: command_description} + The command names and description are automatically taken from functions + that end with "_command" and their docstring. + !! Make sure to have trailing __command only for functions that represent + a command being executed. + Most common functions that are shared between different engines are + implemented here, but can be overriden in child classes. + Only container based setup is supported for now! 
+ """ + + def __init__(self, engine_name: str) -> None: + self.engine_name = engine_name + self.commands = self.get_command_dict() + self.configfiles_path = Path(__file__).parent / "Configfiles" + self.configfile_path = Path(f"{self.engine_name}file") + + def get_command_dict(self) -> dict[str, str]: + """ + Get a dictionary for all commands supported by this Engine + {command_name: command_description} + Command name is taken from command functions without the "_command" + Command description is taken from the command function docstring. + """ + command_dict = {} + for name, method in inspect.getmembers( + self.__class__, predicate=inspect.isfunction + ): + if name.endswith("_command"): + docstring = inspect.getdoc(getattr(self.__class__, name)) or "" + clean_docstring = re.sub(r"\s+", " ", docstring.strip()) + clean_docstring = clean_docstring.replace( + "Configfile", f"{self.engine_name}file" + ) + command_name = name[: -len("_command")].replace("_", "-") + command_dict[command_name] = clean_docstring + return command_dict + + def command_should_have_configfile(self, command: str) -> bool: + """ + Return `True` if the command should have a Configfile, `False` + otherwise. If a command should have a Configfile, but none is + specified, the command can still be executed if all the required + arguments are specified on the command line, but there will be warning. + """ + cmds_that_need_configfile = [ + "get-data", + "index", + "start", + "stop", + "log", + ] + return command in cmds_that_need_configfile + + @abstractmethod + def get_config_arguments(self, command: str) -> dict[str : list[str]]: + """ + Return the arguments relevant for the passed command. This must be a + subset of the names of `all_arguments` defined in configfile.py. + Only these arguments can then be used in the respective command method. 
+ """ + if command in ("example-queries", "query"): + return {"server": ["port"]} + return None + + def additional_arguments(self, command: str, subparser) -> None: + """ + Add additional command-specific arguments (which are not in + `configfile.all_arguments` and cannot be specified in the Configfile) + to the given `subparser`. + """ + configfile_names = [ + p.name.split(".")[1] + for p in self.configfiles_path.glob("Configfile.*") + ] + if command == "setup-config": + subparser.add_argument( + "config_name", + type=str, + choices=configfile_names, + nargs="?", + default="default", + help=( + f"The name of the pre-configured {self.engine_name}" + "file to create [default = default]" + ), + ) + if command == "log": + subparser.add_argument( + "--tail-num-lines", + type=int, + default=20, + help=( + "Show this many of the last lines of the log " + "file [default = 20]" + ), + ) + subparser.add_argument( + "--from-beginning", + action="store_true", + default=False, + help="Show all lines of the log file [default = False]", + ) + subparser.add_argument( + "--no-follow", + action="store_true", + default=False, + help="Don't follow the log file [default = False]", + ) + if command == "example-queries": + subparser.add_argument( + "--ui_config", + type=str, + choices=configfile_names, + nargs="?", + default="default", + help=( + "The name of the pre-configured QLever ui_config " + "to use to get example queries [default = default]" + ), + ) + ExampleQueriesCommand().additional_arguments(subparser) + if command == "query": + subparser.add_argument( + "--access-token", + type=str, + help=( + "QLever access_token to send privileged commands " + "to the server" + ), + ) + QueryCommand().additional_arguments(subparser) + + def show(self, command_description: str, only_show: bool = False): + """ + Helper function that shows the command line or description of an + action, together with an explanation. 
+ """ + + log.info(colored(command_description, "blue")) + log.info("") + if only_show: + log.info( + f'You called "q{self.engine_name.lower()} ... --show", ' + "therefore the command is only shown, but not executed " + '(omit the "--show" to execute it)' + ) + + def setup_config_command(self, args) -> bool: + """ + Get a pre-configured Configfile for the given engine and config_name + """ + # Construct the command line and show it. + configfile_path = ( + self.configfiles_path / f"Configfile.{args.config_name}" + ) + setup_config_cmd = ( + f"cat {configfile_path}" + f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'" + ) + setup_config_cmd += f"> {self.engine_name}file" + self.show(setup_config_cmd, only_show=args.show) + if args.show: + return True + + # If there is already a Configfile in the current directory, exit. + if self.configfile_path.is_file(): + log.error( + f"`{self.engine_name}file` already exists in current directory" + ) + log.info("") + log.info( + f"If you want to create a new {self.engine_name}file using " + f"`q{self.engine_name.lower()} setup-config`, " + f"delete the existing {self.engine_name}file first" + ) + return False + + # Copy the Configfile to the current directory. + try: + subprocess.run( + setup_config_cmd, + shell=True, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + except Exception as e: + log.error( + f'Could not copy "{configfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. 
+ log.info( + f'Created {self.engine_name}file for config "{args.config_name}"' + f" in current directory" + ) + return True + + def get_data_command(self, args) -> bool: + """ + Get data using the GET_DATA_CMD in the Configfile + """ + GetDataCommand.show = self.show + return GetDataCommand().execute(args) + + def log_command(self, args) -> bool: + """ + Show the last lines of the index/server container log and follow it + """ + system = args.system + index_container = args.index_container + server_container = args.server_container + + log_cmd = f"{system} logs " + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + if Containerize().is_running(system, index_container): + log_cmd += index_container + active_ps = "index" + elif Containerize().is_running(system, server_container): + log_cmd += server_container + active_ps = "start" + else: + log_cmd = None + + if log_cmd is None: + log.info( + f"No running index or start {system} container found!" + f"Are you sure you called `q{self.engine_name.lower()} index` " + f"or `q{self.engine_name.lower()} start` " + "and have a process running?" + ) + return False + + # Show the command line. + self.show(log_cmd, only_show=args.show) + if args.show: + return True + + log.info( + f"Showing logs for {active_ps} command. 
Press Ctrl-C to stop " + f"following (will not stop the {active_ps} process)" + ) + + # run_command(log_cmd, show_output=True) + subprocess.run( + log_cmd, + shell=True, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + return True + + @abstractmethod + def index_command(self) -> bool: + """ + Build the index for a given RDF dataset + """ + pass + + @abstractmethod + def start_command(self) -> bool: + """ + Start the server for given Engine + """ + pass + + def stop_command(self, args) -> bool: + """ + Stop the server by stopping and removing the server container + """ + server_container = args.server_container + + description = f"Checking for container with name {server_container}" + self.show(description, only_show=args.show) + if args.show: + return True + + # First check if container is running and if yes, stop and remove it + if stop_container(server_container): + return True + + def example_queries_command(self, args) -> bool: + """ + Execute queries against a SPARQL endpoint and get runtime information + """ + ExampleQueriesCommand.show = self.show + return ExampleQueriesCommand().execute(args) + + def query_command(self, args) -> bool: + """ + Send a query to a SPARQL endpoint + """ + QueryCommand.show = self.show + return QueryCommand().execute(args) From 0ad9da69b03bda85d494dae99f992077a88bb2dd Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:18:52 +0100 Subject: [PATCH 05/11] Oxigraph class with index and start commands --- src/other_engines/engines/oxigraph.py | 194 ++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 src/other_engines/engines/oxigraph.py diff --git a/src/other_engines/engines/oxigraph.py b/src/other_engines/engines/oxigraph.py new file mode 100644 index 00000000..4d5af3ef --- /dev/null +++ b/src/other_engines/engines/oxigraph.py @@ -0,0 +1,194 @@ +import glob +import shlex +from pathlib import Path + +from other_engines.engine import SparqlEngine +from 
qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class Oxigraph(SparqlEngine): + def __init__(self) -> None: + super().__init__(engine_name="Oxigraph") + self.image = "ghcr.io/oxigraph/oxigraph" + + def get_config_arguments(self, command: str) -> dict[str : list[str]]: + config_args = super().get_config_arguments(command) + if config_args is not None: + return config_args + if command == "setup-config": + return {} + if command == "get-data": + return {"data": ["name", "get_data_cmd"], "index": ["input_files"]} + if command == "index": + return { + "data": ["name", "format"], + "index": [ + "input_files", + ], + "runtime": ["system", "image", "index_container"], + } + if command == "start": + return { + "data": ["name", "description"], + "server": [ + "host_name", + "port", + ], + "runtime": [ + "system", + "image", + "server_container", + "index_container", + ], + } + if command == "log": + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + "index_container", + ], + } + if command == "stop": + return { + "data": ["name"], + "server": ["port"], + "runtime": ["server_container"], + } + raise ValueError( + f"Couldn't fetch relevant Configfile arguments for {command}. " + f"The command must be one of {self.commands.keys()}" + ) + + def index_command(self, args) -> bool: + # Run the command in a container (if so desired). + system = args.system + input_files = args.input_files + index_container = args.index_container + index_cmd = f"load --location /index --file /index/{input_files}" + # index_cmd += f" > {dataset}.index-log.txt 2>&1" + index_cmd = Containerize().containerize_command( + cmd=index_cmd, + container_system=system, + run_subcommand="run -d --rm", + image_name=self.image, + container_name=index_container, + volumes=[("$(pwd)", "/index")], + use_bash=False, + ) + + # Show the command line. 
+ self.show(index_cmd, only_show=args.show) + if args.show: + return True + + # Check if all of the input files exist. + for pattern in shlex.split(input_files): + if len(glob.glob(pattern)) == 0: + log.error(f'No file matching "{pattern}" found') + log.info("") + log.info( + "Did you call `qoxigraph get-data`? If you did, check " + "GET_DATA_CMD and INPUT_FILES in the Oxigraphfile" + ) + return False + + if len([p.name for p in Path.cwd().glob("*.sst")]) != 0: + log.error( + "Index files (*.sst) found in current directory " + "which shows presence of a previous index" + ) + log.info("") + log.info("Aborting the index operation...") + return False + + # Run the index command. + try: + log.info( + "Run `qoxigraph log` to see the progress of index command " + "after this command terminates" + ) + run_command(index_cmd, show_output=True) + except Exception as e: + log.error(f"Building the index failed: {e}") + return False + + return True + + def start_command(self, args) -> bool: + """ + Start the server for Oxigraph (requires that you have built an index with + `qoxigraph index` before) + """ + system = args.system + dataset = args.name + + # Check if index and server container still running + index_container = args.index_container + server_container = args.server_container + if Containerize().is_running(system, index_container): + log.info( + f"{system} container {index_container} is still up, " + "which means that data loading is in progress. Please wait...\n" + f"Check status of {index_container} with `qoxigraph log`" + ) + return False + + if Containerize().is_running(system, server_container): + log.info( + f"{system} container {server_container} exists, " + f"which means that server for {dataset} is already running. \n" + f"Stop the container {server_container} with `qoxigraph stop` " + "first before starting a new one." 
+ ) + return False + + # Check if index files (*.sst) present in cwd + if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: + log.info( + f"No Oxigraph index files for {dataset} found! " + "Did you call `qoxigraph index`? If you did, check if .sst " + "index files are present in current working directory." + ) + return False + + port = int(args.port) + start_cmd = "serve-read-only --location /index --bind=0.0.0.0:7878" + start_cmd = Containerize().containerize_command( + cmd=start_cmd, + container_system=system, + run_subcommand="run -d --restart=unless-stopped", + image_name=self.image, + container_name=server_container, + volumes=[("$(pwd)", "/index")], + ports=[(port, 7878)], + use_bash=False, + ) + + # Show the command line. + self.show(start_cmd, only_show=args.show) + if args.show: + return True + + # Run the start command. + try: + run_command(start_cmd, show_output=True) + log.info( + "Follow the server log by running `qoxigraph log` until " + "the server is ready. (Ctrl-C stops following the log, " + "but not the server)" + ) + log.info( + f"Oxigraph server webapp for {dataset} will be available at " + f"http://localhost:{port} and the sparql endpoint for " + f"queries is http://localhost:{port}/query" + ) + except Exception as e: + log.error(f"Starting the Oxigraph server failed: {e}") + return False + + return True From 2ed845f7b7683eb3f93ca902beaadb25c708fc0a Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:19:35 +0100 Subject: [PATCH 06/11] Added use_bash parameter to allow for commands that don't rely on bash in container --- src/qlever/containerize.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/qlever/containerize.py b/src/qlever/containerize.py index 42a8f13e..43ec47d2 100644 --- a/src/qlever/containerize.py +++ b/src/qlever/containerize.py @@ -40,6 +40,7 @@ def containerize_command( volumes: list[tuple[str, str]] = [], ports: list[tuple[int, int]] = [], working_directory: 
Optional[str] = None, + use_bash: bool = True, ) -> str: """ Get the command to run `cmd` with the given `container_system` and the @@ -80,11 +81,17 @@ def containerize_command( f"{volume_options}" f"{port_options}" f"{working_directory_option}" + f" --name {container_name}" f" --init" - f" --entrypoint bash" - f" --name {container_name} {image_name}" - f" -c {shlex.quote(cmd)}" ) + if use_bash: + containerized_cmd += ( + f" --entrypoint bash {image_name} -c {shlex.quote(cmd)}" + ) + else: + containerized_cmd += ( + f" {image_name} {cmd}" + ) return containerized_cmd @staticmethod @@ -92,7 +99,8 @@ def is_running(container_system: str, container_name: str) -> bool: # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively. containers = ( run_command( - f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True + f'{container_system} ps --format="{{{{.Names}}}}"', + return_output=True, ) .strip() .splitlines() @@ -100,7 +108,9 @@ def is_running(container_system: str, container_name: str) -> bool: return container_name in containers @staticmethod - def stop_and_remove_container(container_system: str, container_name: str) -> bool: + def stop_and_remove_container( + container_system: str, container_name: str + ) -> bool: """ Stop the container with the given name using the given system. 
Return `True` if a container with that name was found and stopped, `False` From f41c6e6d12aa6139a1d56204df686c2d6a966efa Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 3 Feb 2025 23:20:14 +0100 Subject: [PATCH 07/11] Separated reading qleverfile by ConfigParser logic for use in other_engines.config --- src/qlever/qleverfile.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index a269a889..3ff20668 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -336,17 +336,10 @@ def arg(*args, **kwargs): return all_args @staticmethod - def read(qleverfile_path): + def read_qleverfile(qleverfile_path): """ - Read the given Qleverfile (the function assumes that it exists) and - return a `ConfigParser` object with all the options and their values. - - NOTE: The keys have the same hierarchical structure as the keys in - `all_arguments()`. The Qleverfile may contain options that are not - defined in `all_arguments()`. They can be used as temporary variables - to define other options, but cannot be accessed by the commands later. + Read the Qleverfile using ConfigParser and return cleaned config """ - # Read the Qleverfile. defaults = {"random": "83724324hztz", "version": "01.01.01"} config = ConfigParser( @@ -383,6 +376,22 @@ def read(qleverfile_path): exit(1) config[section][option] = value + return config + + @staticmethod + def read(qleverfile_path): + """ + Read the given Qleverfile (the function assumes that it exists) and + return a `ConfigParser` object with all the options and their values. + + NOTE: The keys have the same hierarchical structure as the keys in + `all_arguments()`. The Qleverfile may contain options that are not + defined in `all_arguments()`. They can be used as temporary variables + to define other options, but cannot be accessed by the commands later. 
+ """ + + config = Qleverfile.read_qleverfile(qleverfile_path) + # Make sure that all the sections are there. for section in ["data", "index", "server", "runtime", "ui"]: if section not in config: From b462ca0057b47009d554429d0c002c3f1f9bdc83 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 12 Feb 2025 01:17:40 +0100 Subject: [PATCH 08/11] Added --run-in-foreground option to oxigraph `index` and `start` and show log if run in background --- src/other_engines/config.py | 18 +++++++++ src/other_engines/engine.py | 32 +++++++++------ src/other_engines/engines/oxigraph.py | 58 +++++++++++---------------- 3 files changed, 60 insertions(+), 48 deletions(-) diff --git a/src/other_engines/config.py b/src/other_engines/config.py index fd5080ca..e21d3da4 100644 --- a/src/other_engines/config.py +++ b/src/other_engines/config.py @@ -23,6 +23,24 @@ def arg(*args, **kwargs): return (args, kwargs) all_args = Qleverfile.all_arguments() + all_args["runtime"]["index_cmd"] = arg( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the index command in the foreground " + "(default: run in the background with `docker run -d`)" + ), + ) + all_args["runtime"]["start_cmd"] = arg( + "--run-in-foreground", + action="store_true", + default=False, + help=( + "Run the server in the foreground " + "(default: run in the background with `docker run -d`)" + ), + ) all_args["runtime"]["system"] = arg( "--system", type=str, diff --git a/src/other_engines/engine.py b/src/other_engines/engine.py index 658e6fec..030458fe 100644 --- a/src/other_engines/engine.py +++ b/src/other_engines/engine.py @@ -167,6 +167,22 @@ def show(self, command_description: str, only_show: bool = False): '(omit the "--show" to execute it)' ) + @staticmethod + def show_container_logs(log_cmd: str, active_ps: str) -> None: + """ + Execute a container logs command and show the output for a given + active process active_ps + """ + log.info( + f"Showing logs for {active_ps} command. 
Press Ctrl-C to stop " + f"following (will not stop the {active_ps} process)" + ) + + try: + run_command(log_cmd, show_output=True) + except Exception as e: + log.error(f"Cannot display container logs - {e}") + def setup_config_command(self, args) -> bool: """ Get a pre-configured Configfile for the given engine and config_name @@ -235,10 +251,12 @@ def log_command(self, args) -> bool: server_container = args.server_container log_cmd = f"{system} logs " + if not args.from_beginning: log_cmd += f"-n {args.tail_num_lines} " if not args.no_follow: log_cmd += "-f " + if Containerize().is_running(system, index_container): log_cmd += index_container active_ps = "index" @@ -262,19 +280,7 @@ def log_command(self, args) -> bool: if args.show: return True - log.info( - f"Showing logs for {active_ps} command. Press Ctrl-C to stop " - f"following (will not stop the {active_ps} process)" - ) - - # run_command(log_cmd, show_output=True) - subprocess.run( - log_cmd, - shell=True, - check=True, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ) + self.show_container_logs(log_cmd, active_ps) return True @abstractmethod diff --git a/src/other_engines/engines/oxigraph.py b/src/other_engines/engines/oxigraph.py index 4d5af3ef..c5d585ec 100644 --- a/src/other_engines/engines/oxigraph.py +++ b/src/other_engines/engines/oxigraph.py @@ -27,7 +27,7 @@ def get_config_arguments(self, command: str) -> dict[str : list[str]]: "index": [ "input_files", ], - "runtime": ["system", "image", "index_container"], + "runtime": ["system", "image", "index_container", "index_cmd"], } if command == "start": return { @@ -41,24 +41,9 @@ def get_config_arguments(self, command: str) -> dict[str : list[str]]: "image", "server_container", "index_container", + "start_cmd", ], } - if command == "log": - return { - "data": ["name"], - "runtime": [ - "system", - "image", - "server_container", - "index_container", - ], - } - if command == "stop": - return { - "data": ["name"], - "server": ["port"], - 
"runtime": ["server_container"], - } raise ValueError( f"Couldn't fetch relevant Configfile arguments for {command}. " f"The command must be one of {self.commands.keys()}" @@ -69,13 +54,15 @@ def index_command(self, args) -> bool: system = args.system input_files = args.input_files index_container = args.index_container + run_subcommand = "run --rm" + if not args.run_in_foreground: + run_subcommand += " -d" index_cmd = f"load --location /index --file /index/{input_files}" - # index_cmd += f" > {dataset}.index-log.txt 2>&1" index_cmd = Containerize().containerize_command( cmd=index_cmd, container_system=system, - run_subcommand="run -d --rm", - image_name=self.image, + run_subcommand=run_subcommand, + image_name=args.image, container_name=index_container, volumes=[("$(pwd)", "/index")], use_bash=False, @@ -108,11 +95,10 @@ def index_command(self, args) -> bool: # Run the index command. try: - log.info( - "Run `qoxigraph log` to see the progress of index command " - "after this command terminates" - ) run_command(index_cmd, show_output=True) + if not args.run_in_foreground: + log_cmd = f"{system} logs -f {index_container}" + self.show_container_logs(log_cmd, "index") except Exception as e: log.error(f"Building the index failed: {e}") return False @@ -121,8 +107,8 @@ def index_command(self, args) -> bool: def start_command(self, args) -> bool: """ - Start the server for Oxigraph (requires that you have built an index with - `qoxigraph index` before) + Start the server for Oxigraph (requires that you have built an index + with `qoxigraph index` before) (Runs in a container and in background) """ system = args.system dataset = args.name @@ -157,12 +143,15 @@ def start_command(self, args) -> bool: return False port = int(args.port) + run_subcommand = "run --restart=unless-stopped" + if not args.run_in_foreground: + run_subcommand += " -d" start_cmd = "serve-read-only --location /index --bind=0.0.0.0:7878" start_cmd = Containerize().containerize_command( cmd=start_cmd, 
container_system=system, - run_subcommand="run -d --restart=unless-stopped", - image_name=self.image, + run_subcommand=run_subcommand, + image_name=args.image, container_name=server_container, volumes=[("$(pwd)", "/index")], ports=[(port, 7878)], @@ -177,16 +166,15 @@ def start_command(self, args) -> bool: # Run the start command. try: run_command(start_cmd, show_output=True) - log.info( - "Follow the server log by running `qoxigraph log` until " - "the server is ready. (Ctrl-C stops following the log, " - "but not the server)" - ) log.info( f"Oxigraph server webapp for {dataset} will be available at " - f"http://localhost:{port} and the sparql endpoint for " - f"queries is http://localhost:{port}/query" + f"http://{args.host_name}:{port} and the sparql endpoint for " + f"queries is http://{args.host_name}:{port}/query" ) + log.info("") + if not args.run_in_foreground: + log_cmd = f"{system} logs -f {server_container}" + self.show_container_logs(log_cmd, "server") except Exception as e: log.error(f"Starting the Oxigraph server failed: {e}") return False From 9501d32076b8d56d94b2d9a011084fbb69c85295 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 12 Feb 2025 01:27:47 +0100 Subject: [PATCH 09/11] Moved general config arguments to config.py, fixed run_command show_output and some general fixes --- src/other_engines/config.py | 3 +- src/other_engines/engine.py | 40 +++++++++++++++++++++------ src/other_engines/engines/oxigraph.py | 4 --- src/other_engines/main.py | 6 ++-- src/qlever/util.py | 2 +- 5 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/other_engines/config.py b/src/other_engines/config.py index e21d3da4..e4b29e81 100644 --- a/src/other_engines/config.py +++ b/src/other_engines/config.py @@ -83,6 +83,7 @@ def get_default_config_value(self, arg_name: str, config): return f"{self.engine_name.lower()}.index.{name}" if arg_name == "server_container": return f"{self.engine_name.lower()}.server.{name}" + return None def 
add_subparser_for_command( self, @@ -182,7 +183,7 @@ def parse_args(self): ) if not argcomplete_enabled and not argcomplete_check_off: log.info("") - log.warn( + log.warning( f"To enable autocompletion, run the following command, " f"and consider adding it to your `.bashrc` or `.zshrc`:" f"\n\n" diff --git a/src/other_engines/engine.py b/src/other_engines/engine.py index 030458fe..add98ace 100644 --- a/src/other_engines/engine.py +++ b/src/other_engines/engine.py @@ -14,7 +14,7 @@ from qlever.commands.stop import stop_container from qlever.containerize import Containerize from qlever.log import log -from qlever.util import get_random_string +from qlever.util import get_random_string, run_command class SparqlEngine(ABC): @@ -44,7 +44,7 @@ def get_command_dict(self) -> dict[str, str]: Command description is taken from the command function docstring. """ command_dict = {} - for name, method in inspect.getmembers( + for name, _ in inspect.getmembers( self.__class__, predicate=inspect.isfunction ): if name.endswith("_command"): @@ -80,8 +80,33 @@ def get_config_arguments(self, command: str) -> dict[str : list[str]]: subset of the names of `all_arguments` defined in configfile.py. Only these arguments can then be used in the respective command method. 
""" + if command == "setup-config": + return {} + + if command == "get-data": + return {"data": ["name", "get_data_cmd"], "index": ["input_files"]} + + if command == "log": + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + "index_container", + ], + } + + if command == "stop": + return { + "data": ["name"], + "server": ["port"], + "runtime": ["server_container"], + } + if command in ("example-queries", "query"): return {"server": ["port"]} + return None def additional_arguments(self, command: str, subparser) -> None: @@ -284,18 +309,18 @@ def log_command(self, args) -> bool: return True @abstractmethod - def index_command(self) -> bool: + def index_command(self, args) -> bool: """ Build the index for a given RDF dataset + (Runs in a container and in background) """ - pass @abstractmethod - def start_command(self) -> bool: + def start_command(self, args) -> bool: """ Start the server for given Engine + (Runs in a container and in background) """ - pass def stop_command(self, args) -> bool: """ @@ -309,8 +334,7 @@ def stop_command(self, args) -> bool: return True # First check if container is running and if yes, stop and remove it - if stop_container(server_container): - return True + return stop_container(server_container) def example_queries_command(self, args) -> bool: """ diff --git a/src/other_engines/engines/oxigraph.py b/src/other_engines/engines/oxigraph.py index c5d585ec..f453ac4f 100644 --- a/src/other_engines/engines/oxigraph.py +++ b/src/other_engines/engines/oxigraph.py @@ -17,10 +17,6 @@ def get_config_arguments(self, command: str) -> dict[str : list[str]]: config_args = super().get_config_arguments(command) if config_args is not None: return config_args - if command == "setup-config": - return {} - if command == "get-data": - return {"data": ["name", "get_data_cmd"], "index": ["input_files"]} if command == "index": return { "data": ["name", "format"], diff --git a/src/other_engines/main.py 
b/src/other_engines/main.py index 1c95931e..7409ad9d 100644 --- a/src/other_engines/main.py +++ b/src/other_engines/main.py @@ -29,7 +29,7 @@ def main(): raise Exception( f"Could not import module {module_path} " f"for engine {selected_engine}: {e}" - ) + ) from e engine_class = getattr(module, engine_class_name)() @@ -50,9 +50,9 @@ def main(): log.info("") log.info(colored(f"Command: {command}", attrs=["bold"])) log.info("") - commandWasSuccesful = getattr(engine_class, command)(args) + command_successful = getattr(engine_class, command)(args) log.info("") - if not commandWasSuccesful: + if not command_successful: exit(1) except KeyboardInterrupt: log.info("") diff --git a/src/qlever/util.py b/src/qlever/util.py index af3a25c6..3360f44c 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -50,7 +50,7 @@ def run_command( "shell": True, "text": True, "stdout": None if show_output else subprocess.PIPE, - "stderr": subprocess.PIPE, + "stderr": None if show_output else subprocess.PIPE, } # With `Popen`, the command runs in the current shell and a process object From 41cb4bd63eaab5464feaf7c0774dcdef1a593780 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 20 Feb 2025 01:09:55 +0100 Subject: [PATCH 10/11] Extracted script_name and configfile_name as class attributes --- src/other_engines/config.py | 12 ++++---- src/other_engines/engine.py | 40 +++++++++++++++------------ src/other_engines/engines/oxigraph.py | 14 +++++----- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/other_engines/config.py b/src/other_engines/config.py index e4b29e81..3d7a39c1 100644 --- a/src/other_engines/config.py +++ b/src/other_engines/config.py @@ -148,7 +148,7 @@ def argument_error(prefix: str): kwargs_copy["default"] = config_value kwargs_copy["required"] = False kwargs_copy["help"] += ( - f" [default, from {self.engine_name}file: " + f" [default, from {self.engine.configfile_name}: " f"{config_value}]" ) else: @@ -187,12 +187,12 @@ def parse_args(self): f"To 
enable autocompletion, run the following command, " f"and consider adding it to your `.bashrc` or `.zshrc`:" f"\n\n" - f'eval "$(register-python-argcomplete q{self.engine_name.lower()})"' + f'eval "$(register-python-argcomplete {self.engine.script_name})"' f" && export {self.engine_name.upper()}_ARGCOMPLETE_ENABLED=1" ) log.info("") - configfile_path = Path(f"{self.engine_name}file") + configfile_path = self.engine.configfile_path configfile_exists = configfile_path.is_file() if configfile_exists and not autocomplete_mode: @@ -201,7 +201,7 @@ def parse_args(self): except Exception as e: log.info("") log.error( - f"Error parsing {self.engine_name}file `{configfile_path}`" + f"Error parsing {self.engine.configfile_name} `{configfile_path}`" f": {e}" ) log.info("") @@ -211,7 +211,7 @@ def parse_args(self): parser = argparse.ArgumentParser( description=colored( - f"This is the q{self.engine_name.lower()} command line tool, " + f"This is the {self.engine.script_name} command line tool, " f"it's all you need to work with {self.engine_name} in a " f"{' or '.join(Containerize.supported_systems())} " "container environment", @@ -243,7 +243,7 @@ def parse_args(self): if not configfile_exists: log.warning( f"Invoking command `{args.command}` without a " - f"{self.engine_name}file. You have to specify all " + f"{self.engine.configfile_name}. You have to specify all " "required arguments on the command line. " "This is possible, but not recommended." 
 ) diff --git a/src/other_engines/engine.py b/src/other_engines/engine.py index add98ace..4eb68e20 100644 --- a/src/other_engines/engine.py +++ b/src/other_engines/engine.py @@ -14,7 +14,7 @@ from qlever.commands.stop import stop_container from qlever.containerize import Containerize from qlever.log import log -from qlever.util import get_random_string, run_command +from qlever.util import run_command class SparqlEngine(ABC): @@ -31,10 +31,12 @@ class SparqlEngine(ABC): """ def __init__(self, engine_name: str) -> None: + self.script_name = f"q{engine_name.lower()}" + self.configfile_name = f"{engine_name}file" self.engine_name = engine_name self.commands = self.get_command_dict() self.configfiles_path = Path(__file__).parent / "Configfiles" - self.configfile_path = Path(f"{self.engine_name}file") + self.configfile_path = Path(self.configfile_name) def get_command_dict(self) -> dict[str, str]: """ @@ -51,7 +53,7 @@ def get_command_dict(self) -> dict[str, str]: docstring = inspect.getdoc(getattr(self.__class__, name)) or "" clean_docstring = re.sub(r"\s+", " ", docstring.strip()) clean_docstring = clean_docstring.replace( - "Configfile", f"{self.engine_name}file" + "Configfile", self.configfile_name ) command_name = name[: -len("_command")].replace("_", "-") command_dict[command_name] = clean_docstring @@ -79,6 +81,9 @@ def get_config_arguments(self, command: str) -> dict[str : list[str]]: Return the arguments relevant for the passed command. This must be a subset of the names of `all_arguments` defined in configfile.py. Only these arguments can then be used in the respective command method. + In the respective engine implementation classes, command-specific + config arguments can be overridden by simply calling this super function + and modifying or redefining the arguments as necessary.
""" if command == "setup-config": return {} @@ -114,6 +119,9 @@ def additional_arguments(self, command: str, subparser) -> None: Add additional command-specific arguments (which are not in `configfile.all_arguments` and cannot be specified in the Configfile) to the given `subparser`. + In the respective engine implementation classes, command-specific + additional arguments can be extended by simply calling this super + function and adding arguments for more commands. """ configfile_names = [ p.name.split(".")[1] @@ -127,8 +135,8 @@ def additional_arguments(self, command: str, subparser) -> None: nargs="?", default="default", help=( - f"The name of the pre-configured {self.engine_name}" - "file to create [default = default]" + f"The name of the pre-configured {self.configfile_name} " + "to create [default = default]" ), ) if command == "log": @@ -187,7 +195,7 @@ def show(self, command_description: str, only_show: bool = False): log.info("") if only_show: log.info( - f'You called "q{self.engine_name.lower()} ... --show", ' + f'You called "{self.script_name} ... --show", ' "therefore the command is only shown, but not executed " '(omit the "--show" to execute it)' ) @@ -216,11 +224,7 @@ def setup_config_command(self, args) -> bool: configfile_path = ( self.configfiles_path / f"Configfile.{args.config_name}" ) - setup_config_cmd = ( - f"cat {configfile_path}" - f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'" - ) - setup_config_cmd += f"> {self.engine_name}file" + setup_config_cmd = f"cat {configfile_path} > {self.configfile_name}" self.show(setup_config_cmd, only_show=args.show) if args.show: return True @@ -228,13 +232,13 @@ def setup_config_command(self, args) -> bool: # If there is already a Configfile in the current directory, exit. 
if self.configfile_path.is_file(): log.error( - f"`{self.engine_name}file` already exists in current directory" + f"`{self.configfile_name}` already exists in current directory" ) log.info("") log.info( - f"If you want to create a new {self.engine_name}file using " - f"`q{self.engine_name.lower()} setup-config`, " - f"delete the existing {self.engine_name}file first" + f"If you want to create a new {self.configfile_name} using " + f"`{self.script_name} setup-config`, " + f"delete the existing {self.configfile_name} first" ) return False @@ -255,7 +259,7 @@ def setup_config_command(self, args) -> bool: # If we get here, everything went well. log.info( - f'Created {self.engine_name}file for config "{args.config_name}"' + f'Created {self.configfile_name} for config "{args.config_name}"' f" in current directory" ) return True @@ -294,8 +298,8 @@ def log_command(self, args) -> bool: if log_cmd is None: log.info( f"No running index or start {system} container found!" - f"Are you sure you called `q{self.engine_name.lower()} index` " - f"or `q{self.engine_name.lower()} start` " + f"Are you sure you called `{self.script_name} index` " + f"or `{self.script_name} start` " "and have a process running?" ) return False diff --git a/src/other_engines/engines/oxigraph.py b/src/other_engines/engines/oxigraph.py index f453ac4f..c7f13ef8 100644 --- a/src/other_engines/engines/oxigraph.py +++ b/src/other_engines/engines/oxigraph.py @@ -75,8 +75,8 @@ def index_command(self, args) -> bool: log.error(f'No file matching "{pattern}" found') log.info("") log.info( - "Did you call `qoxigraph get-data`? If you did, check " - "GET_DATA_CMD and INPUT_FILES in the Oxigraphfile" + f"Did you call `{self.script_name} get-data`? 
If you did, " + "check GET_DATA_CMD and INPUT_FILES in the Oxigraphfile" ) return False @@ -104,7 +104,7 @@ def index_command(self, args) -> bool: def start_command(self, args) -> bool: """ Start the server for Oxigraph (requires that you have built an index - with `qoxigraph index` before) (Runs in a container and in background) + before) (Runs in a container and in background) """ system = args.system dataset = args.name @@ -116,7 +116,7 @@ def start_command(self, args) -> bool: log.info( f"{system} container {index_container} is still up, " "which means that data loading is in progress. Please wait...\n" - f"Check status of {index_container} with `qoxigraph log`" + f"Check status of {index_container} with `{self.script_name} log`" ) return False @@ -124,7 +124,7 @@ def start_command(self, args) -> bool: log.info( f"{system} container {server_container} exists, " f"which means that server for {dataset} is already running. \n" - f"Stop the container {server_container} with `qoxigraph stop` " + f"Stop the container {server_container} with `{self.script_name} stop` " "first before starting a new one." ) return False @@ -133,8 +133,8 @@ def start_command(self, args) -> bool: if len([p.name for p in Path.cwd().glob("*.sst")]) == 0: log.info( f"No Oxigraph index files for {dataset} found! " - "Did you call `qoxigraph index`? If you did, check if .sst " - "index files are present in current working directory." + f"Did you call `{self.script_name} index`? If you did, check " + "if .sst index files are present in current working directory." 
) return False From 03433c38fc99985f8c3332a6a97aa3d807df98a4 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Thu, 20 Feb 2025 19:10:16 +0100 Subject: [PATCH 11/11] Add `--get` option to `qlever query` command --- src/qlever/commands/query.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py index d31c5ab1..93d98f31 100644 --- a/src/qlever/commands/query.py +++ b/src/qlever/commands/query.py @@ -76,6 +76,12 @@ def additional_arguments(self, subparser) -> None: default="text/tab-separated-values", help="Accept header for the SPARQL query", ) + subparser.add_argument( + "--get", + action="store_true", + default=False, + help="Use GET request instead of POST", + ) subparser.add_argument( "--no-time", action="store_true", @@ -111,8 +117,9 @@ def execute(self, args) -> bool: if args.sparql_endpoint else f"localhost:{args.port}" ) + curl_args = "-Gs" if args.get else "-s" curl_cmd = ( - f"curl -s {sparql_endpoint}" + f"curl {curl_args} {sparql_endpoint}" f' -H "Accept: {args.accept}"' f" --data-urlencode query={shlex.quote(args.query)}" f"{curl_cmd_additions}"