diff --git a/pyproject.toml b/pyproject.toml index 87238c2c..6c65e507 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,10 +28,11 @@ Github = "https://github.com/ad-freiburg/qlever" [project.scripts] "qlever" = "qlever.qlever_main:main" "qlever-old" = "qlever.qlever_old:main" +"qoxigraph" = "other_engines.main:main" [tool.setuptools] license-files = ["LICENSE"] -package-data = { "qlever" = ["Qleverfiles/*"] } +package-data = { "qlever" = ["Qleverfiles/*"], "qoxigraph" = ["Configfiles/*"] } [tool.pytest.ini_options] pythonpath = ["src"] diff --git a/src/other_engines/Configfiles/Configfile.dblp b/src/other_engines/Configfiles/Configfile.dblp new file mode 100644 index 00000000..bdf40982 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.dblp @@ -0,0 +1,20 @@ +# Configfile for DBLP, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = dblp +DATA_TARFILE = dblp_KG_with_associated_data.tar +GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE} +GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE} +VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE") +DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION}) +FORMAT = ttl + +[index] +INPUT_FILES = *.gz + +[server] +PORT = 7015 + +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.default b/src/other_engines/Configfiles/Configfile.default new file mode 100644 index 00000000..2e622560 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.default @@ -0,0 +1,29 @@ +# Default Configfile, use with https://github.com/ad-freiburg/qlever-control +# +# If you have never seen a Configfile before, we recommend that you first look +# at the example Configfiles on http://qlever.cs.uni-freiburg.de/qlever-control/ +# src/other_engines/Configfiles . 
Or execute `q{engine_name} setup-config ` on the +# command line to obtain the example Configfiles for . + +# As a minimum, each dataset needs a name. If you want `q{engine_name} get-data` to do +# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to +# generate (or download or copy from somewhere) the input files yourself. Each +# dataset should have a short DESCRIPTION, ideally with a date. +[data] +NAME = +GET_DATA_CMD = +DESCRIPTION = + +# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all +# input files. +[index] +INPUT_FILES = *.ttl + +# The server listens on PORT. +[server] +PORT = 8888 + +# Use SYSTEM = docker to run inside a docker container +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.imdb b/src/other_engines/Configfiles/Configfile.imdb new file mode 100644 index 00000000..acb1ae39 --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.imdb @@ -0,0 +1,20 @@ +# Configfile for IMDB, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = imdb +IMDB_DATA_URL = https://datasets.imdbws.com +GET_PREFIXES = echo "@prefix imdb: ." 
+GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE} +GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE} +GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl +DESCRIPTION = RDF data derived from ${IMDB_DATA_URL} + +[index] +INPUT_FILES = ${data:NAME}.ttl + +[server] +PORT = 7029 + +[runtime] +SYSTEM = docker + diff --git a/src/other_engines/Configfiles/Configfile.olympics b/src/other_engines/Configfiles/Configfile.olympics new file mode 100644 index 00000000..8b80d06f --- /dev/null +++ b/src/other_engines/Configfiles/Configfile.olympics @@ -0,0 +1,16 @@ +# Configfile for Olympics, use with https://github.com/ad-freiburg/qlever-control + +[data] +NAME = olympics +BASE_URL = https://github.com/wallscope/olympics-rdf +GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip +DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL} + +[index] +INPUT_FILES = olympics.nt + +[server] +PORT = 7887 + +[runtime] +SYSTEM = docker diff --git a/src/other_engines/__init__.py b/src/other_engines/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/other_engines/config.py b/src/other_engines/config.py new file mode 100644 index 00000000..3d7a39c1 --- /dev/null +++ b/src/other_engines/config.py @@ -0,0 +1,251 @@ +from __future__ import annotations + +import argparse +import os +from pathlib import Path + +import argcomplete +from termcolor import colored + +from other_engines.engine import SparqlEngine +from qlever.containerize import 
def all_arguments():
    """
    Build the complete argument table for the container-based engines.

    Starts from the QLever `Qleverfile` argument definitions and
    adds/replaces the entries that are new or different for the
    non-QLever engines (everything runs in a container here).
    """

    def arg(*args, **kwargs):
        # Record the arguments of a future `add_argument(*args, **kwargs)`.
        return (args, kwargs)

    args_by_section = Qleverfile.all_arguments()
    args_by_section["runtime"]["index_cmd"] = arg(
        "--run-in-foreground",
        action="store_true",
        default=False,
        help=(
            "Run the index command in the foreground "
            "(default: run in the background with `docker run -d`)"
        ),
    )
    args_by_section["runtime"]["start_cmd"] = arg(
        "--run-in-foreground",
        action="store_true",
        default=False,
        help=(
            "Run the server in the foreground "
            "(default: run in the background with `docker run -d`)"
        ),
    )
    args_by_section["runtime"]["system"] = arg(
        "--system",
        type=str,
        choices=Containerize.supported_systems(),
        default="docker",
        help=(
            "Which system to use to run commands like `index` "
            "or `start` in a container"
        ),
    )
    args_by_section["runtime"]["index_container"] = arg(
        "--index-container",
        type=str,
        help="The name of the container used by the index command",
    )
    args_by_section["runtime"]["server_container"] = arg(
        "--server-container",
        type=str,
        help="The name of the container used by the start command",
    )
    return args_by_section


class ArgumentsManager:
    # Arguments whose defaults are computed rather than read verbatim
    # from the Configfile.
    SPECIAL_ARGS = ["image", "index_container", "server_container"]

    def __init__(self, engine: SparqlEngine) -> None:
        self.engine = engine
        self.engine_name = engine.engine_name
        self.commands = engine.commands

    def get_default_config_value(self, arg_name: str, config):
        """
        Compute the default value for one of the SPECIAL_ARGS; returns
        None for any other argument name.
        """
        dataset_name = config["data"]["name"]
        if arg_name == "image":
            return self.engine.image
        if arg_name == "index_container":
            return f"{self.engine_name.lower()}.index.{dataset_name}"
        if arg_name == "server_container":
            return f"{self.engine_name.lower()}.server.{dataset_name}"
        return None
def add_subparser_for_command(
    self,
    subparsers,
    command_name: str,
    description: str,
    config=None,
) -> None:
    """
    Register a subparser for `command_name` on `subparsers`.

    The argument names come from `self.engine.get_config_arguments()`;
    each one must exist in `all_arguments()`, otherwise we abort with an
    error. When `config` (a parsed Configfile) is given, its values
    override the argparse defaults and are mentioned in the help text.
    """

    arg_names = self.engine.get_config_arguments(command_name)
    known_args = all_arguments()

    def abort_with_argument_error(prefix: str):
        # This is a programming error in the engine implementation,
        # not a user error, hence the hard exit.
        log.info("")
        log.error(
            f"{prefix} in `other_engines.configfile.all_arguments()` "
            f"for command `{command_name}`"
        )
        log.info("")
        log.info(
            f"Value of `get_config_arguments_for_command` "
            f"`{command_name}`:"
        )
        log.info("")
        log.info(f"{arg_names}")
        log.info("")
        exit(1)

    subparser = subparsers.add_parser(
        command_name, description=description, help=description
    )

    # Register every argument that is relevant for this command.
    for section in arg_names:
        if section not in known_args:
            abort_with_argument_error(f"Section `{section}` not found")
        for arg_name in arg_names[section]:
            if arg_name not in known_args[section]:
                abort_with_argument_error(
                    f"Argument `{arg_name}` of section "
                    f"`{section}` not found"
                )
            args, kwargs = known_args[section][arg_name]
            final_kwargs = kwargs.copy()
            if config is not None:
                # A value from the Configfile (or, for the special
                # arguments, a computed default) beats the argparse
                # default and makes the argument optional.
                fallback_default = kwargs.get("default", None)
                if arg_name in self.SPECIAL_ARGS:
                    config_value = self.get_default_config_value(
                        arg_name, config
                    )
                else:
                    config_value = config.get(
                        section, arg_name, fallback=None
                    )
                if config_value is not None:
                    final_kwargs["default"] = config_value
                    final_kwargs["required"] = False
                    final_kwargs["help"] += (
                        f" [default, from {self.engine.configfile_name}: "
                        f"{config_value}]"
                    )
                else:
                    final_kwargs["help"] += (
                        f" [default: {fallback_default}]"
                    )
            subparser.add_argument(*args, **final_kwargs)

    # Arguments that are shared by all commands.
    self.engine.additional_arguments(command_name, subparser)
    subparser.add_argument(
        "--show",
        action="store_true",
        default=False,
        help="Only show what would be executed, but don't execute it",
    )
    subparser.add_argument(
        "--log-level",
        choices=log_levels.keys(),
        default="INFO",
        help="Set the log level",
    )
def parse_args(self):
    """
    Parse the command line for the engine script.

    Reads the Configfile (if present and not in autocomplete mode) so
    its values can serve as argument defaults, builds one subparser per
    engine command, wires up argcomplete, and returns the parsed
    `argparse.Namespace`. Exits with status 1 on a broken Configfile and
    with status 0 (after printing help) when called without arguments.
    """
    # FIX: use `sys` explicitly instead of the undocumented `os.sys`
    # re-export (config.py itself only imports `os`).
    import sys

    # Determine whether we are in autocomplete mode or not.
    autocomplete_mode = "COMP_LINE" in os.environ

    # Check if the user has registered this script for argcomplete.
    env_prefix = self.engine_name.upper()
    argcomplete_check_off = os.environ.get(
        f"{env_prefix}_ARGCOMPLETE_CHECK_OFF"
    )
    argcomplete_enabled = os.environ.get(
        f"{env_prefix}_ARGCOMPLETE_ENABLED"
    )
    if not argcomplete_enabled and not argcomplete_check_off:
        log.info("")
        log.warning(
            f"To enable autocompletion, run the following command, "
            f"and consider adding it to your `.bashrc` or `.zshrc`:"
            f"\n\n"
            f'eval "$(register-python-argcomplete {self.engine.script_name})"'
            f" && export {env_prefix}_ARGCOMPLETE_ENABLED=1"
        )
        log.info("")

    configfile_path = self.engine.configfile_path
    configfile_exists = configfile_path.is_file()

    # Read the Configfile (skip it while autocompleting: it may be slow
    # or broken, and completion must never fail because of it).
    if configfile_exists and not autocomplete_mode:
        try:
            config = Qleverfile.read_qleverfile(configfile_path)
        except Exception as e:
            log.info("")
            log.error(
                f"Error parsing {self.engine.configfile_name} `{configfile_path}`"
                f": {e}"
            )
            log.info("")
            sys.exit(1)
    else:
        config = None

    parser = argparse.ArgumentParser(
        description=colored(
            f"This is the {self.engine.script_name} command line tool, "
            f"it's all you need to work with {self.engine_name} in a "
            f"{' or '.join(Containerize.supported_systems())} "
            "container environment",
            attrs=["bold"],
        )
    )
    subparsers = parser.add_subparsers(dest="command")
    subparsers.required = True
    for command_name, description in self.commands.items():
        self.add_subparser_for_command(
            subparsers=subparsers,
            command_name=command_name,
            description=description,
            config=config,
        )

    argcomplete.autocomplete(parser, always_complete_options="long")

    # If called without arguments, show the help message.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    # If the command says that we should have a Configfile, but we
    # don't, issue a warning (all required values must then come from
    # the command line).
    if self.engine.command_should_have_configfile(args.command):
        if not configfile_exists:
            log.warning(
                f"Invoking command `{args.command}` without a "
                f"{self.engine.configfile_name}. You have to specify all "
                "required arguments on the command line. "
                "This is possible, but not recommended."
            )

    return args
+ """ + + def __init__(self, engine_name: str) -> None: + self.script_name = f"q{engine_name.lower()}" + self.configfile_name = f"{engine_name}file" + self.engine_name = engine_name + self.commands = self.get_command_dict() + self.configfiles_path = Path(__file__).parent / "Configfiles" + self.configfile_path = Path(self.configfile_name) + + def get_command_dict(self) -> dict[str, str]: + """ + Get a dictionary for all commands supported by this Engine + {command_name: command_description} + Command name is taken from command functions without the "_command" + Command description is taken from the command function docstring. + """ + command_dict = {} + for name, _ in inspect.getmembers( + self.__class__, predicate=inspect.isfunction + ): + if name.endswith("_command"): + docstring = inspect.getdoc(getattr(self.__class__, name)) or "" + clean_docstring = re.sub(r"\s+", " ", docstring.strip()) + clean_docstring = clean_docstring.replace( + "Configfile", self.configfile_name + ) + command_name = name[: -len("_command")].replace("_", "-") + command_dict[command_name] = clean_docstring + return command_dict + + def command_should_have_configfile(self, command: str) -> bool: + """ + Return `True` if the command should have a Configfile, `False` + otherwise. If a command should have a Configfile, but none is + specified, the command can still be executed if all the required + arguments are specified on the command line, but there will be warning. + """ + cmds_that_need_configfile = [ + "get-data", + "index", + "start", + "stop", + "log", + ] + return command in cmds_that_need_configfile + + @abstractmethod + def get_config_arguments(self, command: str) -> dict[str : list[str]]: + """ + Return the arguments relevant for the passed command. This must be a + subset of the names of `all_arguments` defined in configfile.py. + Only these arguments can then be used in the respective command method. 
+ In the respective engine implementation classes, command-specific + config arguments can be overriden by simply calling this super function + and modifying or redefining the arguments as necessary. + """ + if command == "setup-config": + return {} + + if command == "get-data": + return {"data": ["name", "get_data_cmd"], "index": ["input_files"]} + + if command == "log": + return { + "data": ["name"], + "runtime": [ + "system", + "image", + "server_container", + "index_container", + ], + } + + if command == "stop": + return { + "data": ["name"], + "server": ["port"], + "runtime": ["server_container"], + } + + if command in ("example-queries", "query"): + return {"server": ["port"]} + + return None + + def additional_arguments(self, command: str, subparser) -> None: + """ + Add additional command-specific arguments (which are not in + `configfile.all_arguments` and cannot be specified in the Configfile) + to the given `subparser`. + In the respective engine implementation classes, command-specific + additional arguments can be extended by simply calling this super + function and adding arguments for more commands. 
+ """ + configfile_names = [ + p.name.split(".")[1] + for p in self.configfiles_path.glob("Configfile.*") + ] + if command == "setup-config": + subparser.add_argument( + "config_name", + type=str, + choices=configfile_names, + nargs="?", + default="default", + help=( + f"The name of the pre-configured {self.configfile_name} " + "to create [default = default]" + ), + ) + if command == "log": + subparser.add_argument( + "--tail-num-lines", + type=int, + default=20, + help=( + "Show this many of the last lines of the log " + "file [default = 20]" + ), + ) + subparser.add_argument( + "--from-beginning", + action="store_true", + default=False, + help="Show all lines of the log file [default = False]", + ) + subparser.add_argument( + "--no-follow", + action="store_true", + default=False, + help="Don't follow the log file [default = False]", + ) + if command == "example-queries": + subparser.add_argument( + "--ui_config", + type=str, + choices=configfile_names, + nargs="?", + default="default", + help=( + "The name of the pre-configured QLever ui_config " + "to use to get example queries [default = default]" + ), + ) + ExampleQueriesCommand().additional_arguments(subparser) + if command == "query": + subparser.add_argument( + "--access-token", + type=str, + help=( + "QLever access_token to send privileged commands " + "to the server" + ), + ) + QueryCommand().additional_arguments(subparser) + + def show(self, command_description: str, only_show: bool = False): + """ + Helper function that shows the command line or description of an + action, together with an explanation. + """ + + log.info(colored(command_description, "blue")) + log.info("") + if only_show: + log.info( + f'You called "{self.script_name} ... 
--show", ' + "therefore the command is only shown, but not executed " + '(omit the "--show" to execute it)' + ) + + @staticmethod + def show_container_logs(log_cmd: str, active_ps: str) -> None: + """ + Execute a container logs command and show the output for a given + active process active_ps + """ + log.info( + f"Showing logs for {active_ps} command. Press Ctrl-C to stop " + f"following (will not stop the {active_ps} process)" + ) + + try: + run_command(log_cmd, show_output=True) + except Exception as e: + log.error(f"Cannot display container logs - {e}") + + def setup_config_command(self, args) -> bool: + """ + Get a pre-configured Configfile for the given engine and config_name + """ + # Construct the command line and show it. + configfile_path = ( + self.configfiles_path / f"Configfile.{args.config_name}" + ) + setup_config_cmd = f"cat {configfile_path} > {self.configfile_name}" + self.show(setup_config_cmd, only_show=args.show) + if args.show: + return True + + # If there is already a Configfile in the current directory, exit. + if self.configfile_path.is_file(): + log.error( + f"`{self.configfile_name}` already exists in current directory" + ) + log.info("") + log.info( + f"If you want to create a new {self.configfile_name} using " + f"`{self.script_name} setup-config`, " + f"delete the existing {self.configfile_name} first" + ) + return False + + # Copy the Configfile to the current directory. + try: + subprocess.run( + setup_config_cmd, + shell=True, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + except Exception as e: + log.error( + f'Could not copy "{configfile_path}" to current directory: {e}' + ) + return False + + # If we get here, everything went well. 
+ log.info( + f'Created {self.configfile_name} for config "{args.config_name}"' + f" in current directory" + ) + return True + + def get_data_command(self, args) -> bool: + """ + Get data using the GET_DATA_CMD in the Configfile + """ + GetDataCommand.show = self.show + return GetDataCommand().execute(args) + + def log_command(self, args) -> bool: + """ + Show the last lines of the index/server container log and follow it + """ + system = args.system + index_container = args.index_container + server_container = args.server_container + + log_cmd = f"{system} logs " + + if not args.from_beginning: + log_cmd += f"-n {args.tail_num_lines} " + if not args.no_follow: + log_cmd += "-f " + + if Containerize().is_running(system, index_container): + log_cmd += index_container + active_ps = "index" + elif Containerize().is_running(system, server_container): + log_cmd += server_container + active_ps = "start" + else: + log_cmd = None + + if log_cmd is None: + log.info( + f"No running index or start {system} container found!" + f"Are you sure you called `{self.script_name} index` " + f"or `{self.script_name} start` " + "and have a process running?" + ) + return False + + # Show the command line. 
+ self.show(log_cmd, only_show=args.show) + if args.show: + return True + + self.show_container_logs(log_cmd, active_ps) + return True + + @abstractmethod + def index_command(self, args) -> bool: + """ + Build the index for a given RDF dataset + (Runs in a container and in background) + """ + + @abstractmethod + def start_command(self, args) -> bool: + """ + Start the server for given Engine + (Runs in a container and in background) + """ + + def stop_command(self, args) -> bool: + """ + Stop the server by stopping and removing the server container + """ + server_container = args.server_container + + description = f"Checking for container with name {server_container}" + self.show(description, only_show=args.show) + if args.show: + return True + + # First check if container is running and if yes, stop and remove it + return stop_container(server_container) + + def example_queries_command(self, args) -> bool: + """ + Execute queries against a SPARQL endpoint and get runtime information + """ + ExampleQueriesCommand.show = self.show + return ExampleQueriesCommand().execute(args) + + def query_command(self, args) -> bool: + """ + Send a query to a SPARQL endpoint + """ + QueryCommand.show = self.show + return QueryCommand().execute(args) diff --git a/src/other_engines/engines/__init__.py b/src/other_engines/engines/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/other_engines/engines/oxigraph.py b/src/other_engines/engines/oxigraph.py new file mode 100644 index 00000000..c7f13ef8 --- /dev/null +++ b/src/other_engines/engines/oxigraph.py @@ -0,0 +1,178 @@ +import glob +import shlex +from pathlib import Path + +from other_engines.engine import SparqlEngine +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import run_command + + +class Oxigraph(SparqlEngine): + def __init__(self) -> None: + super().__init__(engine_name="Oxigraph") + self.image = "ghcr.io/oxigraph/oxigraph" + + def 
# NOTE(review): methods of `Oxigraph(SparqlEngine)` (reassembled from a
# collapsed diff). FIX: the return annotation used slice syntax
# (`dict[str : list[str]]`), which is not a valid type expression.
# `index_command` deliberately has no docstring: its CLI description is
# inherited from the abstract base method via `inspect.getdoc`.

def get_config_arguments(self, command: str) -> dict[str, list[str]]:
    """
    Return the Configfile arguments relevant for `command`. Defers to
    the base class first, then adds the Oxigraph-specific sets for
    `index` and `start`. Raises ValueError for unsupported commands.
    """
    config_args = super().get_config_arguments(command)
    if config_args is not None:
        return config_args
    if command == "index":
        return {
            "data": ["name", "format"],
            "index": [
                "input_files",
            ],
            "runtime": ["system", "image", "index_container", "index_cmd"],
        }
    if command == "start":
        return {
            "data": ["name", "description"],
            "server": [
                "host_name",
                "port",
            ],
            "runtime": [
                "system",
                "image",
                "server_container",
                "index_container",
                "start_cmd",
            ],
        }
    raise ValueError(
        f"Couldn't fetch relevant Configfile arguments for {command}. "
        f"The command must be one of {self.commands.keys()}"
    )


def index_command(self, args) -> bool:
    # Run the command in a container (if so desired).
    system = args.system
    input_files = args.input_files
    index_container = args.index_container
    run_subcommand = "run --rm"
    if not args.run_in_foreground:
        run_subcommand += " -d"
    # Oxigraph bulk-loads from /index, which is the mounted cwd.
    index_cmd = f"load --location /index --file /index/{input_files}"
    index_cmd = Containerize().containerize_command(
        cmd=index_cmd,
        container_system=system,
        run_subcommand=run_subcommand,
        image_name=args.image,
        container_name=index_container,
        volumes=[("$(pwd)", "/index")],
        use_bash=False,
    )

    # Show the command line.
    self.show(index_cmd, only_show=args.show)
    if args.show:
        return True

    # Check if all of the input files exist.
    for pattern in shlex.split(input_files):
        if len(glob.glob(pattern)) == 0:
            log.error(f'No file matching "{pattern}" found')
            log.info("")
            log.info(
                f"Did you call `{self.script_name} get-data`? If you did, "
                "check GET_DATA_CMD and INPUT_FILES in the Oxigraphfile"
            )
            return False

    # Refuse to build on top of a previous index (Oxigraph's RocksDB
    # store leaves *.sst files in the working directory).
    if any(Path.cwd().glob("*.sst")):
        log.error(
            "Index files (*.sst) found in current directory "
            "which shows presence of a previous index"
        )
        log.info("")
        log.info("Aborting the index operation...")
        return False

    # Run the index command.
    try:
        run_command(index_cmd, show_output=True)
        if not args.run_in_foreground:
            log_cmd = f"{system} logs -f {index_container}"
            self.show_container_logs(log_cmd, "index")
    except Exception as e:
        log.error(f"Building the index failed: {e}")
        return False

    return True


def start_command(self, args) -> bool:
    """
    Start the server for Oxigraph (requires that you have built an index
    before) (Runs in a container and in background)
    """
    system = args.system
    dataset = args.name

    # Check if index and server container still running
    index_container = args.index_container
    server_container = args.server_container
    if Containerize().is_running(system, index_container):
        log.info(
            f"{system} container {index_container} is still up, "
            "which means that data loading is in progress. Please wait...\n"
            f"Check status of {index_container} with `{self.script_name} log`"
        )
        return False

    if Containerize().is_running(system, server_container):
        log.info(
            f"{system} container {server_container} exists, "
            f"which means that server for {dataset} is already running. \n"
            f"Stop the container {server_container} with `{self.script_name} stop` "
            "first before starting a new one."
        )
        return False

    # Check if index files (*.sst) present in cwd
    if not any(Path.cwd().glob("*.sst")):
        log.info(
            f"No Oxigraph index files for {dataset} found! "
            f"Did you call `{self.script_name} index`? If you did, check "
            "if .sst index files are present in current working directory."
        )
        return False

    port = int(args.port)
    run_subcommand = "run --restart=unless-stopped"
    if not args.run_in_foreground:
        run_subcommand += " -d"
    start_cmd = "serve-read-only --location /index --bind=0.0.0.0:7878"
    start_cmd = Containerize().containerize_command(
        cmd=start_cmd,
        container_system=system,
        run_subcommand=run_subcommand,
        image_name=args.image,
        container_name=server_container,
        volumes=[("$(pwd)", "/index")],
        ports=[(port, 7878)],
        use_bash=False,
    )

    # Show the command line.
    self.show(start_cmd, only_show=args.show)
    if args.show:
        return True

    # Run the start command.
    try:
        run_command(start_cmd, show_output=True)
        log.info(
            f"Oxigraph server webapp for {dataset} will be available at "
            f"http://{args.host_name}:{port} and the sparql endpoint for "
            f"queries is http://{args.host_name}:{port}/query"
        )
        log.info("")
        if not args.run_in_foreground:
            log_cmd = f"{system} logs -f {server_container}"
            self.show_container_logs(log_cmd, "server")
    except Exception as e:
        log.error(f"Starting the Oxigraph server failed: {e}")
        return False

    return True
def main():
    """
    Entry point for the `q<engine>` console scripts: derive the engine
    from the script name (e.g. `qoxigraph` -> `oxigraph`), import its
    implementation module, parse the command line (reading the
    Configfile for defaults), and dispatch to the selected `*_command`
    method of the engine instance.
    """
    # Script name is `q<engine>`; strip the leading `q`.
    selected_engine = Path(sys.argv[0]).stem[1:]
    engine_class_name = selected_engine.capitalize()
    module_path = f"other_engines.engines.{selected_engine}"
    try:
        engine_module = import_module(module_path)
    except ImportError as e:
        raise Exception(
            f"Could not import module {module_path} "
            f"for engine {selected_engine}: {e}"
        ) from e

    engine = getattr(engine_module, engine_class_name)()

    # Parse the command line arguments and read the Configfile
    try:
        args = ArgumentsManager(engine=engine).parse_args()
    except ConfigException as e:
        log.error(e)
        log.info("")
        log.info(traceback.format_exc())
        exit(1)

    # Execute the command.
    command = f"{args.command.replace('-', '_')}_command"
    log.setLevel(log_levels[args.log_level])
    try:
        log.info("")
        log.info(colored(f"Command: {command}", attrs=["bold"]))
        log.info("")
        command_successful = getattr(engine, command)(args)
        log.info("")
        if not command_successful:
            exit(1)
    except KeyboardInterrupt:
        log.info("")
        log.info("Ctrl-C pressed, exiting ...")
        log.info("")
        exit(1)
    except Exception as e:
        log.error(f"An unexpected error occurred: {e}")
        log.info("")
        log.info(traceback.format_exc())
        exit(1)
a/src/qlever/containerize.py +++ b/src/qlever/containerize.py @@ -40,6 +40,7 @@ def containerize_command( volumes: list[tuple[str, str]] = [], ports: list[tuple[int, int]] = [], working_directory: Optional[str] = None, + use_bash: bool = True, ) -> str: """ Get the command to run `cmd` with the given `container_system` and the @@ -80,11 +81,17 @@ def containerize_command( f"{volume_options}" f"{port_options}" f"{working_directory_option}" + f" --name {container_name}" f" --init" - f" --entrypoint bash" - f" --name {container_name} {image_name}" - f" -c {shlex.quote(cmd)}" ) + if use_bash: + containerized_cmd += ( + f" --entrypoint bash {image_name} -c {shlex.quote(cmd)}" + ) + else: + containerized_cmd += ( + f" {image_name} {cmd}" + ) return containerized_cmd @staticmethod @@ -92,7 +99,8 @@ def is_running(container_system: str, container_name: str) -> bool: # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively. containers = ( run_command( - f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True + f'{container_system} ps --format="{{{{.Names}}}}"', + return_output=True, ) .strip() .splitlines() @@ -100,7 +108,9 @@ def is_running(container_system: str, container_name: str) -> bool: return container_name in containers @staticmethod - def stop_and_remove_container(container_system: str, container_name: str) -> bool: + def stop_and_remove_container( + container_system: str, container_name: str + ) -> bool: """ Stop the container with the given name using the given system. 
Return `True` if a container with that name was found and stopped, `False` diff --git a/src/qlever/qleverfile.py b/src/qlever/qleverfile.py index a269a889..3ff20668 100644 --- a/src/qlever/qleverfile.py +++ b/src/qlever/qleverfile.py @@ -336,17 +336,10 @@ def arg(*args, **kwargs): return all_args @staticmethod - def read(qleverfile_path): + def read_qleverfile(qleverfile_path): """ - Read the given Qleverfile (the function assumes that it exists) and - return a `ConfigParser` object with all the options and their values. - - NOTE: The keys have the same hierarchical structure as the keys in - `all_arguments()`. The Qleverfile may contain options that are not - defined in `all_arguments()`. They can be used as temporary variables - to define other options, but cannot be accessed by the commands later. + Read the Qleverfile using ConfigParser and return cleaned config """ - # Read the Qleverfile. defaults = {"random": "83724324hztz", "version": "01.01.01"} config = ConfigParser( @@ -383,6 +376,22 @@ def read(qleverfile_path): exit(1) config[section][option] = value + return config + + @staticmethod + def read(qleverfile_path): + """ + Read the given Qleverfile (the function assumes that it exists) and + return a `ConfigParser` object with all the options and their values. + + NOTE: The keys have the same hierarchical structure as the keys in + `all_arguments()`. The Qleverfile may contain options that are not + defined in `all_arguments()`. They can be used as temporary variables + to define other options, but cannot be accessed by the commands later. + """ + + config = Qleverfile.read_qleverfile(qleverfile_path) + # Make sure that all the sections are there. 
for section in ["data", "index", "server", "runtime", "ui"]: if section not in config: diff --git a/src/qlever/util.py b/src/qlever/util.py index af3a25c6..3360f44c 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -50,7 +50,7 @@ def run_command( "shell": True, "text": True, "stdout": None if show_output else subprocess.PIPE, - "stderr": subprocess.PIPE, + "stderr": None if show_output else subprocess.PIPE, } # With `Popen`, the command runs in the current shell and a process object