ad-freiburg · tanmay-9 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,10 +28,11 @@ Github = "https://github.com/ad-freiburg/qlever"
 [project.scripts]
 "qlever" = "qlever.qlever_main:main"
 "qlever-old" = "qlever.qlever_old:main"
+"qoxigraph" = "other_engines.main:main"
 
 [tool.setuptools]
 license-files = ["LICENSE"]
-package-data = { "qlever" = ["Qleverfiles/*"] }
+# Keys of `package-data` are package names (not script names). The Configfiles
+# live in the `other_engines` package, which also provides the `qoxigraph` script.
+package-data = { "qlever" = ["Qleverfiles/*"], "other_engines" = ["Configfiles/*"] }
 
 [tool.pytest.ini_options]
 pythonpath = ["src"]

diff --git a/src/other_engines/Configfiles/Configfile.dblp b/src/other_engines/Configfiles/Configfile.dblp
@@ -0,0 +1,20 @@
+# Configfile for DBLP, use with https://github.com/ad-freiburg/qlever-control
+
+[data]
+NAME = dblp
+DATA_TARFILE = dblp_KG_with_associated_data.tar
+GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
+GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
+VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION})
+FORMAT = ttl
+
+[index]
+INPUT_FILES = *.gz
+
+[server]
+PORT = 7015
+
+[runtime]
+SYSTEM = docker
+
diff --git a/src/other_engines/Configfiles/Configfile.default b/src/other_engines/Configfiles/Configfile.default
@@ -0,0 +1,29 @@
+# Default Configfile, use with https://github.com/ad-freiburg/qlever-control
+#
+# If you have never seen a Configfile before, we recommend that you first look
+# at the example Configfiles on
+# http://qlever.cs.uni-freiburg.de/qlever-control/src/other_engines/Configfiles
+# or execute `q{engine_name} setup-config <dataset>` on the command line to
+# obtain the example Configfile for <dataset>.
+
+# As a minimum, each dataset needs a name. If you want `q{engine_name} get-data` to do
+# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to 
+# generate (or download or copy from somewhere) the input files yourself. Each
+# dataset should have a short DESCRIPTION, ideally with a date.
+[data]
+NAME         =
+GET_DATA_CMD =
+DESCRIPTION  = 
+
+# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all
+# input files. 
+[index]
+INPUT_FILES     = *.ttl
+
+# The server listens on PORT. 
+[server]
+PORT         = 8888
+
+# Use SYSTEM = docker to run inside a docker container
+[runtime]
+SYSTEM = docker
+
diff --git a/src/other_engines/Configfiles/Configfile.imdb b/src/other_engines/Configfiles/Configfile.imdb
@@ -0,0 +1,20 @@
+# Configfile for IMDB, use with https://github.com/ad-freiburg/qlever-control
+
+[data]
+NAME             = imdb
+IMDB_DATA_URL    = https://datasets.imdbws.com
+GET_PREFIXES     = echo "@prefix imdb: <https://www.imdb.com/> ."
+GET_IMDB_BASICS  = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_DATA_CMD     = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
+DESCRIPTION      = RDF data derived from ${IMDB_DATA_URL}
+
+[index]
+INPUT_FILES     = ${data:NAME}.ttl
+
+[server]
+PORT               = 7029
+
+[runtime]
+SYSTEM = docker
+
diff --git a/src/other_engines/Configfiles/Configfile.olympics b/src/other_engines/Configfiles/Configfile.olympics
@@ -0,0 +1,16 @@
+# Configfile for Olympics, use with https://github.com/ad-freiburg/qlever-control
+
+[data]
+NAME              = olympics
+BASE_URL          = https://github.com/wallscope/olympics-rdf
+GET_DATA_CMD      = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip
+DESCRIPTION       = 120 Years of Olympics, data from ${BASE_URL}
+
+[index]
+INPUT_FILES     = olympics.nt
+
+[server]
+PORT               = 7887
+
+[runtime]
+SYSTEM = docker
diff --git a/src/other_engines/__init__.py b/src/other_engines/__init__.py
diff --git a/src/other_engines/config.py b/src/other_engines/config.py
@@ -0,0 +1,251 @@
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+import argcomplete
+from termcolor import colored
+
+from other_engines.engine import SparqlEngine
+from qlever.containerize import Containerize
+from qlever.log import log, log_levels
+from qlever.qleverfile import Qleverfile
+
+
+def all_arguments():
+    """
+    Return the table of arguments for the other engines: start from
+    `Qleverfile.all_arguments()` and add or replace those entries of the
+    `runtime` section that are new or different from QLever.
+    """
+    table = Qleverfile.all_arguments()
+
+    # Each entry is a pair `(args, kwargs)`, which is later unpacked into a
+    # call `subparser.add_argument(*args, **kwargs)`.
+    runtime_overrides = {
+        "index_cmd": (
+            ("--run-in-foreground",),
+            {
+                "action": "store_true",
+                "default": False,
+                "help": (
+                    "Run the index command in the foreground "
+                    "(default: run in the background with `docker run -d`)"
+                ),
+            },
+        ),
+        "start_cmd": (
+            ("--run-in-foreground",),
+            {
+                "action": "store_true",
+                "default": False,
+                "help": (
+                    "Run the server in the foreground "
+                    "(default: run in the background with `docker run -d`)"
+                ),
+            },
+        ),
+        "system": (
+            ("--system",),
+            {
+                "type": str,
+                "choices": Containerize.supported_systems(),
+                "default": "docker",
+                "help": (
+                    "Which system to use to run commands like `index` "
+                    "or `start` in a container"
+                ),
+            },
+        ),
+        "index_container": (
+            ("--index-container",),
+            {
+                "type": str,
+                "help": "The name of the container used by the index command",
+            },
+        ),
+        "server_container": (
+            ("--server-container",),
+            {
+                "type": str,
+                "help": "The name of the container used by the start command",
+            },
+        ),
+    }
+    table["runtime"].update(runtime_overrides)
+    return table
+
+
+class ArgumentsManager:
+    """
+    Build the command-line parser for the script of a SPARQL engine.
+
+    There is one subcommand per entry of `engine.commands`. If the Configfile
+    of the engine exists, its values are used as default values for the
+    command-line arguments.
+    """
+
+    # Arguments for which the Configfile default is computed by
+    # `get_default_config_value` instead of being read from the Configfile.
+    SPECIAL_ARGS = ["image", "index_container", "server_container"]
+
+    def __init__(self, engine: SparqlEngine) -> None:
+        self.engine = engine
+        self.engine_name = engine.engine_name
+        self.commands = engine.commands
+
+    def get_default_config_value(self, arg_name: str, config):
+        """
+        Get the default value for one of the SPECIAL_ARGS. Return `None` for
+        any other argument, and also when a container name is requested but
+        `config` has no `name` in its `data` section.
+        """
+        if arg_name == "image":
+            return self.engine.image
+        if arg_name not in ("index_container", "server_container"):
+            return None
+        # Only the container names depend on the dataset name, so look it up
+        # only here; a missing name must not break the `image` default.
+        try:
+            name = config["data"]["name"]
+        except KeyError:
+            return None
+        role = "index" if arg_name == "index_container" else "server"
+        return f"{self.engine_name.lower()}.{role}.{name}"
+
+    def add_subparser_for_command(
+        self,
+        subparsers,
+        command_name: str,
+        description: str,
+        config=None,
+    ) -> None:
+        """
+        Add subparser for the given command. Take the arguments from
+        `self.engine.get_config_arguments()` and report an error (and exit
+        with status 1) if one of them is not contained in `all_arguments()`.
+        Overwrite the default values with the values from `config` if
+        specified.
+        """
+
+        arg_names = self.engine.get_config_arguments(command_name)
+        all_configfile_args = all_arguments()
+
+        def argument_error(prefix: str):
+            log.info("")
+            log.error(
+                f"{prefix} in `other_engines.config.all_arguments()` "
+                f"for command `{command_name}`"
+            )
+            log.info("")
+            log.info(
+                f"Value of `get_config_arguments` for command "
+                f"`{command_name}`:"
+            )
+            log.info("")
+            log.info(f"{arg_names}")
+            log.info("")
+            sys.exit(1)
+
+        # Add the subparser.
+        subparser = subparsers.add_parser(
+            command_name, description=description, help=description
+        )
+
+        # Add the arguments relevant for the command.
+        for section in arg_names:
+            if section not in all_configfile_args:
+                argument_error(f"Section `{section}` not found")
+            for arg_name in arg_names[section]:
+                if arg_name not in all_configfile_args[section]:
+                    argument_error(
+                        f"Argument `{arg_name}` of section "
+                        f"`{section}` not found"
+                    )
+                args, kwargs = all_configfile_args[section][arg_name]
+                # Work on a copy, so that the entry in the table of all
+                # arguments is not modified.
+                kwargs_copy = kwargs.copy()
+                # If `config` is given, add info about default values to the
+                # help string.
+                if config is not None:
+                    default_value = kwargs.get("default", None)
+                    if arg_name in self.SPECIAL_ARGS:
+                        config_value = self.get_default_config_value(
+                            arg_name, config
+                        )
+                    else:
+                        config_value = config.get(
+                            section, arg_name, fallback=None
+                        )
+                    # An argument without a help text gets just the info
+                    # about the default value.
+                    help_text = kwargs_copy.get("help", "")
+                    if config_value is not None:
+                        # A value from the Configfile becomes the default,
+                        # so the argument is no longer required.
+                        kwargs_copy["default"] = config_value
+                        kwargs_copy["required"] = False
+                        kwargs_copy["help"] = help_text + (
+                            f" [default, from {self.engine.configfile_name}: "
+                            f"{config_value}]"
+                        )
+                    else:
+                        kwargs_copy["help"] = (
+                            help_text + f" [default: {default_value}]"
+                        )
+                subparser.add_argument(*args, **kwargs_copy)
+
+        # Additional arguments that are shared by all commands.
+        self.engine.additional_arguments(command_name, subparser)
+        subparser.add_argument(
+            "--show",
+            action="store_true",
+            default=False,
+            help="Only show what would be executed, but don't execute it",
+        )
+        subparser.add_argument(
+            "--log-level",
+            choices=log_levels.keys(),
+            default="INFO",
+            help="Set the log level",
+        )
+
+    def parse_args(self):
+        """
+        Parse the command line and return the parsed arguments.
+
+        Exit with status 1 if the Configfile exists but cannot be parsed,
+        and with status 0 (after showing the help message) if the script is
+        called without arguments.
+        """
+        # Determine whether we are in autocomplete mode or not.
+        autocomplete_mode = "COMP_LINE" in os.environ
+
+        # Check if the user has registered this script for argcomplete.
+        env_prefix = self.engine_name.upper()
+        argcomplete_check_off = os.environ.get(
+            f"{env_prefix}_ARGCOMPLETE_CHECK_OFF"
+        )
+        argcomplete_enabled = os.environ.get(
+            f"{env_prefix}_ARGCOMPLETE_ENABLED"
+        )
+        if not argcomplete_enabled and not argcomplete_check_off:
+            log.info("")
+            log.warning(
+                "To enable autocompletion, run the following command, "
+                "and consider adding it to your `.bashrc` or `.zshrc`:"
+                "\n\n"
+                f'eval "$(register-python-argcomplete {self.engine.script_name})"'
+                f" && export {env_prefix}_ARGCOMPLETE_ENABLED=1"
+            )
+            log.info("")
+
+        configfile_path = self.engine.configfile_path
+        configfile_exists = configfile_path.is_file()
+
+        # The Configfile is not read in autocomplete mode; the subparsers
+        # then keep the default values from `all_arguments()`.
+        if configfile_exists and not autocomplete_mode:
+            try:
+                config = Qleverfile.read_qleverfile(configfile_path)
+            except Exception as e:
+                log.info("")
+                log.error(
+                    f"Error parsing {self.engine.configfile_name} `{configfile_path}`"
+                    f": {e}"
+                )
+                log.info("")
+                sys.exit(1)
+        else:
+            config = None
+
+        parser = argparse.ArgumentParser(
+            description=colored(
+                f"This is the {self.engine.script_name} command line tool, "
+                f"it's all you need to work with {self.engine_name} in a "
+                f"{' or '.join(Containerize.supported_systems())} "
+                "container environment",
+                attrs=["bold"],
+            )
+        )
+        subparsers = parser.add_subparsers(dest="command")
+        subparsers.required = True
+        for command_name, description in self.commands.items():
+            self.add_subparser_for_command(
+                subparsers=subparsers,
+                command_name=command_name,
+                description=description,
+                config=config,
+            )
+
+        argcomplete.autocomplete(parser, always_complete_options="long")
+
+        # If called without arguments, show the help message.
+        if len(sys.argv) == 1:
+            parser.print_help()
+            sys.exit(0)
+
+        args = parser.parse_args()
+
+        # If the command says that we should have a Configfile, but we don't,
+        # issue a warning.
+        if self.engine.command_should_have_configfile(args.command):
+            if not configfile_exists:
+                log.warning(
+                    f"Invoking command `{args.command}` without a "
+                    f"{self.engine.configfile_name}. You have to specify all "
+                    "required arguments on the command line. "
+                    "This is possible, but not recommended."
+                )
+
+        return args