Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add setup scripts for other engines #127

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ Github = "https://github.com/ad-freiburg/qlever"
[project.scripts]
"qlever" = "qlever.qlever_main:main"
"qlever-old" = "qlever.qlever_old:main"
"qoxigraph" = "other_engines.main:main"

[tool.setuptools]
license-files = ["LICENSE"]
package-data = { "qlever" = ["Qleverfiles/*"] }
package-data = { "qlever" = ["Qleverfiles/*"], "qoxigraph" = ["Configfiles/*"] }

[tool.pytest.ini_options]
pythonpath = ["src"]
Expand Down
20 changes: 20 additions & 0 deletions src/other_engines/Configfiles/Configfile.dblp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Configfile for DBLP, use with https://github.com/ad-freiburg/qlever-control

[data]
NAME = dblp
DATA_TARFILE = dblp_KG_with_associated_data.tar
GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION})
FORMAT = ttl

[index]
INPUT_FILES = *.gz

[server]
PORT = 7015

[runtime]
SYSTEM = docker

29 changes: 29 additions & 0 deletions src/other_engines/Configfiles/Configfile.default
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Default Configfile, use with https://github.com/ad-freiburg/qlever-control
#
# If you have never seen a Configfile before, we recommend that you first look
# at the example Configfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
# src/other_engines/Configfiles . Or execute `q{engine_name} setup-config <dataset>` on the
# command line to obtain the example Configfile for <dataset>.

# As a minimum, each dataset needs a name. If you want `q{engine_name} get-data` to do
# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to
# generate (or download or copy from somewhere) the input files yourself. Each
# dataset should have a short DESCRIPTION, ideally with a date.
[data]
NAME =
GET_DATA_CMD =
DESCRIPTION =

# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all
# input files.
[index]
INPUT_FILES = *.ttl

# The server listens on PORT.
[server]
PORT = 8888

# Use SYSTEM = docker to run inside a docker container
[runtime]
SYSTEM = docker

20 changes: 20 additions & 0 deletions src/other_engines/Configfiles/Configfile.imdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Configfile for IMDB, use with https://github.com/ad-freiburg/qlever-control

[data]
NAME = imdb
IMDB_DATA_URL = https://datasets.imdbws.com
GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> ."
GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
DESCRIPTION = RDF data derived from ${IMDB_DATA_URL}

[index]
INPUT_FILES = ${data:NAME}.ttl

[server]
PORT = 7029

[runtime]
SYSTEM = docker

16 changes: 16 additions & 0 deletions src/other_engines/Configfiles/Configfile.olympics
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Configfile for Olympics, use with https://github.com/ad-freiburg/qlever-control

[data]
NAME = olympics
BASE_URL = https://github.com/wallscope/olympics-rdf
GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip
DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL}

[index]
INPUT_FILES = olympics.nt

[server]
PORT = 7887

[runtime]
SYSTEM = docker
Empty file added src/other_engines/__init__.py
Empty file.
251 changes: 251 additions & 0 deletions src/other_engines/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path

import argcomplete
from termcolor import colored

from other_engines.engine import SparqlEngine
from qlever.containerize import Containerize
from qlever.log import log, log_levels
from qlever.qleverfile import Qleverfile


def all_arguments():
    """
    Return the argument table used by the other-engines scripts.

    Starts from `Qleverfile.all_arguments()` and sets (adding or replacing)
    the entries of the `runtime` section that differ from QLever. Each entry
    is a pair `(args, kwargs)` holding the positional and keyword arguments
    for `ArgumentParser.add_argument`.
    """
    table = Qleverfile.all_arguments()
    runtime = table["runtime"]

    def foreground_flag(what):
        # `index_cmd` and `start_cmd` share the same flag and differ only
        # in the subject named in the help text.
        return (
            ("--run-in-foreground",),
            {
                "action": "store_true",
                "default": False,
                "help": (
                    f"Run the {what} in the foreground "
                    "(default: run in the background with `docker run -d`)"
                ),
            },
        )

    overrides = {
        "index_cmd": foreground_flag("index command"),
        "start_cmd": foreground_flag("server"),
        "system": (
            ("--system",),
            {
                "type": str,
                "choices": Containerize.supported_systems(),
                "default": "docker",
                "help": (
                    "Which system to use to run commands like `index` "
                    "or `start` in a container"
                ),
            },
        ),
        "index_container": (
            ("--index-container",),
            {
                "type": str,
                "help": "The name of the container used by the index command",
            },
        ),
        "server_container": (
            ("--server-container",),
            {
                "type": str,
                "help": "The name of the container used by the start command",
            },
        ),
    }
    for key, spec in overrides.items():
        runtime[key] = spec
    return table


class ArgumentsManager:
    """
    Build and run the command-line parser for an engine's control script.

    The set of commands, the arguments each command accepts, and the
    location of the Configfile all come from the `SparqlEngine` passed to
    the constructor. Argument definitions come from `all_arguments()`.
    """

    # Arguments whose default is computed by `get_default_config_value`
    # instead of being looked up in the Configfile.
    SPECIAL_ARGS = ["image", "index_container", "server_container"]

    def __init__(self, engine: SparqlEngine) -> None:
        self.engine = engine
        self.engine_name = engine.engine_name
        self.commands = engine.commands

    def get_default_config_value(self, arg_name: str, config):
        """
        Get the default value for one of the SPECIAL_ARGS.

        Returns the engine's image for `image`, and
        `<engine>.index.<name>` / `<engine>.server.<name>` (engine name in
        lower case, `<name>` taken from `config["data"]["name"]`) for the
        two container arguments. Returns `None` for any other argument.

        Raises `KeyError` if a container name is requested and `config`
        has no `name` in its `data` section.
        """
        if arg_name == "image":
            return self.engine.image
        if arg_name == "index_container":
            role = "index"
        elif arg_name == "server_container":
            role = "server"
        else:
            return None
        # Only the container names depend on the dataset name, so look it
        # up here; the image default must not fail when NAME is missing.
        name = config["data"]["name"]
        return f"{self.engine_name.lower()}.{role}.{name}"

    def add_subparser_for_command(
        self,
        subparsers,
        command_name: str,
        description: str,
        config=None,
    ) -> None:
        """
        Add subparser for the given command. Take the arguments from
        `self.engine.get_config_arguments()` and report an error if
        one of them is not contained in `all_arguments`. Overwrite the
        default values with the values from `config` if specified.
        """

        arg_names = self.engine.get_config_arguments(command_name)
        all_configfile_args = all_arguments()

        def argument_error(prefix: str):
            # Log which section/argument is unknown, then terminate.
            log.info("")
            log.error(
                f"{prefix} in `other_engines.config.all_arguments()` "
                f"for command `{command_name}`"
            )
            log.info("")
            log.info(
                "Value of `get_config_arguments` for command "
                f"`{command_name}`:"
            )
            log.info("")
            log.info(f"{arg_names}")
            log.info("")
            sys.exit(1)

        # Add the subparser.
        subparser = subparsers.add_parser(
            command_name, description=description, help=description
        )

        # Add the arguments relevant for the command.
        for section in arg_names:
            if section not in all_configfile_args:
                argument_error(f"Section `{section}` not found")
            for arg_name in arg_names[section]:
                if arg_name not in all_configfile_args[section]:
                    argument_error(
                        f"Argument `{arg_name}` of section "
                        f"`{section}` not found"
                    )
                args, kwargs = all_configfile_args[section][arg_name]
                # Work on a copy so the shared argument table stays intact.
                kwargs_copy = kwargs.copy()
                # If `config` is given, add info about default values to
                # the help string.
                if config is not None:
                    default_value = kwargs.get("default", None)
                    if arg_name in self.SPECIAL_ARGS:
                        config_value = self.get_default_config_value(
                            arg_name, config
                        )
                    else:
                        config_value = config.get(
                            section, arg_name, fallback=None
                        )
                    # Tolerate argument definitions without a help text.
                    help_text = kwargs_copy.get("help") or ""
                    if config_value is not None:
                        kwargs_copy["default"] = config_value
                        # A value is available, so the argument no longer
                        # has to be given on the command line.
                        kwargs_copy["required"] = False
                        help_text += (
                            f" [default, from {self.engine.configfile_name}: "
                            f"{config_value}]"
                        )
                    else:
                        help_text += f" [default: {default_value}]"
                    kwargs_copy["help"] = help_text
                subparser.add_argument(*args, **kwargs_copy)

        # Additional arguments that are shared by all commands.
        self.engine.additional_arguments(command_name, subparser)
        subparser.add_argument(
            "--show",
            action="store_true",
            default=False,
            help="Only show what would be executed, but don't execute it",
        )
        subparser.add_argument(
            "--log-level",
            choices=log_levels.keys(),
            default="INFO",
            help="Set the log level",
        )

    def parse_args(self):
        """
        Parse the command line and return the resulting namespace.

        Reads the engine's Configfile (if it exists and we are not in
        autocomplete mode), builds one subparser per command, hooks in
        argcomplete, and parses `sys.argv`.

        Exits with status 1 if the Configfile cannot be parsed, and with
        status 0 after printing the help when called without arguments.
        """
        # Determine whether we are in autocomplete mode or not.
        autocomplete_mode = "COMP_LINE" in os.environ

        # Check if the user has registered this script for argcomplete.
        env_prefix = self.engine_name.upper()
        argcomplete_check_off = os.environ.get(
            f"{env_prefix}_ARGCOMPLETE_CHECK_OFF"
        )
        argcomplete_enabled = os.environ.get(
            f"{env_prefix}_ARGCOMPLETE_ENABLED"
        )
        if not argcomplete_enabled and not argcomplete_check_off:
            log.info("")
            log.warning(
                "To enable autocompletion, run the following command, "
                "and consider adding it to your `.bashrc` or `.zshrc`:"
                "\n\n"
                f'eval "$(register-python-argcomplete {self.engine.script_name})"'
                f" && export {env_prefix}_ARGCOMPLETE_ENABLED=1"
            )
            log.info("")

        configfile_path = self.engine.configfile_path
        configfile_exists = configfile_path.is_file()

        # The Configfile is not read in autocomplete mode.
        if configfile_exists and not autocomplete_mode:
            try:
                config = Qleverfile.read_qleverfile(configfile_path)
            except Exception as e:
                log.info("")
                log.error(
                    f"Error parsing {self.engine.configfile_name} `{configfile_path}`"
                    f": {e}"
                )
                log.info("")
                sys.exit(1)
        else:
            config = None

        parser = argparse.ArgumentParser(
            description=colored(
                f"This is the {self.engine.script_name} command line tool, "
                f"it's all you need to work with {self.engine_name} in a "
                f"{' or '.join(Containerize.supported_systems())} "
                "container environment",
                attrs=["bold"],
            )
        )
        subparsers = parser.add_subparsers(dest="command")
        subparsers.required = True
        for command_name, description in self.commands.items():
            self.add_subparser_for_command(
                subparsers=subparsers,
                command_name=command_name,
                description=description,
                config=config,
            )

        argcomplete.autocomplete(parser, always_complete_options="long")

        # If called without arguments, show the help message.
        if len(sys.argv) == 1:
            parser.print_help()
            sys.exit(0)

        args = parser.parse_args()

        # If the command says that we should have a Configfile, but we
        # don't, issue a warning.
        if (
            self.engine.command_should_have_configfile(args.command)
            and not configfile_exists
        ):
            log.warning(
                f"Invoking command `{args.command}` without a "
                f"{self.engine.configfile_name}. You have to specify all "
                "required arguments on the command line. "
                "This is possible, but not recommended."
            )

        return args
Loading