Skip to content

Commit

Permalink
Fix Qleverfile for OSM Planet (#135)
Browse files Browse the repository at this point in the history
Add `ULIMIT = 10000`. Update memory settings to more modest ones. Replace the error-prone `curl` by the more robust `wget`. Update meta information in comments at the top of the `Qleverfile`. Fixes #129
  • Loading branch information
hannahbast authored Feb 13, 2025
1 parent ba2823d commit d65235b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "qlever"
description = "Script for using the QLever SPARQL engine."
version = "0.5.17"
version = "0.5.18"
authors = [
{ name = "Hannah Bast", email = "[email protected]" }
]
Expand Down
22 changes: 11 additions & 11 deletions src/qlever/Qleverfiles/Qleverfile.osm-planet
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
# Qleverfile for OSM Planet, use with the qlever script (pip install qlever)
# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
#
# qlever get-data # downloads .ttl.bz2 file of ~ 400 GB with ~ 100 B triples
# qlever index # takes ~12 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
# qlever start # takes a few seconds
# qlever get-data # downloads ~400 GB (ttl.bz2), ~100 B triples
# qlever index # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
#
# For the OSM data of a single country, do `qlever setup-config osm-country`
# and edit the Qleverfile to specify the country.
# Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)

[data]
NAME = osm-planet
DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${DATA_URL} | tee ${NAME}.download-log.txt
VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

Expand All @@ -21,14 +20,15 @@ PARALLEL_PARSING = true
PARSER_BUFFER_SIZE = 100M
STXXL_MEMORY = 40G
SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
ULIMIT = 10000

[server]
PORT = 7007
ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 60G
CACHE_MAX_SIZE = 40G
CACHE_MAX_SIZE_SINGLE_ENTRY = 30G
TIMEOUT = 300s
MEMORY_FOR_QUERIES = 40G
CACHE_MAX_SIZE = 20G
CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
TIMEOUT = 600s

[runtime]
SYSTEM = docker
Expand Down

0 comments on commit d65235b

Please sign in to comment.