From 64dcec384bbd6a033ce8d65353e9f79b1ec01652 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 29 Nov 2024 14:43:45 +0100 Subject: [PATCH] get/get_many: add raise_missing=True param --- src/borg/legacyrepository.py | 11 +++++++---- src/borg/remote.py | 32 +++++++++++++++++++------------- src/borg/repository.py | 11 +++++++---- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/borg/legacyrepository.py b/src/borg/legacyrepository.py index 06e166532d..0ff463a86e 100644 --- a/src/borg/legacyrepository.py +++ b/src/borg/legacyrepository.py @@ -1211,18 +1211,21 @@ def list(self, limit=None, marker=None): self.index = self.open_index(self.get_transaction_id()) return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)] - def get(self, id, read_data=True): + def get(self, id, read_data=True, raise_missing=True): if not self.index: self.index = self.open_index(self.get_transaction_id()) try: in_index = NSIndex1Entry(*(self.index[id][:2])) # legacy: index entries have no size element return self.io.read(in_index.segment, in_index.offset, id, read_data=read_data) except KeyError: - raise self.ObjectNotFound(id, self.path) from None + if raise_missing: + raise self.ObjectNotFound(id, self.path) from None + else: + return None - def get_many(self, ids, read_data=True, is_preloaded=False): + def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True): for id_ in ids: - yield self.get(id_, read_data=read_data) + yield self.get(id_, read_data=read_data, raise_missing=raise_missing) def put(self, id, data, wait=True): """put a repo object diff --git a/src/borg/remote.py b/src/borg/remote.py index 03fd633f77..9a3ef70cfe 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -965,7 +965,7 @@ def handle_error(unpacked): self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args})) if not self.to_send and self.preload_ids: chunk_id = self.preload_ids.pop(0) - args = {"id": chunk_id} + args = {"id": 
chunk_id, "raise_missing": True} self.msgid += 1 self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid) self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: "get", ARGS: args})) @@ -1024,12 +1024,16 @@ def __len__(self): def list(self, limit=None, marker=None): """actual remoting is done via self.call in the @api decorator""" - def get(self, id, read_data=True): - for resp in self.get_many([id], read_data=read_data): + def get(self, id, read_data=True, raise_missing=True): + for resp in self.get_many([id], read_data=read_data, raise_missing=raise_missing): return resp - def get_many(self, ids, read_data=True, is_preloaded=False): - yield from self.call_many("get", [{"id": id, "read_data": read_data} for id in ids], is_preloaded=is_preloaded) + def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True): + yield from self.call_many( + "get", + [{"id": id, "read_data": read_data, "raise_missing": raise_missing} for id in ids], + is_preloaded=is_preloaded, + ) @api(since=parse_version("1.0.0")) def put(self, id, data, wait=True): @@ -1131,11 +1135,11 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def get(self, key, read_data=True): - return next(self.get_many([key], read_data=read_data, cache=False)) + def get(self, key, read_data=True, raise_missing=True): + return next(self.get_many([key], read_data=read_data, raise_missing=raise_missing, cache=False)) - def get_many(self, keys, read_data=True, cache=True): - for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data)): + def get_many(self, keys, read_data=True, raise_missing=True, cache=True): + for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data, raise_missing=raise_missing)): yield self.transform(key, data) def log_instrumentation(self): @@ -1176,7 +1180,7 @@ def query_size_limit(self): def prefixed_key(self, key, complete): # just prefix another byte telling whether this key refers to a 
complete chunk - # or a without-data-metadata-only chunk (see also read_data param). + # or a without-data-metadata-only chunk (see also the read_data param). prefix = b"\x01" if complete else b"\x00" return prefix + key @@ -1240,10 +1244,12 @@ def close(self): self.cache.clear() shutil.rmtree(self.basedir) - def get_many(self, keys, read_data=True, cache=True): + def get_many(self, keys, read_data=True, raise_missing=True, cache=True): # It could use different cache keys depending on read_data and cache full vs. meta-only chunks. unknown_keys = [key for key in keys if self.prefixed_key(key, complete=read_data) not in self.cache] - repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data)) + repository_iterator = zip( + unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data, raise_missing=raise_missing) + ) for key in keys: pkey = self.prefixed_key(key, complete=read_data) if pkey in self.cache: @@ -1261,7 +1267,7 @@ def get_many(self, keys, read_data=True, cache=True): else: # slow path: eviction during this get_many removed this key from the cache t0 = time.perf_counter() - data = self.repository.get(key, read_data=read_data) + data = self.repository.get(key, read_data=read_data, raise_missing=raise_missing) self.slow_lat += time.perf_counter() - t0 transformed = self.add_entry(key, data, cache, complete=read_data) self.slow_misses += 1 diff --git a/src/borg/repository.py b/src/borg/repository.py index 395e312962..5181a5c406 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -426,7 +426,7 @@ def list(self, limit=None, marker=None): # note: do not collect the marker id return result - def get(self, id, read_data=True): + def get(self, id, read_data=True, raise_missing=True): self._lock_refresh() id_hex = bin_to_hex(id) key = "data/" + id_hex @@ -453,11 +453,14 @@ def get(self, id, read_data=True): raise IntegrityError(f"Object too small [id {id_hex}]: expected {meta_size}, 
got {len(meta)} bytes") return hdr + meta except StoreObjectNotFound: - raise self.ObjectNotFound(id, str(self._location)) from None + if raise_missing: + raise self.ObjectNotFound(id, str(self._location)) from None + else: + return None - def get_many(self, ids, read_data=True, is_preloaded=False): + def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True): for id_ in ids: - yield self.get(id_, read_data=read_data) + yield self.get(id_, read_data=read_data, raise_missing=raise_missing) def put(self, id, data, wait=True): """put a repo object