Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-46936: Fix cache exception when unexpected files present #1134

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-46936.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where DatastoreCacheManager would raise ValueError('badly formed hexadecimal UUID string') if files with unexpected names are present in the cache directory when trying to load a file from the cache.
22 changes: 18 additions & 4 deletions python/lsst/daf/butler/datastore/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ def _parse_cache_name(cached_location: str) -> tuple[uuid.UUID, str | None, str
ext = "." + root_ext.pop(0) if root_ext else None

parts = root.split("_")
id_ = uuid.UUID(parts.pop(0))
try:
id_ = uuid.UUID(parts.pop(0))
except ValueError as e:
raise InvalidCacheFilenameError(f"Invalid or missing ID in cache file name: {cached_location}") from e
component = parts.pop(0) if parts else None
return id_, component, ext

Expand Down Expand Up @@ -894,9 +897,14 @@ def scan_cache(self) -> None:
if file.relative_to(self._temp_exempt_directory) is not None:
continue

path_in_cache = self._register_cache_entry(file, can_exist=True)
if path_in_cache:
found.add(path_in_cache)
try:
path_in_cache = self._register_cache_entry(file, can_exist=True)
if path_in_cache:
found.add(path_in_cache)
except InvalidCacheFilenameError:
# Skip files that are in the cache directory, but were not
# created by us.
pass

# Find any files that were recorded in the cache but are no longer
# on disk. (something else cleared them out?)
Expand Down Expand Up @@ -1194,3 +1202,9 @@ def known_to_cache(self, ref: DatasetRef, extension: str | None = None) -> bool:

def __str__(self) -> str:
return f"{type(self).__name__}()"


class InvalidCacheFilenameError(Exception):
"""Raised when attempting to register a file in the cache with a name in
the incorrect format.
"""
Comment on lines +1207 to +1210
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe move it to the top, took me a minute to figure out where the name came from when I read the code above?

20 changes: 20 additions & 0 deletions tests/test_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,26 @@ def testExplicitCacheDir(self) -> None:
# Test that the cache directory is not marked temporary
self.assertFalse(cache_manager.cache_directory.isTemporary)

def testUnexpectedFilesInCacheDir(self) -> None:
"""Test for regression of a bug where extraneous files in a cache
directory would cause all cache lookups to raise an exception.
"""
config_str = f"""
cached:
root: '{self.root}'
cacheable:
metric0: true
"""

for filename in ["unexpected.txt", "unexpected", "un_expected", "un_expected.txt"]:
unexpected_file = os.path.join(self.root, filename)
with open(unexpected_file, "w") as fh:
fh.write("test")

cache_manager = self._make_cache_manager(config_str)
cache_manager.scan_cache()
self.assertCache(cache_manager)

def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
self.assertFalse(cache_manager.should_be_cached(self.refs[1]))
Expand Down
Loading