Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dependencies: Update to disk-objectstore~=1.0 #6132

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 20 additions & 25 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend."""
import contextlib
import dataclasses
import shutil
import typing as t

Expand Down Expand Up @@ -156,24 +157,29 @@ def maintain( # type: ignore[override] # pylint: disable=arguments-differ,too-ma
) -> dict:
"""Performs maintenance operations.

:param live:if True, will only perform operations that are safe to do while the repository is in use.
:param pack_loose:flag for forcing the packing of loose files.
:param do_repack:flag for forcing the re-packing of already packed files.
:param clean_storage:flag for forcing the cleaning of soft-deleted files from the repository.
:param do_vacuum:flag for forcing the vacuuming of the internal database when cleaning the repository.
:param compress:flag for compressing the data when packing loose files.
:return:a dictionary with information on the operations performed.
:param live: if True, will only perform operations that are safe to do while the repository is in use.
:param pack_loose: flag for forcing the packing of loose files.
:param do_repack: flag for forcing the re-packing of already packed files.
:param clean_storage: flag for forcing the cleaning of soft-deleted files from the repository.
:param do_vacuum: flag for forcing the vacuuming of the internal database when cleaning the repository.
:param compress: flag for compressing the data when packing loose files. Set to ``CompressMode.AUTO`` if ``True``.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I guess we only allow fully uncompressed or auto compression (no options for YES or KEEP), right?
I guess it's OK for the AiiDA interface in order to keep it simple and we can always point people to the Disk-ObjectStore API for more (or expand this API in the future)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we currently just have a boolean flag because that was all that disk-objectstore supported anyway. I don't think we should start making it even more complex now.

:return: a dictionary with information on the operations performed.
"""
from disk_objectstore import CompressMode

if live and (do_repack or clean_storage or do_vacuum):
overrides = {'do_repack': do_repack, 'clean_storage': clean_storage, 'do_vacuum': do_vacuum}
keys = ', '.join([key for key, override in overrides.items() if override is True]) # type: ignore
raise ValueError(f'The following overrides were enabled but cannot be if `live=True`: {keys}')

pack_loose = True if pack_loose is None else pack_loose

if compress is True:
compress = CompressMode.AUTO

if live:
do_repack = False
clean_storage = False
clean_storage = True if clean_storage is None else clean_storage
do_vacuum = False
else:
do_repack = True if do_repack is None else do_repack
Expand All @@ -182,15 +188,15 @@ def maintain( # type: ignore[override] # pylint: disable=arguments-differ,too-ma

with self._container as container:
if pack_loose:
files_numb = container.count_objects()['loose']
files_size = container.get_total_size()['total_size_loose'] * BYTES_TO_MB
files_numb = container.count_objects().loose
files_size = container.get_total_size().total_size_loose * BYTES_TO_MB
logger.report(f'Packing all loose files ({files_numb} files occupying {files_size} MB) ...')
if not dry_run:
container.pack_all_loose(compress=compress)

if do_repack:
files_numb = container.count_objects()['packed']
files_size = container.get_total_size()['total_size_packfiles_on_disk'] * BYTES_TO_MB
files_numb = container.count_objects().packed
files_size = container.get_total_size().total_size_packfiles_on_disk * BYTES_TO_MB
logger.report(f'Re-packing all pack files ({files_numb} files in packs, occupying {files_size} MB) ...')
if not dry_run:
container.repack()
Expand All @@ -211,24 +217,13 @@ def get_info( # type: ignore[override] # pylint: disable=arguments-differ
with self._container as container:
output_info['SHA-hash algorithm'] = container.hash_type
output_info['Compression algorithm'] = container.compression_algorithm
output_info['Objects'] = dataclasses.asdict(container.count_objects())

if not detailed:
return output_info

files_data = container.count_objects()
size_data = container.get_total_size()

output_info['Packs'] = files_data['pack_files']

output_info['Objects'] = {
'unpacked': files_data['loose'],
'packed': files_data['packed'],
}

output_info['Size (MB)'] = {
'unpacked': size_data['total_size_loose'] * BYTES_TO_MB,
'packed': size_data['total_size_packfiles_on_disk'] * BYTES_TO_MB,
'other': size_data['total_size_packindexes_on_disk'] * BYTES_TO_MB,
k: float(f'{v * BYTES_TO_MB:.2f}') for k, v in dataclasses.asdict(container.get_total_size()).items()
}

return output_info
2 changes: 1 addition & 1 deletion aiida/storage/psql_dos/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def get_repository(self) -> 'DiskObjectStoreRepositoryBackend':

from aiida.repository.backend import DiskObjectStoreRepositoryBackend

container = Container(str(get_filepath_container(self.profile)))
container = Container(get_filepath_container(self.profile))
return DiskObjectStoreRepositoryBackend(container=container)

@property
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- circus~=0.18.0
- click-spinner~=0.1.8
- click~=8.1
- disk-objectstore~=0.6.0
- disk-objectstore~=1.0
- docstring_parser
- get-annotations~=0.1
- python-graphviz~=0.19
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies = [
"circus~=0.18.0",
"click-spinner~=0.1.8",
"click~=8.1",
"disk-objectstore~=0.6.0",
"disk-objectstore~=1.0",
"docstring-parser",
"get-annotations~=0.1;python_version<'3.10'",
"graphviz~=0.19",
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.10.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.11.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-py-3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ debugpy==1.6.7
decorator==5.1.1
defusedxml==0.7.1
deprecation==2.1.0
disk-objectstore==0.6.0
disk-objectstore==1.0.0
docstring-parser==0.15
docutils==0.16
emmet-core==0.57.1
Expand Down
22 changes: 10 additions & 12 deletions tests/repository/backend/test_disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,26 +224,24 @@ def test_get_info(populated_repository):
assert repository_info['SHA-hash algorithm'] == 'sha256'
assert repository_info['Compression algorithm'] == 'zlib+1'

assert 'Packs' in repository_info
assert repository_info['Packs'] == 1

assert 'Objects' in repository_info
assert 'unpacked' in repository_info['Objects']
assert 'pack_files' in repository_info['Objects']
assert 'loose' in repository_info['Objects']
assert 'packed' in repository_info['Objects']
assert repository_info['Objects']['unpacked'] == 2
assert repository_info['Objects']['pack_files'] == 1
assert repository_info['Objects']['loose'] == 1
assert repository_info['Objects']['packed'] == 3

assert 'Size (MB)' in repository_info
assert 'unpacked' in repository_info['Size (MB)']
assert 'packed' in repository_info['Size (MB)']
assert 'other' in repository_info['Size (MB)']
assert 'total_size_loose' in repository_info['Size (MB)']
assert 'total_size_packed' in repository_info['Size (MB)']


#yapf: disable
@pytest.mark.parametrize(('kwargs', 'output_info'), (
(
{'live': True},
{'unpacked': 2, 'packed': 4}
{'unpacked': 0, 'packed': 4}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected that these numbers changed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I believe this is because live maintenance now automatically includes packing and cleaning.

),
(
{'live': False},
Expand All @@ -265,16 +263,16 @@ def test_get_info(populated_repository):
'clean_storage': False,
'do_vacuum': False,
},
{'unpacked': 2, 'packed': 3}
{'unpacked': 1, 'packed': 3}
),
))
# yapf: enable
def test_maintain(populated_repository, kwargs, output_info):
"""Test the ``maintain`` method."""
populated_repository.maintain(**kwargs)
file_info = populated_repository._container.count_objects() # pylint: disable=protected-access
assert file_info['loose'] == output_info['unpacked']
assert file_info['packed'] == output_info['packed']
assert file_info.loose == output_info['unpacked']
assert file_info.packed == output_info['packed']


@pytest.mark.parametrize('do_vacuum', [True, False])
Expand Down