From 396f92891e429ece1fe55257215518edc5767e1f Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 23 Oct 2024 12:53:22 +0200 Subject: [PATCH 01/14] feat: make collection helper more generic --- tdp/core/collection.py | 50 +++++++++++++----------------- tests/unit/core/test_collection.py | 8 ++--- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/tdp/core/collection.py b/tdp/core/collection.py index 0f54f0b4..962a656b 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -76,12 +76,13 @@ def __init__( check_collection_structure(self._path) self._inventory_reader = inventory_reader or InventoryReader() - self._dag_nodes = list(get_collection_dag_nodes(self._path)) - self._playbooks = get_collection_playbooks( - self._path, + self._dag_nodes = list(read_dag_directory(self.dag_directory)) + self._playbooks = read_playbooks_directory( + self.playbooks_directory, + self.name, inventory_reader=self._inventory_reader, ) - self._schemas = get_collection_schemas(self._path) + self._schemas = read_schema_directory(self.default_vars_directory) @staticmethod def from_path(path: PathLike) -> Collection: @@ -177,10 +178,8 @@ def check_collection_structure(path: Path) -> None: ) -def get_collection_schemas( - collection_path: Path, schemas_directory_name=SCHEMA_VARS_DIRECTORY_NAME -) -> list[ServiceCollectionSchema]: - """Get the schemas of a collection. +def read_schema_directory(directory_path: Path) -> list[ServiceCollectionSchema]: + """Read the schemas from a directory. This function is meant to be used only once during the initialization of a collection object. @@ -188,15 +187,13 @@ def get_collection_schemas( Invalid schemas are ignored. Args: - collection_path: Path to the collection. + directory_path: Path to the schema directory. Returns: Dictionary of schemas. """ schemas: list[ServiceCollectionSchema] = [] - for schema_path in (collection_path / schemas_directory_name).glob( - "*" + JSON_EXTENSION - ): + for schema_path in (directory_path).glob("*" + JSON_EXTENSION): try: schemas.append(ServiceCollectionSchema.from_path(schema_path)) except InvalidSchemaError as e: @@ -204,19 +201,19 @@ def get_collection_schemas( return schemas -def get_collection_playbooks( - collection_path: Path, - playbooks_directory_name=PLAYBOOKS_DIRECTORY_NAME, +def read_playbooks_directory( + directory_path: Path, + collection_name: str, inventory_reader: Optional[InventoryReader] = None, ) -> dict[str, Playbook]: - """Get the playbooks of a collection. + """Read the playbooks from a directory. This function is meant to be used only once during the initialization of a collection object. Args: - collection_path: Path to the collection. - playbook_directory: Name of the playbook directory. + directory_path: Path to the playbooks directory. + collection_name: Name of the collection. inventory_reader: Inventory reader. Returns: @@ -225,12 +222,10 @@ def get_collection_playbooks( return { playbook_path.stem: Playbook( playbook_path, - collection_path.name, + collection_name, read_hosts_from_playbook(playbook_path, inventory_reader), ) - for playbook_path in (collection_path / playbooks_directory_name).glob( - "*" + YML_EXTENSION - ) + for playbook_path in (directory_path).glob("*" + YML_EXTENSION) } @@ -255,19 +250,18 @@ def read_hosts_from_playbook( raise ValueError(f"Can't parse playbook {playbook_path}.") from e -def get_collection_dag_nodes( - collection_path: Path, dag_directory_name=DAG_DIRECTORY_NAME +def read_dag_directory( + directory_path: Path, ) -> Generator[TDPLibDagNodeModel, None, None]: - """Get the DAG nodes of a collection. + """Read the DAG files from a directory. Args: - collection_path: Path to the collection. - dag_directory_name: Name of the DAG directory. + directory_path: Path to the DAG directory. Returns: List of DAG nodes. """ - for dag_file in (collection_path / dag_directory_name).glob("*" + YML_EXTENSION): + for dag_file in (directory_path).glob("*" + YML_EXTENSION): yield from read_dag_file(dag_file) diff --git a/tests/unit/core/test_collection.py b/tests/unit/core/test_collection.py index 1c4a8269..04831934 100644 --- a/tests/unit/core/test_collection.py +++ b/tests/unit/core/test_collection.py @@ -12,10 +12,10 @@ PathDoesNotExistsError, PathIsNotADirectoryError, check_collection_structure, - get_collection_dag_nodes, - get_collection_playbooks, + read_dag_directory, read_dag_file, read_hosts_from_playbook, + read_playbooks_directory, ) from tdp.core.constants import ( DAG_DIRECTORY_NAME, @@ -108,7 +108,7 @@ def test_init_collection_playbooks(tmp_path: Path): command: echo "Hello, GitHub Copilot!" """ ) - playbooks = get_collection_playbooks(collection_path, playbook_directory) + playbooks = read_playbooks_directory(playbook_directory_path, collection_path.name) assert len(playbooks) == 2 assert "playbook1" in playbooks assert "playbook2" in playbooks @@ -217,7 +217,7 @@ def test_get_collection_dag_nodes(tmp_path: Path): - s1_c1_a """ ) - dag_nodes = list(get_collection_dag_nodes(collection_path, dag_directory)) + dag_nodes = list(read_dag_directory(dag_directory_path)) assert len(dag_nodes) == 2 assert any( node.name == "s1_c1_a" and node.depends_on == ["sx_cx_a"] for node in dag_nodes From 5a28077a60492cabbf476adf580a9aaa3f59ebf9 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Thu, 31 Oct 2024 10:34:09 +0100 Subject: [PATCH 02/14] fix: rename Collection to CollectionReader --- tdp/cli/params/collections_option.py | 4 ++-- tdp/core/collection.py | 6 +++--- tdp/core/collections.py | 12 ++++++------ test_dag_order/helpers.py | 4 ++-- tests/unit/core/conftest.py | 4 ++-- tests/unit/core/models/test_deployment_log.py | 4 ++-- tests/unit/core/test_collection.py | 10 +++++----- tests/unit/core/test_collections.py | 6 +++--- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tdp/cli/params/collections_option.py b/tdp/cli/params/collections_option.py index 6aa82132..e5c18c69 100644 --- a/tdp/cli/params/collections_option.py +++ b/tdp/cli/params/collections_option.py @@ -6,7 +6,7 @@ import click from click.decorators import FC -from tdp.core.collection import Collection +from tdp.core.collection import CollectionReader from tdp.core.collections import Collections @@ -29,7 +29,7 @@ def _collections_from_paths( if not value: raise click.BadParameter("cannot be empty", ctx=ctx, param=param) - collections_list = [Collection.from_path(path) for path in value] + collections_list = [CollectionReader.from_path(path) for path in value] collections = Collections.from_collection_list(collections_list) return collections diff --git a/tdp/core/collection.py b/tdp/core/collection.py index 962a656b..409289a3 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -51,7 +51,7 @@ class MissingMandatoryDirectoryError(Exception): pass -class Collection: +class CollectionReader: """An enriched version of an Ansible collection. A TDP Collection is a directory containing playbooks, DAGs, default variables and variables schemas. @@ -85,7 +85,7 @@ def __init__( self._schemas = read_schema_directory(self.default_vars_directory) @staticmethod - def from_path(path: PathLike) -> Collection: + def from_path(path: PathLike) -> CollectionReader: """Factory method to create a collection from a path. Args: @@ -93,7 +93,7 @@ def from_path(path: PathLike) -> Collection: Returns: A collection. """ - return Collection(path=Path(path).expanduser().resolve()) + return CollectionReader(path=Path(path).expanduser().resolve()) @property def name(self) -> str: diff --git a/tdp/core/collections.py b/tdp/core/collections.py index cde442d9..c198de14 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -16,7 +16,7 @@ from collections import OrderedDict from collections.abc import Mapping, Sequence -from tdp.core.collection import Collection +from tdp.core.collection import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName from tdp.core.entities.operation import Operations from tdp.core.operation import Operation @@ -25,14 +25,14 @@ logger = logging.getLogger(__name__) -class Collections(Mapping[str, Collection]): +class Collections(Mapping[str, CollectionReader]): """A mapping of collection name to Collection instance. This class also gather operations from all collections and filter them by their presence or not in the DAG. """ - def __init__(self, collections: Mapping[str, Collection]): + def __init__(self, collections: Mapping[str, CollectionReader]): self._collections = collections self._dag_operations, self._other_operations = self._init_operations( self._collections @@ -49,7 +49,7 @@ def __len__(self): return self._collections.__len__() @staticmethod - def from_collection_list(collections: Sequence[Collection]) -> Collections: + def from_collection_list(collections: Sequence[CollectionReader]) -> Collections: """Factory method to build Collections from a sequence of Collection. Ordering of the sequence is what will determine the loading order of the operations. @@ -94,7 +94,7 @@ def schemas(self) -> dict[str, ServiceSchema]: return self._schemas def _init_operations( - self, collections: Mapping[str, Collection] + self, collections: Mapping[str, CollectionReader] ) -> tuple[Operations, Operations]: dag_operations = Operations() other_operations = Operations() @@ -201,7 +201,7 @@ def _init_operations( return dag_operations, other_operations def _init_schemas( - self, collections: Mapping[str, Collection] + self, collections: Mapping[str, CollectionReader] ) -> dict[str, ServiceSchema]: schemas: dict[str, ServiceSchema] = {} for collection in collections.values(): diff --git a/test_dag_order/helpers.py b/test_dag_order/helpers.py index 0f2d00ee..6a8d8fe5 100644 --- a/test_dag_order/helpers.py +++ b/test_dag_order/helpers.py @@ -11,7 +11,7 @@ import yaml from tdp.core.collection import ( - Collection, + CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, PathIsNotADirectoryError, @@ -113,7 +113,7 @@ def __len__(self): return len(self.fixtures) -class CollectionToTest(Collection): +class CollectionToTest(CollectionReader): """A Collection containing a rules directory""" def __init__(self, path: Union[str, pathlib.Path]): diff --git a/tests/unit/core/conftest.py b/tests/unit/core/conftest.py index efa9ae84..aeb0ac4b 100644 --- a/tests/unit/core/conftest.py +++ b/tests/unit/core/conftest.py @@ -4,7 +4,7 @@ import pytest from tdp.core.cluster_status import ClusterStatus -from tdp.core.collection import Collection +from tdp.core.collection import CollectionReader from tdp.core.collections import Collections from tdp.core.dag import Dag from tdp.core.variables import ClusterVariables @@ -60,7 +60,7 @@ def mock_collections(tmp_path_factory: pytest.TempPathFactory) -> Collections: } generate_collection_at_path(path=temp_collection_path, dag=mock_dag, vars=mock_vars) return Collections.from_collection_list( - [Collection.from_path(temp_collection_path)] + [CollectionReader.from_path(temp_collection_path)] ) diff --git a/tests/unit/core/models/test_deployment_log.py b/tests/unit/core/models/test_deployment_log.py index e81d3e4b..e42a62fb 100644 --- a/tests/unit/core/models/test_deployment_log.py +++ b/tests/unit/core/models/test_deployment_log.py @@ -9,7 +9,7 @@ import pytest from sqlalchemy import Engine -from tdp.core.collection import Collection +from tdp.core.collection import CollectionReader from tdp.core.collections import ( Collections, ) @@ -163,7 +163,7 @@ def test_multiple_host(self, tmp_path_factory: pytest.TempPathFactory): ] } generate_collection_at_path(collection_path, dag_service_operations, {}) - collection = Collection(collection_path, MockInventoryReader(hosts)) + collection = CollectionReader(collection_path, MockInventoryReader(hosts)) collections = Collections.from_collection_list([collection]) deployment = DeploymentModel.from_operations( diff --git a/tests/unit/core/test_collection.py b/tests/unit/core/test_collection.py index 04831934..f8e88047 100644 --- a/tests/unit/core/test_collection.py +++ b/tests/unit/core/test_collection.py @@ -7,7 +7,7 @@ from pydantic import ValidationError from tdp.core.collection import ( - Collection, + CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, PathIsNotADirectoryError, @@ -30,19 +30,19 @@ def test_collection_from_path_does_not_exist(): with pytest.raises(PathDoesNotExistsError): - Collection.from_path("foo") + CollectionReader.from_path("foo") def test_collection_from_path_is_not_a_directory(tmp_path: Path): empty_file = tmp_path / "foo" empty_file.touch() with pytest.raises(PathIsNotADirectoryError): - Collection.from_path(empty_file) + CollectionReader.from_path(empty_file) def test_collection_from_path_missing_mandatory_directory(tmp_path: Path): with pytest.raises(MissingMandatoryDirectoryError): - Collection.from_path(tmp_path) + CollectionReader.from_path(tmp_path) def test_collection_from_path(tmp_path_factory: pytest.TempPathFactory): @@ -59,7 +59,7 @@ def test_collection_from_path(tmp_path_factory: pytest.TempPathFactory): }, } generate_collection_at_path(collection_path, dag_service_operations, service_vars) - collection = Collection.from_path(collection_path) + collection = CollectionReader.from_path(collection_path) assert "service_install" in collection.playbooks assert "service_config" in collection.playbooks diff --git a/tests/unit/core/test_collections.py b/tests/unit/core/test_collections.py index 43f11020..235cca6d 100644 --- a/tests/unit/core/test_collections.py +++ b/tests/unit/core/test_collections.py @@ -3,7 +3,7 @@ import pytest -from tdp.core.collection import Collection +from tdp.core.collection import CollectionReader from tdp.core.collections import Collections from tests.conftest import generate_collection_at_path @@ -45,8 +45,8 @@ def test_collections_from_collection_list(tmp_path_factory: pytest.TempPathFacto collection_path_2, dag_service_operations_2, service_vars_2 ) - collection1 = Collection.from_path(collection_path_1) - collection2 = Collection.from_path(collection_path_2) + collection1 = CollectionReader.from_path(collection_path_1) + collection2 = CollectionReader.from_path(collection_path_2) collections = Collections.from_collection_list([collection1, collection2]) assert collections.dag_operations is not None From 80d5386820dfe146743a921bab8896b5dc71abad Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 23 Oct 2024 13:11:28 +0200 Subject: [PATCH 03/14] feat: do not stor dag_nodes in collection --- tdp/core/collection.py | 8 +++----- tdp/core/collections.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tdp/core/collection.py b/tdp/core/collection.py index 409289a3..1dda7ca3 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -76,7 +76,6 @@ def __init__( check_collection_structure(self._path) self._inventory_reader = inventory_reader or InventoryReader() - self._dag_nodes = list(read_dag_directory(self.dag_directory)) self._playbooks = read_playbooks_directory( self.playbooks_directory, self.name, @@ -125,10 +124,9 @@ def schema_directory(self) -> Path: """Path to the variables schema directory.""" return self._path / SCHEMA_VARS_DIRECTORY_NAME - @property - def dag_nodes(self) -> list[TDPLibDagNodeModel]: - """List of DAG files in the YAML format.""" - return self._dag_nodes # TODO: should return a generator + def read_dag_nodes(self) -> Generator[TDPLibDagNodeModel, None, None]: + """Read the DAG nodes stored in the dag_directory.""" + return read_dag_directory(self.dag_directory) @property def playbooks(self) -> dict[str, Playbook]: diff --git a/tdp/core/collections.py b/tdp/core/collections.py index c198de14..c0fee2fe 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -101,7 +101,7 @@ def _init_operations( for collection in collections.values(): # Load DAG operations from the dag files - for dag_node in collection.dag_nodes: + for dag_node in collection.read_dag_nodes(): existing_operation = dag_operations.get(dag_node.name) # The read_operation is associated with a playbook defined in the From 39977199f9ca535b3083e28c2b0cbf662510a071 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 23 Oct 2024 13:13:32 +0200 Subject: [PATCH 04/14] feat: do not store schema in collection --- tdp/core/collection.py | 8 +++----- tdp/core/collections.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tdp/core/collection.py b/tdp/core/collection.py index 1dda7ca3..fb0b5fde 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -81,7 +81,6 @@ def __init__( self.name, inventory_reader=self._inventory_reader, ) - self._schemas = read_schema_directory(self.default_vars_directory) @staticmethod def from_path(path: PathLike) -> CollectionReader: @@ -133,10 +132,9 @@ def playbooks(self) -> dict[str, Playbook]: """Dictionary of playbooks.""" return self._playbooks - @property - def schemas(self) -> list[ServiceCollectionSchema]: - """List of schemas.""" - return self._schemas + def read_schemas(self) -> list[ServiceCollectionSchema]: + """Read the schemas stored in the schema_directory.""" + return read_schema_directory(self.schema_directory) def get_service_default_vars(self, service_name: str) -> list[tuple[str, Path]]: """Get the default variables for a service. diff --git a/tdp/core/collections.py b/tdp/core/collections.py index c0fee2fe..eba2c144 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -205,7 +205,7 @@ def _init_schemas( ) -> dict[str, ServiceSchema]: schemas: dict[str, ServiceSchema] = {} for collection in collections.values(): - for schema in collection.schemas: + for schema in collection.read_schemas(): schemas.setdefault(schema.service, ServiceSchema()).add_schema(schema) return schemas From 541b0810e6a635549bf6d7f196ff843a0e279ee1 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 23 Oct 2024 15:39:52 +0200 Subject: [PATCH 05/14] feat: do not store playbooks in collection --- tdp/core/collection.py | 17 +++++------- tdp/core/collections.py | 35 +++++++++++++++++------- tdp/core/dag.py | 2 +- tdp/core/deployment/deployment_runner.py | 4 +-- tests/unit/core/test_collection.py | 6 ++-- 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/tdp/core/collection.py b/tdp/core/collection.py index fb0b5fde..e7e70674 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -74,13 +74,7 @@ def __init__( """ self._path = Path(path) check_collection_structure(self._path) - self._inventory_reader = inventory_reader or InventoryReader() - self._playbooks = read_playbooks_directory( - self.playbooks_directory, - self.name, - inventory_reader=self._inventory_reader, - ) @staticmethod def from_path(path: PathLike) -> CollectionReader: @@ -127,10 +121,13 @@ def read_dag_nodes(self) -> Generator[TDPLibDagNodeModel, None, None]: """Read the DAG nodes stored in the dag_directory.""" return read_dag_directory(self.dag_directory) - @property - def playbooks(self) -> dict[str, Playbook]: - """Dictionary of playbooks.""" - return self._playbooks + def read_playbooks(self) -> dict[str, Playbook]: + """Read the playbooks stored in the playbooks_directory.""" + return read_playbooks_directory( + self.playbooks_directory, + self.name, + inventory_reader=self._inventory_reader, + ) def read_schemas(self) -> list[ServiceCollectionSchema]: """Read the schemas stored in the schema_directory.""" diff --git a/tdp/core/collections.py b/tdp/core/collections.py index eba2c144..aec1ccb5 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -18,7 +18,7 @@ from tdp.core.collection import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName -from tdp.core.entities.operation import Operations +from tdp.core.entities.operation import Operations, Playbook from tdp.core.operation import Operation from tdp.core.variables.schema.service_schema import ServiceSchema @@ -34,6 +34,7 @@ class Collections(Mapping[str, CollectionReader]): def __init__(self, collections: Mapping[str, CollectionReader]): self._collections = collections + self._playbooks = self._init_playbooks(self._collections) self._dag_operations, self._other_operations = self._init_operations( self._collections ) @@ -88,11 +89,32 @@ def operations(self) -> Operations: operations.update(self._other_operations) return operations + @property + def playbooks(self) -> dict[str, Playbook]: + """Mapping of playbook name to Playbook instance.""" + return self._playbooks + @property def schemas(self) -> dict[str, ServiceSchema]: """Mapping of service with their variable schemas.""" return self._schemas + def _init_playbooks( + self, collections: Mapping[str, CollectionReader] + ) -> dict[str, Playbook]: + playbooks = {} + for collection in collections.values(): + playbooks.update(collection.read_playbooks()) + for [playbook_name, playbook] in collection.read_playbooks().items(): + if playbook_name in playbooks: + logger.debug( + f"'{playbook_name}' defined in " + f"'{playbooks[playbook_name].collection_name}' " + f"is overridden by '{collection.name}'" + ) + playbooks[playbook_name] = playbook + return playbooks + def _init_operations( self, collections: Mapping[str, CollectionReader] ) -> tuple[Operations, Operations]: @@ -106,7 +128,7 @@ def _init_operations( # The read_operation is associated with a playbook defined in the # current collection - if playbook := collection.playbooks.get(dag_node.name): + if playbook := self.playbooks.get(dag_node.name): # TODO: would be nice to dissociate the Operation class from the playbook and store the playbook in the Operation dag_operation_to_register = Operation( name=dag_node.name, @@ -119,13 +141,6 @@ def _init_operations( dag_operation_to_register.depends_on.extend( dag_operations[dag_node.name].depends_on ) - # Print a warning if we override a playbook operation - if not existing_operation.noop: - logger.debug( - f"'{dag_node.name}' defined in " - f"'{existing_operation.collection_name}' " - f"is overridden by '{collection.name}'" - ) # Register the operation dag_operations[dag_node.name] = dag_operation_to_register continue @@ -183,7 +198,7 @@ def _init_operations( for collection in collections.values(): # Load playbook operations to complete the operations list with the # operations that are not defined in the DAG files - for operation_name, playbook in collection.playbooks.items(): + for operation_name, playbook in self.playbooks.items(): if operation_name in dag_operations: continue if operation_name in other_operations: diff --git a/tdp/core/dag.py b/tdp/core/dag.py index a0e020dc..84584a00 100644 --- a/tdp/core/dag.py +++ b/tdp/core/dag.py @@ -330,7 +330,7 @@ def warning(collection_name: str, message: str) -> None: # Operations tagged with the noop flag should not have a playbook defined in the collection - if operation_name in collections[operation.collection_name].playbooks: + if operation_name in collections.playbooks: if operation.noop: c_warning( f"Operation '{operation_name}' is noop and the playbook should not exist" diff --git a/tdp/core/deployment/deployment_runner.py b/tdp/core/deployment/deployment_runner.py index f5e778bf..4ff52950 100644 --- a/tdp/core/deployment/deployment_runner.py +++ b/tdp/core/deployment/deployment_runner.py @@ -66,9 +66,7 @@ def _run_operation(self, operation_rec: OperationModel) -> None: return # Execute the operation - playbook_file = ( - self._collections[operation.collection_name].playbooks[operation.name].path - ) + playbook_file = self._collections.playbooks[operation.name].path state, logs = self._executor.execute( playbook=playbook_file, host=operation_rec.host, diff --git a/tests/unit/core/test_collection.py b/tests/unit/core/test_collection.py index f8e88047..f8616d09 100644 --- a/tests/unit/core/test_collection.py +++ b/tests/unit/core/test_collection.py @@ -59,9 +59,9 @@ def test_collection_from_path(tmp_path_factory: pytest.TempPathFactory): }, } generate_collection_at_path(collection_path, dag_service_operations, service_vars) - collection = CollectionReader.from_path(collection_path) - assert "service_install" in collection.playbooks - assert "service_config" in collection.playbooks + playbooks = CollectionReader.from_path(collection_path).read_playbooks() + assert "service_install" in playbooks + assert "service_config" in playbooks def test_read_hosts_from_playbook(tmp_path: Path): From cbdcbb6505393c6552b5a082f25e6f62cbef6733 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 30 Oct 2024 17:53:09 +0100 Subject: [PATCH 06/14] feat: collections is not a mapping of collection anymore --- tdp/cli/commands/default_diff.py | 16 ++-- tdp/cli/params/collections_option.py | 2 +- tdp/core/collection.py | 14 ---- tdp/core/collections.py | 81 +++++++++---------- tdp/core/variables/cluster_variables.py | 7 +- test_dag_order/conftest.py | 2 +- tests/unit/core/conftest.py | 4 +- tests/unit/core/models/test_deployment_log.py | 2 +- tests/unit/core/test_collections.py | 2 +- 9 files changed, 58 insertions(+), 72 deletions(-) diff --git a/tdp/cli/commands/default_diff.py b/tdp/cli/commands/default_diff.py index 5401bd36..d3d783d6 100644 --- a/tdp/cli/commands/default_diff.py +++ b/tdp/cli/commands/default_diff.py @@ -35,7 +35,7 @@ def default_diff(collections: Collections, vars: Path, service: Optional[str] = service_diff(collections, service_variables) -def service_diff(collections, service): +def service_diff(collections: Collections, service): """Computes the difference between the default variables from a service, and the variables from your service variables inside your tdp_vars. Args: @@ -44,12 +44,14 @@ def service_diff(collections, service): """ # key: filename with extension, value: PosixPath(filepath) default_service_vars_paths = OrderedDict() - for collection in collections.values(): - default_vars = collection.get_service_default_vars(service.name) - if not default_vars: - continue - for name, path in default_vars: - default_service_vars_paths.setdefault(name, []).append(path) + for collection_default_vars_dir in collections.default_vars_dirs.values(): + for service_default_vars_dir in collection_default_vars_dir.iterdir(): + if not service_default_vars_dir: + continue + for default_vars_path in service_default_vars_dir.iterdir(): + default_service_vars_paths.setdefault( + default_vars_path.name, [] + ).append(default_vars_path) for ( default_service_vars_filename, diff --git a/tdp/cli/params/collections_option.py b/tdp/cli/params/collections_option.py index e5c18c69..b386a22c 100644 --- a/tdp/cli/params/collections_option.py +++ b/tdp/cli/params/collections_option.py @@ -30,7 +30,7 @@ def _collections_from_paths( raise click.BadParameter("cannot be empty", ctx=ctx, param=param) collections_list = [CollectionReader.from_path(path) for path in value] - collections = Collections.from_collection_list(collections_list) + collections = Collections(collections_list) return collections diff --git a/tdp/core/collection.py b/tdp/core/collection.py index e7e70674..cd27f73e 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -133,20 +133,6 @@ def read_schemas(self) -> list[ServiceCollectionSchema]: """Read the schemas stored in the schema_directory.""" return read_schema_directory(self.schema_directory) - def get_service_default_vars(self, service_name: str) -> list[tuple[str, Path]]: - """Get the default variables for a service. - - Args: - service_name: The name of the service. - - Returns: - A list of tuples (name, path) of the default variables. - """ - service_path = self.default_vars_directory / service_name - if not service_path.exists(): - return [] - return [(path.name, path) for path in service_path.glob("*" + YML_EXTENSION)] - def check_collection_structure(path: Path) -> None: """Check the structure of a collection. diff --git a/tdp/core/collections.py b/tdp/core/collections.py index aec1ccb5..1e97f08c 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -13,45 +13,27 @@ from __future__ import annotations import logging -from collections import OrderedDict -from collections.abc import Mapping, Sequence +from collections.abc import Iterable +from pathlib import Path +from typing import TYPE_CHECKING -from tdp.core.collection import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName from tdp.core.entities.operation import Operations, Playbook from tdp.core.operation import Operation from tdp.core.variables.schema.service_schema import ServiceSchema -logger = logging.getLogger(__name__) - - -class Collections(Mapping[str, CollectionReader]): - """A mapping of collection name to Collection instance. +if TYPE_CHECKING: + from tdp.core.collection import CollectionReader - This class also gather operations from all collections and filter them by their - presence or not in the DAG. - """ - - def __init__(self, collections: Mapping[str, CollectionReader]): - self._collections = collections - self._playbooks = self._init_playbooks(self._collections) - self._dag_operations, self._other_operations = self._init_operations( - self._collections - ) - self._schemas = self._init_schemas(self._collections) - def __getitem__(self, key): - return self._collections.__getitem__(key) +logger = logging.getLogger(__name__) - def __iter__(self): - return self._collections.__iter__() - def __len__(self): - return self._collections.__len__() +class Collections: + """Concatenation of in use collections.""" - @staticmethod - def from_collection_list(collections: Sequence[CollectionReader]) -> Collections: - """Factory method to build Collections from a sequence of Collection. + def __init__(self, collections: Iterable[CollectionReader]): + """Build Collections from a sequence of Collection. Ordering of the sequence is what will determine the loading order of the operations. An operation can override a previous operation. @@ -60,14 +42,14 @@ def from_collection_list(collections: Sequence[CollectionReader]) -> Collections collections: Ordered Sequence of Collection object. Returns: - A Collections object. - - Raises: - ValueError: If a collection name is duplicated. - """ - return Collections( - OrderedDict((collection.name, collection) for collection in collections) + A Collections object.""" + self._collections = list(collections) + self._playbooks = self._init_playbooks(self._collections) + self._dag_operations, self._other_operations = self._init_operations( + self._collections ) + self._default_var_directories = self._init_default_vars_dirs(self._collections) + self._schemas = self._init_schemas(self._collections) @property def dag_operations(self) -> Operations: @@ -94,16 +76,30 @@ def playbooks(self) -> dict[str, Playbook]: """Mapping of playbook name to Playbook instance.""" return self._playbooks + @property + def default_vars_dirs(self) -> dict[str, Path]: + """Mapping of collection name to their default vars directory.""" + return self._default_var_directories + @property def schemas(self) -> dict[str, ServiceSchema]: """Mapping of service with their variable schemas.""" return self._schemas + def _init_default_vars_dirs( + self, collections: Iterable[CollectionReader] + ) -> dict[str, Path]: + """Mapping of collection name to their default vars directory.""" + default_var_directories = {} + for collection in collections: + default_var_directories[collection.name] = collection.default_vars_directory + return default_var_directories + def _init_playbooks( - self, collections: Mapping[str, CollectionReader] + self, collections: Iterable[CollectionReader] ) -> dict[str, Playbook]: playbooks = {} - for collection in collections.values(): + for collection in collections: playbooks.update(collection.read_playbooks()) for [playbook_name, playbook] in collection.read_playbooks().items(): if playbook_name in playbooks: @@ -116,12 +112,13 @@ def _init_playbooks( return playbooks def _init_operations( - self, collections: Mapping[str, CollectionReader] + self, collections: Iterable[CollectionReader] ) -> tuple[Operations, Operations]: + collections = list(collections) dag_operations = Operations() other_operations = Operations() - for collection in collections.values(): + for collection in collections: # Load DAG operations from the dag files for dag_node in collection.read_dag_nodes(): existing_operation = dag_operations.get(dag_node.name) @@ -195,7 +192,7 @@ def _init_operations( # We can't merge the two for loops to handle the case where a playbook operation # is defined in a first collection but not used in the DAG and then used in # the DAG in a second collection. - for collection in collections.values(): + for collection in collections: # Load playbook operations to complete the operations list with the # operations that are not defined in the DAG files for operation_name, playbook in self.playbooks.items(): @@ -216,10 +213,10 @@ def _init_operations( return dag_operations, other_operations def _init_schemas( - self, collections: Mapping[str, CollectionReader] + self, collections: Iterable[CollectionReader] ) -> dict[str, ServiceSchema]: schemas: dict[str, ServiceSchema] = {} - for collection in collections.values(): + for collection in collections: for schema in collection.read_schemas(): schemas.setdefault(schema.service, ServiceSchema()).add_schema(schema) return schemas diff --git a/tdp/core/variables/cluster_variables.py b/tdp/core/variables/cluster_variables.py index 36cc2f02..59782b11 100644 --- a/tdp/core/variables/cluster_variables.py +++ b/tdp/core/variables/cluster_variables.py @@ -74,8 +74,11 @@ def initialize_cluster_variables( cluster_variables = {} collections_and_overrides = [ - (collection_name, collection.default_vars_directory.iterdir()) - for collection_name, collection in collections.items() + (collection_name, default_var_dir.iterdir()) + for [ + collection_name, + default_var_dir, + ] in collections.default_vars_dirs.items() ] for i, override_folder in enumerate(override_folders): diff --git a/test_dag_order/conftest.py b/test_dag_order/conftest.py index 9da93cd0..5bcb9313 100644 --- a/test_dag_order/conftest.py +++ b/test_dag_order/conftest.py @@ -111,7 +111,7 @@ def collections(request: pytest.FixtureRequest) -> Collections: collections = cast( list["CollectionToTest"], request.config.getoption("collection_paths") ) - return Collections.from_collection_list(collections) + return Collections(collections) @pytest.fixture(scope="session") diff --git a/tests/unit/core/conftest.py b/tests/unit/core/conftest.py index aeb0ac4b..a7e6e72c 100644 --- a/tests/unit/core/conftest.py +++ b/tests/unit/core/conftest.py @@ -59,9 +59,7 @@ def mock_collections(tmp_path_factory: pytest.TempPathFactory) -> Collections: }, } generate_collection_at_path(path=temp_collection_path, dag=mock_dag, vars=mock_vars) - return Collections.from_collection_list( - [CollectionReader.from_path(temp_collection_path)] - ) + return Collections([CollectionReader.from_path(temp_collection_path)]) @pytest.fixture(scope="session") diff --git a/tests/unit/core/models/test_deployment_log.py b/tests/unit/core/models/test_deployment_log.py index e42a62fb..455b6652 100644 --- a/tests/unit/core/models/test_deployment_log.py +++ b/tests/unit/core/models/test_deployment_log.py @@ -164,7 +164,7 @@ def test_multiple_host(self, tmp_path_factory: pytest.TempPathFactory): } generate_collection_at_path(collection_path, dag_service_operations, {}) collection = CollectionReader(collection_path, MockInventoryReader(hosts)) - collections = Collections.from_collection_list([collection]) + collections = Collections([collection]) deployment = DeploymentModel.from_operations( collections, operations_names, hosts diff --git a/tests/unit/core/test_collections.py b/tests/unit/core/test_collections.py index 235cca6d..b62b0785 100644 --- a/tests/unit/core/test_collections.py +++ b/tests/unit/core/test_collections.py @@ -47,7 +47,7 @@ def test_collections_from_collection_list(tmp_path_factory: pytest.TempPathFacto collection1 = CollectionReader.from_path(collection_path_1) collection2 = CollectionReader.from_path(collection_path_2) - collections = Collections.from_collection_list([collection1, collection2]) + collections = Collections([collection1, collection2]) assert collections.dag_operations is not None assert "service1_install" in collections.dag_operations From 3935dd70e5c7c669624f8bf79cde58cbe2a43a89 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 30 Oct 2024 18:33:19 +0100 Subject: [PATCH 07/14] feat: store a hostable_entities in collections hostable_entities is a mapping of service to a set of ServiceComponentName that allow to remove the last getter from the class. --- tdp/cli/commands/vars/edit.py | 2 +- tdp/cli/params/status/component_argument.py | 3 +- tdp/core/collections.py | 42 ++++++++++----------- test_dag_order/helpers.py | 2 +- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/tdp/cli/commands/vars/edit.py b/tdp/cli/commands/vars/edit.py index 331a8f7e..2be7bde1 100644 --- a/tdp/cli/commands/vars/edit.py +++ b/tdp/cli/commands/vars/edit.py @@ -96,7 +96,7 @@ def edit( # Check if component exists entity_name = parse_hostable_entity_name(variables_file.stem) if isinstance(entity_name, ServiceComponentName): - if entity_name not in collections.get_components_from_service(service_name): + if entity_name not in collections.hostable_entities[service_name]: raise click.ClickException( f"Error unknown component '{entity_name.component}' for service '{entity_name.service}'" ) diff --git a/tdp/cli/params/status/component_argument.py b/tdp/cli/params/status/component_argument.py index 04f1a1fc..42dcf150 100644 --- a/tdp/cli/params/status/component_argument.py +++ b/tdp/cli/params/status/component_argument.py @@ -19,8 +19,7 @@ def _check_component( collections: Collections = ctx.params["collections"] service: str = ctx.params["service"] if value and value not in [ - sc_name.component - for sc_name in collections.get_components_from_service(service) + sc_name.component for sc_name in collections.hostable_entities[service] ]: raise click.UsageError( f"Component '{value}' does not exists in service '{service}'." diff --git a/tdp/core/collections.py b/tdp/core/collections.py index 1e97f08c..68cca7bc 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -50,6 +50,7 @@ def __init__(self, collections: Iterable[CollectionReader]): ) self._default_var_directories = self._init_default_vars_dirs(self._collections) self._schemas = self._init_schemas(self._collections) + self._services_components = self._init_hostable_entities(self.operations) @property def dag_operations(self) -> Operations: @@ -86,6 +87,13 @@ def schemas(self) -> dict[str, ServiceSchema]: """Mapping of service with their variable schemas.""" return self._schemas + # ? The mapping is using service name as a string for convenience. Should we keep + # ? this or change it to ServiceName? + @property + def hostable_entities(self) -> dict[str, set[ServiceComponentName]]: + """Mapping of services to their set of components.""" + return self._services_components + def _init_default_vars_dirs( self, collections: Iterable[CollectionReader] ) -> dict[str, Path]: @@ -221,25 +229,15 @@ def _init_schemas( schemas.setdefault(schema.service, ServiceSchema()).add_schema(schema) return schemas - def get_components_from_service( - self, service_name: str - ) -> set[ServiceComponentName]: - """Retrieve the distinct components associated with a specific service. - - This method fetches and returns the unique component names tied to a given - service. The input service is not returned. - - Args: - service_name: The name of the service for which to retrieve associated - components. - - Returns: - A set containing the unique names of components related to the provided - service. - """ - return { - ServiceComponentName(service_name, operation.component_name) - for operation in self.operations.values() - if operation.service_name == service_name - and not operation.is_service_operation() - } + def _init_hostable_entities( + self, operations: Operations + ) -> dict[str, set[ServiceComponentName]]: + services_components: dict[str, set[ServiceComponentName]] = {} + for operation in operations.values(): + service = services_components.setdefault(operation.service_name, set()) + if not operation.component_name: + continue + service.add( + ServiceComponentName(operation.service_name, operation.component_name) + ) + return services_components diff --git a/test_dag_order/helpers.py b/test_dag_order/helpers.py index 6a8d8fe5..7443b482 100644 --- a/test_dag_order/helpers.py +++ b/test_dag_order/helpers.py @@ -187,7 +187,7 @@ def resolve_components( service_component_map: dict[str, str] = {} for service_component in service_components: if isinstance(parse_hostable_entity_name(service_component), ServiceName): - for component in collections.get_components_from_service(service_component): + for component in collections.hostable_entities[service_component]: resolved_components.add(component.name) service_component_map[component.name] = service_component else: From d53347f1756c071f6fcaa23e2c38c31fe67f2587 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 30 Oct 2024 19:06:45 +0100 Subject: [PATCH 08/14] feat: remove CollectionReader instances outside of Collections --- tdp/cli/params/collections_option.py | 6 +----- tdp/core/collection.py | 9 +++++++-- tdp/core/collections.py | 16 ++++++++++++++-- tests/unit/core/conftest.py | 3 +-- tests/unit/core/models/test_deployment_log.py | 6 +++--- tests/unit/core/test_collections.py | 7 +++---- 6 files changed, 29 insertions(+), 18 deletions(-) diff --git a/tdp/cli/params/collections_option.py b/tdp/cli/params/collections_option.py index b386a22c..6b29c3cb 100644 --- a/tdp/cli/params/collections_option.py +++ b/tdp/cli/params/collections_option.py @@ -6,7 +6,6 @@ import click from click.decorators import FC -from tdp.core.collection import CollectionReader from tdp.core.collections import Collections @@ -29,10 +28,7 @@ def _collections_from_paths( if not value: raise click.BadParameter("cannot be empty", ctx=ctx, param=param) - collections_list = [CollectionReader.from_path(path) for path in value] - collections = Collections(collections_list) - - return collections + return Collections.from_collection_paths(value) def collections_option(func: FC) -> FC: diff --git a/tdp/core/collection.py b/tdp/core/collection.py index cd27f73e..672aad3e 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -76,8 +76,12 @@ def __init__( check_collection_structure(self._path) self._inventory_reader = inventory_reader or InventoryReader() + # ? Is this method really useful? @staticmethod - def from_path(path: PathLike) -> CollectionReader: + def from_path( + path: PathLike, + inventory_reader: Optional[InventoryReader] = None, + ) -> CollectionReader: """Factory method to create a collection from a path. Args: @@ -85,7 +89,8 @@ def from_path(path: PathLike) -> CollectionReader: Returns: A collection. """ - return CollectionReader(path=Path(path).expanduser().resolve()) + inventory_reader = inventory_reader or InventoryReader() + return CollectionReader(Path(path).expanduser().resolve(), inventory_reader) @property def name(self) -> str: diff --git a/tdp/core/collections.py b/tdp/core/collections.py index 68cca7bc..b54bfaae 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -15,15 +15,17 @@ import logging from collections.abc import Iterable from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional +from tdp.core.collection import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName from tdp.core.entities.operation import Operations, Playbook +from tdp.core.inventory_reader import InventoryReader from tdp.core.operation import Operation from tdp.core.variables.schema.service_schema import ServiceSchema if TYPE_CHECKING: - from tdp.core.collection import CollectionReader + from tdp.core.types import PathLike logger = logging.getLogger(__name__) @@ -52,6 +54,16 @@ def __init__(self, collections: Iterable[CollectionReader]): self._schemas = self._init_schemas(self._collections) self._services_components = self._init_hostable_entities(self.operations) + @staticmethod + def from_collection_paths( + paths: Iterable[PathLike], inventory_reader: Optional[InventoryReader] = None + ): + inventory_reader = inventory_reader or InventoryReader() + collection_readers = [ + CollectionReader.from_path(path, inventory_reader) for path in paths + ] + return Collections(collection_readers) + @property def dag_operations(self) -> Operations: """Mapping of operation name that are defined in dag files to their Operation instance.""" diff --git a/tests/unit/core/conftest.py b/tests/unit/core/conftest.py index a7e6e72c..51893936 100644 --- a/tests/unit/core/conftest.py +++ b/tests/unit/core/conftest.py @@ -4,7 +4,6 @@ import pytest from tdp.core.cluster_status import ClusterStatus -from tdp.core.collection import CollectionReader from tdp.core.collections import Collections from tdp.core.dag import Dag from tdp.core.variables import ClusterVariables @@ -59,7 +58,7 @@ def mock_collections(tmp_path_factory: pytest.TempPathFactory) -> Collections: }, } generate_collection_at_path(path=temp_collection_path, dag=mock_dag, vars=mock_vars) - return Collections([CollectionReader.from_path(temp_collection_path)]) + return Collections.from_collection_paths([temp_collection_path]) @pytest.fixture(scope="session") diff --git a/tests/unit/core/models/test_deployment_log.py b/tests/unit/core/models/test_deployment_log.py index 455b6652..79a1ba81 100644 --- a/tests/unit/core/models/test_deployment_log.py +++ b/tests/unit/core/models/test_deployment_log.py @@ -9,7 +9,6 @@ import pytest from sqlalchemy import Engine -from tdp.core.collection import CollectionReader from tdp.core.collections import ( Collections, ) @@ -163,8 +162,9 @@ def test_multiple_host(self, tmp_path_factory: pytest.TempPathFactory): ] } generate_collection_at_path(collection_path, dag_service_operations, {}) - collection = CollectionReader(collection_path, MockInventoryReader(hosts)) - collections = Collections([collection]) + collections = Collections.from_collection_paths( + [collection_path], MockInventoryReader(hosts) + ) deployment = DeploymentModel.from_operations( collections, operations_names, hosts diff --git a/tests/unit/core/test_collections.py b/tests/unit/core/test_collections.py index b62b0785..e3234375 100644 --- a/tests/unit/core/test_collections.py +++ b/tests/unit/core/test_collections.py @@ -3,7 +3,6 @@ import pytest -from tdp.core.collection import CollectionReader from tdp.core.collections import Collections from tests.conftest import generate_collection_at_path @@ -45,9 +44,9 @@ def test_collections_from_collection_list(tmp_path_factory: pytest.TempPathFacto collection_path_2, dag_service_operations_2, service_vars_2 ) - collection1 = CollectionReader.from_path(collection_path_1) - collection2 = CollectionReader.from_path(collection_path_2) - collections = Collections([collection1, collection2]) + collections = Collections.from_collection_paths( + [collection_path_1, collection_path_2] + ) assert collections.dag_operations is not None assert "service1_install" in collections.dag_operations From 2308be20e199f3958ec3da5974018e1536e4300d Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Thu, 7 Nov 2024 11:38:01 +0100 Subject: [PATCH 09/14] fix: import constant from wrong file --- tdp/core/variables/service_variables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tdp/core/variables/service_variables.py b/tdp/core/variables/service_variables.py index 395d522b..bd6cb286 100644 --- a/tdp/core/variables/service_variables.py +++ b/tdp/core/variables/service_variables.py @@ -10,8 +10,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Optional -from tdp.core.collection import YML_EXTENSION -from tdp.core.constants import SERVICE_NAME_MAX_LENGTH +from tdp.core.constants import SERVICE_NAME_MAX_LENGTH, YML_EXTENSION from tdp.core.types import PathLike from tdp.core.variables.schema.exceptions import SchemaValidationError from tdp.core.variables.variables import ( From e3e7b4391d421589252c9239ff66794121c47dae Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Thu, 7 Nov 2024 11:39:07 +0100 Subject: [PATCH 10/14] refactor: merge CollectionReader methods --- tdp/core/collection.py | 193 ++++++++++------------------- tests/unit/core/test_collection.py | 135 +++++--------------- 2 files changed, 100 insertions(+), 228 deletions(-) diff --git a/tdp/core/collection.py b/tdp/core/collection.py index 672aad3e..b98d9aac 100644 --- a/tdp/core/collection.py +++ b/tdp/core/collection.py @@ -51,6 +51,23 @@ class MissingMandatoryDirectoryError(Exception): pass +class TDPLibDagNodeModel(BaseModel): + """Model for a TDP operation defined in a tdp_lib_dag file.""" + + model_config = ConfigDict(extra="ignore") + + name: str + depends_on: list[str] = [] + + +class TDPLibDagModel(BaseModel): + """Model for a TDP DAG defined in a tdp_lib_dag file.""" + + model_config = ConfigDict(extra="ignore") + + operations: list[TDPLibDagNodeModel] + + class CollectionReader: """An enriched version of an Ansible collection. @@ -73,7 +90,7 @@ def __init__( MissingMandatoryDirectoryError: If the collection does not contain a mandatory directory. """ self._path = Path(path) - check_collection_structure(self._path) + self._check_collection_structure(self._path) self._inventory_reader = inventory_reader or InventoryReader() # ? Is this method really useful? @@ -124,93 +141,63 @@ def schema_directory(self) -> Path: def read_dag_nodes(self) -> Generator[TDPLibDagNodeModel, None, None]: """Read the DAG nodes stored in the dag_directory.""" - return read_dag_directory(self.dag_directory) + for dag_file in (self.dag_directory).glob("*" + YML_EXTENSION): + with dag_file.open("r") as operations_file: + file_content = yaml.load(operations_file, Loader=Loader) + + try: + tdp_lib_dag = TDPLibDagModel(operations=file_content) + for operation in tdp_lib_dag.operations: + yield operation + except ValidationError as e: + logger.error(f"Error while parsing tdp_lib_dag file {dag_file}: {e}") + raise def read_playbooks(self) -> dict[str, Playbook]: """Read the playbooks stored in the playbooks_directory.""" - return read_playbooks_directory( - self.playbooks_directory, - self.name, - inventory_reader=self._inventory_reader, - ) - - def read_schemas(self) -> list[ServiceCollectionSchema]: - """Read the schemas stored in the schema_directory.""" - return read_schema_directory(self.schema_directory) - - -def check_collection_structure(path: Path) -> None: - """Check the structure of a collection. - - Args: - path: Path to the collection. - - Raises: - PathDoesNotExistsError: If the path does not exists. - PathIsNotADirectoryError: If the path is not a directory. - MissingMandatoryDirectoryError: If the collection does not contain a mandatory directory. - """ - if not path.exists(): - raise PathDoesNotExistsError(f"{path} does not exists.") - if not path.is_dir(): - raise PathIsNotADirectoryError(f"{path} is not a directory.") - for mandatory_directory in MANDATORY_DIRECTORIES: - mandatory_path = path / mandatory_directory - if not mandatory_path.exists() or not mandatory_path.is_dir(): - raise MissingMandatoryDirectoryError( - f"{path} does not contain the mandatory directory {mandatory_directory}.", + return { + playbook_path.stem: Playbook( + path=playbook_path, + collection_name=self.name, + hosts=read_hosts_from_playbook(playbook_path, self._inventory_reader), ) + for playbook_path in (self.playbooks_directory).glob("*" + YML_EXTENSION) + } + def read_schemas(self) -> list[ServiceCollectionSchema]: + """Read the schemas stored in the schema_directory. -def read_schema_directory(directory_path: Path) -> list[ServiceCollectionSchema]: - """Read the schemas from a directory. - - This function is meant to be used only once during the initialization of a - collection object. - - Invalid schemas are ignored. - - Args: - directory_path: Path to the schema directory. - - Returns: - Dictionary of schemas. - """ - schemas: list[ServiceCollectionSchema] = [] - for schema_path in (directory_path).glob("*" + JSON_EXTENSION): - try: - schemas.append(ServiceCollectionSchema.from_path(schema_path)) - except InvalidSchemaError as e: - logger.warning(f"{e}. Ignoring schema.") - return schemas - - -def read_playbooks_directory( - directory_path: Path, - collection_name: str, - inventory_reader: Optional[InventoryReader] = None, -) -> dict[str, Playbook]: - """Read the playbooks from a directory. + Invalid schemas are ignored. + """ + schemas: list[ServiceCollectionSchema] = [] + for schema_path in (self.schema_directory).glob("*" + JSON_EXTENSION): + try: + schemas.append(ServiceCollectionSchema.from_path(schema_path)) + except InvalidSchemaError as e: + logger.warning(f"{e}. Ignoring schema.") + return schemas - This function is meant to be used only once during the initialization of a - collection object. + def _check_collection_structure(self, path: Path) -> None: + """Check the structure of a collection. - Args: - directory_path: Path to the playbooks directory. - collection_name: Name of the collection. - inventory_reader: Inventory reader. + Args: + path: Path to the collection. - Returns: - Dictionary of playbooks. - """ - return { - playbook_path.stem: Playbook( - playbook_path, - collection_name, - read_hosts_from_playbook(playbook_path, inventory_reader), - ) - for playbook_path in (directory_path).glob("*" + YML_EXTENSION) - } + Raises: + PathDoesNotExistsError: If the path does not exists. + PathIsNotADirectoryError: If the path is not a directory. + MissingMandatoryDirectoryError: If the collection does not contain a mandatory directory. + """ + if not path.exists(): + raise PathDoesNotExistsError(f"{path} does not exists.") + if not path.is_dir(): + raise PathIsNotADirectoryError(f"{path} is not a directory.") + for mandatory_directory in MANDATORY_DIRECTORIES: + mandatory_path = path / mandatory_directory + if not mandatory_path.exists() or not mandatory_path.is_dir(): + raise MissingMandatoryDirectoryError( + f"{path} does not contain the mandatory directory {mandatory_directory}.", + ) def read_hosts_from_playbook( @@ -232,51 +219,3 @@ def read_hosts_from_playbook( return inventory_reader.get_hosts_from_playbook(fd) except Exception as e: raise ValueError(f"Can't parse playbook {playbook_path}.") from e - - -def read_dag_directory( - directory_path: Path, -) -> Generator[TDPLibDagNodeModel, None, None]: - """Read the DAG files from a directory. - - Args: - directory_path: Path to the DAG directory. - - Returns: - List of DAG nodes. - """ - for dag_file in (directory_path).glob("*" + YML_EXTENSION): - yield from read_dag_file(dag_file) - - -class TDPLibDagNodeModel(BaseModel): - """Model for a TDP operation defined in a tdp_lib_dag file.""" - - model_config = ConfigDict(extra="ignore") - - name: str - depends_on: list[str] = [] - - -class TDPLibDagModel(BaseModel): - """Model for a TDP DAG defined in a tdp_lib_dag file.""" - - model_config = ConfigDict(extra="ignore") - - operations: list[TDPLibDagNodeModel] - - -def read_dag_file( - dag_file_path: Path, -) -> Generator[TDPLibDagNodeModel, None, None]: - """Read a tdp_lib_dag file and return a list of DAG operations.""" - with dag_file_path.open("r") as operations_file: - file_content = yaml.load(operations_file, Loader=Loader) - - try: - tdp_lib_dag = TDPLibDagModel(operations=file_content) - for operation in tdp_lib_dag.operations: - yield operation - except ValidationError as e: - logger.error(f"Error while parsing tdp_lib_dag file {dag_file_path}: {e}") - raise diff --git a/tests/unit/core/test_collection.py b/tests/unit/core/test_collection.py index f8616d09..a1c2ab59 100644 --- a/tests/unit/core/test_collection.py +++ b/tests/unit/core/test_collection.py @@ -11,11 +11,7 @@ MissingMandatoryDirectoryError, PathDoesNotExistsError, PathIsNotADirectoryError, - check_collection_structure, - read_dag_directory, - read_dag_file, read_hosts_from_playbook, - read_playbooks_directory, ) from tdp.core.constants import ( DAG_DIRECTORY_NAME, @@ -28,6 +24,18 @@ ) +@pytest.fixture(scope="session") +def mock_empty_collection_reader(tmp_path_factory: pytest.TempPathFactory) -> Path: + temp_collection_path = tmp_path_factory.mktemp("mock_collection") + for directory in [ + DAG_DIRECTORY_NAME, + DEFAULT_VARS_DIRECTORY_NAME, + PLAYBOOKS_DIRECTORY_NAME, + ]: + (temp_collection_path / directory).mkdir(parents=True, exist_ok=True) + return temp_collection_path + + def test_collection_from_path_does_not_exist(): with pytest.raises(PathDoesNotExistsError): CollectionReader.from_path("foo") @@ -82,14 +90,10 @@ def test_read_hosts_from_playbook(tmp_path: Path): assert hosts == {"host1", "host2"} -def test_init_collection_playbooks(tmp_path: Path): - collection_path = tmp_path / "collection" - playbook_directory = "playbooks" - (playbook_directory_path := collection_path / playbook_directory).mkdir( - parents=True, exist_ok=True - ) - playbook_path_1 = playbook_directory_path / "playbook1.yml" - playbook_path_2 = playbook_directory_path / "playbook2.yml" +def test_collection_reader_read_playbooks(mock_empty_collection_reader: Path): + collection_reader = CollectionReader(mock_empty_collection_reader) + playbook_path_1 = collection_reader.playbooks_directory / "playbook1.yml" + playbook_path_2 = collection_reader.playbooks_directory / "playbook2.yml" playbook_path_1.write_text( """--- - name: Play 1 @@ -108,101 +112,20 @@ def test_init_collection_playbooks(tmp_path: Path): command: echo "Hello, GitHub Copilot!" """ ) - playbooks = read_playbooks_directory(playbook_directory_path, collection_path.name) + playbooks = collection_reader.read_playbooks() assert len(playbooks) == 2 assert "playbook1" in playbooks assert "playbook2" in playbooks assert playbooks["playbook1"].path == playbook_path_1 - assert playbooks["playbook1"].collection_name == collection_path.name + assert playbooks["playbook1"].collection_name == collection_reader.name assert playbooks["playbook2"].path == playbook_path_2 - assert playbooks["playbook2"].collection_name == collection_path.name - - -def test_check_collection_structure_path_does_not_exist(tmp_path: Path): - with pytest.raises(PathDoesNotExistsError): - check_collection_structure(tmp_path / "nonexistent_directory") - - -def test_check_collection_structure_path_is_not_a_directory(tmp_path: Path): - empty_file = tmp_path / "foo" - empty_file.touch() - with pytest.raises(PathIsNotADirectoryError): - check_collection_structure(empty_file) - - -def test_check_collection_structure_missing_mandatory_directory(tmp_path: Path): - with pytest.raises(MissingMandatoryDirectoryError): - check_collection_structure(tmp_path) - - -def test_check_collection_structure_valid_collection(tmp_path: Path): - collection_path = tmp_path / "collection" - for mandatory_directory in ( - DAG_DIRECTORY_NAME, - DEFAULT_VARS_DIRECTORY_NAME, - PLAYBOOKS_DIRECTORY_NAME, - ): - (collection_path / mandatory_directory).mkdir(parents=True, exist_ok=True) - assert check_collection_structure(collection_path) is None - - -def test_read_dag_file(tmp_path: Path): - dag_file_path = tmp_path / "dag_file.yml" - dag_file_path.write_text( - """--- -- name: s1_c1_a - depends_on: - - sx_cx_a -- name: s2_c2_a - depends_on: - - s1_c1_a -- name: s3_c3_a - depends_on: - - sx_cx_a - - sy_cy_a -""" - ) - operations = list(read_dag_file(dag_file_path)) - assert len(operations) == 3 - assert operations[0].name == "s1_c1_a" - assert operations[0].depends_on == ["sx_cx_a"] - assert operations[1].name == "s2_c2_a" - assert operations[1].depends_on == ["s1_c1_a"] - assert operations[2].name == "s3_c3_a" - assert operations[2].depends_on == ["sx_cx_a", "sy_cy_a"] - - -def test_read_dag_file_empty(tmp_path: Path): - dag_file_path = tmp_path / "dag_file.yml" - dag_file_path.write_text("") - with pytest.raises(ValidationError): - list(read_dag_file(dag_file_path)) + assert playbooks["playbook2"].collection_name == collection_reader.name -def test_read_dag_file_with_additional_props(tmp_path: Path): - dag_file_path = tmp_path / "dag_file.yml" - dag_file_path.write_text( - """--- -- name: s1_c1_a - depends_on: - - sx_cx_a - foo: bar -""" - ) - operations = list(read_dag_file(dag_file_path)) - assert len(operations) == 1 - assert operations[0].name == "s1_c1_a" - assert operations[0].depends_on == ["sx_cx_a"] - - -def test_get_collection_dag_nodes(tmp_path: Path): - collection_path = tmp_path / "collection" - dag_directory = "dag" - (dag_directory_path := collection_path / dag_directory).mkdir( - parents=True, exist_ok=True - ) - dag_file_1 = dag_directory_path / "dag1.yml" - dag_file_2 = dag_directory_path / "dag2.yml" +def test_collection_reader_read_dag_nodes(mock_empty_collection_reader: Path): + collection_reader = CollectionReader(mock_empty_collection_reader) + dag_file_1 = collection_reader.dag_directory / "dag1.yml" + dag_file_2 = collection_reader.dag_directory / "dag2.yml" dag_file_1.write_text( """--- - name: s1_c1_a @@ -217,7 +140,7 @@ def test_get_collection_dag_nodes(tmp_path: Path): - s1_c1_a """ ) - dag_nodes = list(read_dag_directory(dag_directory_path)) + dag_nodes = list(collection_reader.read_dag_nodes()) assert len(dag_nodes) == 2 assert any( node.name == "s1_c1_a" and node.depends_on == ["sx_cx_a"] for node in dag_nodes @@ -225,3 +148,13 @@ def test_get_collection_dag_nodes(tmp_path: Path): assert any( node.name == "s2_c2_a" and node.depends_on == ["s1_c1_a"] for node in dag_nodes ) + + +def test_collection_reader_read_dag_nodes_empty_file( + mock_empty_collection_reader: Path, +): + collection_reader = CollectionReader(mock_empty_collection_reader) + dag_file = collection_reader.dag_directory / "dag.yml" + dag_file.write_text("") + with pytest.raises(ValidationError): + list(collection_reader.read_dag_nodes()) From a6c8d61b4e80cda7e5837b6272f67bf1a3839be3 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Thu, 31 Oct 2024 10:30:38 +0100 Subject: [PATCH 11/14] refactor: stop passing collection_readers as an arg to the init methods This doesn't make sense anymore as some of the init methods require other parameter that are not passed as arguments. --- tdp/core/collections.py | 48 ++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/tdp/core/collections.py b/tdp/core/collections.py index b54bfaae..c8bc5db1 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -45,14 +45,13 @@ def __init__(self, collections: Iterable[CollectionReader]): Returns: A Collections object.""" - self._collections = list(collections) - self._playbooks = self._init_playbooks(self._collections) - self._dag_operations, self._other_operations = self._init_operations( - self._collections - ) - self._default_var_directories = self._init_default_vars_dirs(self._collections) - self._schemas = self._init_schemas(self._collections) - self._services_components = self._init_hostable_entities(self.operations) + self._collection_readers = list(collections) + + self._playbooks = self._init_playbooks() + self._dag_operations, self._other_operations = self._init_operations() + self._default_var_directories = self._init_default_vars_dirs() + self._schemas = self._init_schemas() + self._services_components = self._init_hostable_entities() @staticmethod def from_collection_paths( @@ -106,20 +105,16 @@ def hostable_entities(self) -> dict[str, set[ServiceComponentName]]: """Mapping of services to their set of components.""" return self._services_components - def _init_default_vars_dirs( - self, collections: Iterable[CollectionReader] - ) -> dict[str, Path]: + def _init_default_vars_dirs(self) -> dict[str, Path]: """Mapping of collection name to their default vars directory.""" default_var_directories = {} - for collection in collections: + for collection in self._collection_readers: default_var_directories[collection.name] = collection.default_vars_directory return default_var_directories - def _init_playbooks( - self, collections: Iterable[CollectionReader] - ) -> dict[str, Playbook]: + def _init_playbooks(self) -> dict[str, Playbook]: playbooks = {} - for collection in collections: + for collection in self._collection_readers: playbooks.update(collection.read_playbooks()) for [playbook_name, playbook] in collection.read_playbooks().items(): if playbook_name in playbooks: @@ -131,14 +126,11 @@ def _init_playbooks( playbooks[playbook_name] = playbook return playbooks - def _init_operations( - self, collections: Iterable[CollectionReader] - ) -> tuple[Operations, Operations]: - collections = list(collections) + def _init_operations(self) -> tuple[Operations, Operations]: dag_operations = Operations() other_operations = Operations() - for collection in collections: + for collection in self._collection_readers: # Load DAG operations from the dag files for dag_node in collection.read_dag_nodes(): existing_operation = dag_operations.get(dag_node.name) @@ -212,7 +204,7 @@ def _init_operations( # We can't merge the two for loops to handle the case where a playbook operation # is defined in a first collection but not used in the DAG and then used in # the DAG in a second collection. - for collection in collections: + for collection in self._collection_readers: # Load playbook operations to complete the operations list with the # operations that are not defined in the DAG files for operation_name, playbook in self.playbooks.items(): @@ -232,20 +224,16 @@ def _init_operations( return dag_operations, other_operations - def _init_schemas( - self, collections: Iterable[CollectionReader] - ) -> dict[str, ServiceSchema]: + def _init_schemas(self) -> dict[str, ServiceSchema]: schemas: dict[str, ServiceSchema] = {} - for collection in collections: + for collection in self._collection_readers: for schema in collection.read_schemas(): schemas.setdefault(schema.service, ServiceSchema()).add_schema(schema) return schemas - def _init_hostable_entities( - self, operations: Operations - ) -> dict[str, set[ServiceComponentName]]: + def _init_hostable_entities(self) -> dict[str, set[ServiceComponentName]]: services_components: dict[str, set[ServiceComponentName]] = {} - for operation in operations.values(): + for operation in self.operations.values(): service = services_components.setdefault(operation.service_name, set()) if not operation.component_name: continue From f3516e8308b9697d6dfae32f8472d4724a67f67b Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Thu, 7 Nov 2024 11:13:42 +0100 Subject: [PATCH 12/14] refactor: rename collection module to collection_reader --- tdp/core/{collection.py => collection_reader.py} | 0 tdp/core/collections.py | 2 +- test_dag_order/helpers.py | 2 +- .../unit/core/{test_collection.py => test_collection_reader.py} | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename tdp/core/{collection.py => collection_reader.py} (100%) rename tests/unit/core/{test_collection.py => test_collection_reader.py} (99%) diff --git a/tdp/core/collection.py b/tdp/core/collection_reader.py similarity index 100% rename from tdp/core/collection.py rename to tdp/core/collection_reader.py diff --git a/tdp/core/collections.py b/tdp/core/collections.py index c8bc5db1..5b56ecaf 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Optional -from tdp.core.collection import CollectionReader +from tdp.core.collection_reader import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName from tdp.core.entities.operation import Operations, Playbook from tdp.core.inventory_reader import InventoryReader diff --git a/test_dag_order/helpers.py b/test_dag_order/helpers.py index 7443b482..f94d48d6 100644 --- a/test_dag_order/helpers.py +++ b/test_dag_order/helpers.py @@ -10,7 +10,7 @@ import yaml -from tdp.core.collection import ( +from tdp.core.collection_reader import ( CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, diff --git a/tests/unit/core/test_collection.py b/tests/unit/core/test_collection_reader.py similarity index 99% rename from tests/unit/core/test_collection.py rename to tests/unit/core/test_collection_reader.py index a1c2ab59..626a92d4 100644 --- a/tests/unit/core/test_collection.py +++ b/tests/unit/core/test_collection_reader.py @@ -6,7 +6,7 @@ import pytest from pydantic import ValidationError -from tdp.core.collection import ( +from tdp.core.collection_reader import ( CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, From f2a031353804bfd0e9f9baa5fc69f2ffd014c0e5 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Tue, 12 Nov 2024 16:08:11 +0100 Subject: [PATCH 13/14] refactor: move collections related module in a collections package --- tdp/core/collections/__init__.py | 4 ++++ tdp/core/{ => collections}/collection_reader.py | 0 tdp/core/{ => collections}/collections.py | 3 ++- test_dag_order/helpers.py | 4 ++-- tests/unit/core/test_collection_reader.py | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 tdp/core/collections/__init__.py rename tdp/core/{ => collections}/collection_reader.py (100%) rename tdp/core/{ => collections}/collections.py (99%) diff --git a/tdp/core/collections/__init__.py b/tdp/core/collections/__init__.py new file mode 100644 index 00000000..953f287c --- /dev/null +++ b/tdp/core/collections/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2022 TOSIT.IO +# SPDX-License-Identifier: Apache-2.0 + +from .collections import Collections diff --git a/tdp/core/collection_reader.py b/tdp/core/collections/collection_reader.py similarity index 100% rename from tdp/core/collection_reader.py rename to tdp/core/collections/collection_reader.py diff --git a/tdp/core/collections.py b/tdp/core/collections/collections.py similarity index 99% rename from tdp/core/collections.py rename to tdp/core/collections/collections.py index 5b56ecaf..05024b34 100644 --- a/tdp/core/collections.py +++ b/tdp/core/collections/collections.py @@ -17,13 +17,14 @@ from pathlib import Path from typing import TYPE_CHECKING, Optional -from tdp.core.collection_reader import CollectionReader from tdp.core.entities.hostable_entity_name import ServiceComponentName from tdp.core.entities.operation import Operations, Playbook from tdp.core.inventory_reader import InventoryReader from tdp.core.operation import Operation from tdp.core.variables.schema.service_schema import ServiceSchema +from .collection_reader import CollectionReader + if TYPE_CHECKING: from tdp.core.types import PathLike diff --git a/test_dag_order/helpers.py b/test_dag_order/helpers.py index f94d48d6..f33de363 100644 --- a/test_dag_order/helpers.py +++ b/test_dag_order/helpers.py @@ -10,13 +10,13 @@ import yaml -from tdp.core.collection_reader import ( +from tdp.core.collections import Collections +from tdp.core.collections.collection_reader import ( CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, PathIsNotADirectoryError, ) -from tdp.core.collections import Collections from tdp.core.constants import YML_EXTENSION from tdp.core.entities.hostable_entity_name import ( ServiceName, diff --git a/tests/unit/core/test_collection_reader.py b/tests/unit/core/test_collection_reader.py index 626a92d4..e5bb63bb 100644 --- a/tests/unit/core/test_collection_reader.py +++ b/tests/unit/core/test_collection_reader.py @@ -6,7 +6,7 @@ import pytest from pydantic import ValidationError -from tdp.core.collection_reader import ( +from tdp.core.collections.collection_reader import ( CollectionReader, MissingMandatoryDirectoryError, PathDoesNotExistsError, From cd3490980c4ed0ff867430af213dccf011063d82 Mon Sep 17 00:00:00 2001 From: Paul Farault Date: Wed, 13 Nov 2024 10:42:01 +0100 Subject: [PATCH 14/14] fix: default diff service check --- tdp/cli/commands/default_diff.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tdp/cli/commands/default_diff.py b/tdp/cli/commands/default_diff.py index d3d783d6..80a1a4b1 100644 --- a/tdp/cli/commands/default_diff.py +++ b/tdp/cli/commands/default_diff.py @@ -45,13 +45,13 @@ def service_diff(collections: Collections, service): # key: filename with extension, value: PosixPath(filepath) default_service_vars_paths = OrderedDict() for collection_default_vars_dir in collections.default_vars_dirs.values(): - for service_default_vars_dir in collection_default_vars_dir.iterdir(): - if not service_default_vars_dir: - continue - for default_vars_path in service_default_vars_dir.iterdir(): - default_service_vars_paths.setdefault( - default_vars_path.name, [] - ).append(default_vars_path) + service_default_vars_dir = collection_default_vars_dir / service.name + if not service_default_vars_dir.exists(): + continue + for default_vars_path in service_default_vars_dir.iterdir(): + default_service_vars_paths.setdefault(default_vars_path.name, []).append( + default_vars_path + ) for ( default_service_vars_filename,