Skip to content

Commit

Permalink
refactor: extract parsing logic from collection module
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulFarault committed Oct 22, 2024
1 parent bbf82f3 commit b3c28bb
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 143 deletions.
9 changes: 9 additions & 0 deletions tdp/core/collection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

from .collection import (
Collection,
MissingMandatoryDirectoryError,
PathDoesNotExistsError,
PathIsNotADirectoryError,
)
57 changes: 1 addition & 56 deletions tdp/core/collection.py → tdp/core/collection/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
from pathlib import Path
from typing import Optional

import yaml
from pydantic import BaseModel, ConfigDict, ValidationError

from tdp.core.collection.parse_dag import TDPLibDagNodeModel, parse_dag_directory
from tdp.core.constants import (
DAG_DIRECTORY_NAME,
DEFAULT_VARS_DIRECTORY_NAME,
Expand All @@ -25,11 +23,6 @@
from tdp.core.variables.schema import ServiceCollectionSchema
from tdp.core.variables.schema.exceptions import InvalidSchemaError

try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader

MANDATORY_DIRECTORIES = [
DAG_DIRECTORY_NAME,
DEFAULT_VARS_DIRECTORY_NAME,
Expand Down Expand Up @@ -252,51 +245,3 @@ def read_hosts_from_playbook(
return inventory_reader.get_hosts_from_playbook(fd)
except Exception as e:
raise ValueError(f"Can't parse playbook {playbook_path}.") from e


def parse_dag_directory(
    dag_directory_path: Path,
) -> Generator[TDPLibDagNodeModel, None, None]:
    """Yield the DAG nodes defined in every DAG file of a directory.

    Only entries directly inside the directory whose name ends with
    ``YML_EXTENSION`` are read; sub-directories are not searched.

    Args:
        dag_directory_path: Path to the DAG directory.

    Yields:
        The DAG nodes of each matching file, one file after another.
    """
    # Sort the matches: glob results come back in filesystem order, and a
    # stable order keeps the node sequence reproducible across machines.
    for dag_file in sorted(dag_directory_path.glob("*" + YML_EXTENSION)):
        yield from parse_dag_file(dag_file)


class TDPLibDagNodeModel(BaseModel):
    """Model for a TDP operation defined in a tdp_lib_dag file."""

    # Keys other than the fields declared below are ignored, not rejected.
    model_config = ConfigDict(extra="ignore")

    # Operation name; required.
    name: str
    # Names listed under `depends_on`; defaults to an empty list when omitted.
    depends_on: list[str] = []


class TDPLibDagModel(BaseModel):
    """Model for a TDP DAG defined in a tdp_lib_dag file."""

    # Unknown keys passed to the model are ignored instead of raising.
    model_config = ConfigDict(extra="ignore")

    # Every operation of the file; parse_dag_file passes the whole parsed
    # YAML document as this field.
    operations: list[TDPLibDagNodeModel]


def parse_dag_file(
    dag_file_path: Path,
) -> Generator[TDPLibDagNodeModel, None, None]:
    """Read a tdp_lib_dag file and yield its DAG operations.

    Args:
        dag_file_path: Path to the tdp_lib_dag YAML file.

    Yields:
        One validated node per entry of the file's top-level list.

    Raises:
        ValidationError: If the parsed content does not validate as a list
            of operations (an empty file included).
    """
    # NOTE(review): yaml.load with the full Loader can construct arbitrary
    # Python objects from tagged YAML. Switch to SafeLoader/CSafeLoader if
    # DAG files may come from an untrusted source.
    # Read as UTF-8 explicitly so parsing does not depend on the locale.
    with dag_file_path.open("r", encoding="utf-8") as operations_file:
        file_content = yaml.load(operations_file, Loader=Loader)

    # Only model construction can fail validation, so the yields stay
    # outside the try block.
    try:
        tdp_lib_dag = TDPLibDagModel(operations=file_content)
    except ValidationError as e:
        logger.error(f"Error while parsing tdp_lib_dag file {dag_file_path}: {e}")
        raise

    yield from tdp_lib_dag.operations
70 changes: 70 additions & 0 deletions tdp/core/collection/parse_dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import logging
from collections.abc import Generator
from pathlib import Path

import yaml
from pydantic import BaseModel, ConfigDict, ValidationError

from tdp.core.constants import (
YML_EXTENSION,
)

try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader

logger = logging.getLogger(__name__)


def parse_dag_directory(
    dag_directory_path: Path,
) -> Generator[TDPLibDagNodeModel, None, None]:
    """Yield the DAG nodes defined in every DAG file of a directory.

    Only entries directly inside the directory whose name ends with
    ``YML_EXTENSION`` are read; sub-directories are not searched.

    Args:
        dag_directory_path: Path to the DAG directory.

    Yields:
        The DAG nodes of each matching file, one file after another.
    """
    # Sort the matches: glob results come back in filesystem order, and a
    # stable order keeps the node sequence reproducible across machines.
    for dag_file in sorted(dag_directory_path.glob("*" + YML_EXTENSION)):
        yield from parse_dag_file(dag_file)


class TDPLibDagNodeModel(BaseModel):
    """Model for a TDP operation defined in a tdp_lib_dag file."""

    # Keys other than the fields declared below are ignored, not rejected.
    model_config = ConfigDict(extra="ignore")

    # Operation name; required.
    name: str
    # Names listed under `depends_on`; defaults to an empty list when omitted.
    depends_on: list[str] = []


class TDPLibDagModel(BaseModel):
    """Model for a TDP DAG defined in a tdp_lib_dag file."""

    # Unknown keys passed to the model are ignored instead of raising.
    model_config = ConfigDict(extra="ignore")

    # Every operation of the file; parse_dag_file passes the whole parsed
    # YAML document as this field.
    operations: list[TDPLibDagNodeModel]


def parse_dag_file(
    dag_file_path: Path,
) -> Generator[TDPLibDagNodeModel, None, None]:
    """Read a tdp_lib_dag file and yield its DAG operations.

    Args:
        dag_file_path: Path to the tdp_lib_dag YAML file.

    Yields:
        One validated node per entry of the file's top-level list.

    Raises:
        ValidationError: If the parsed content does not validate as a list
            of operations (an empty file included).
    """
    # NOTE(review): yaml.load with the full Loader can construct arbitrary
    # Python objects from tagged YAML. Switch to SafeLoader/CSafeLoader if
    # DAG files may come from an untrusted source.
    # Read as UTF-8 explicitly so parsing does not depend on the locale.
    with dag_file_path.open("r", encoding="utf-8") as operations_file:
        file_content = yaml.load(operations_file, Loader=Loader)

    # Only model construction can fail validation, so the yields stay
    # outside the try block.
    try:
        tdp_lib_dag = TDPLibDagModel(operations=file_content)
    except ValidationError as e:
        logger.error(f"Error while parsing tdp_lib_dag file {dag_file_path}: {e}")
        raise

    yield from tdp_lib_dag.operations
3 changes: 1 addition & 2 deletions tdp/core/variables/service_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Optional

from tdp.core.collection import YML_EXTENSION
from tdp.core.constants import SERVICE_NAME_MAX_LENGTH
from tdp.core.constants import SERVICE_NAME_MAX_LENGTH, YML_EXTENSION
from tdp.core.types import PathLike
from tdp.core.variables.schema.exceptions import SchemaValidationError
from tdp.core.variables.variables import (
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/core/collections/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0
90 changes: 90 additions & 0 deletions tests/unit/core/collections/test_parse_dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2022 TOSIT.IO
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path

import pytest
from pydantic import ValidationError

from tdp.core.collection.parse_dag import parse_dag_directory, parse_dag_file


def test_read_dag_file(tmp_path: Path):
    """parse_dag_file yields every operation of a file, in file order."""
    dag_file_path = tmp_path / "dag_file.yml"
    # Nested keys must be indented under their list item; flattened to
    # column 0 the document is no longer a list of operations.
    dag_file_path.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
- name: s2_c2_a
  depends_on:
    - s1_c1_a
- name: s3_c3_a
  depends_on:
    - sx_cx_a
    - sy_cy_a
"""
    )
    operations = list(parse_dag_file(dag_file_path))
    assert len(operations) == 3
    assert operations[0].name == "s1_c1_a"
    assert operations[0].depends_on == ["sx_cx_a"]
    assert operations[1].name == "s2_c2_a"
    assert operations[1].depends_on == ["s1_c1_a"]
    assert operations[2].name == "s3_c3_a"
    assert operations[2].depends_on == ["sx_cx_a", "sy_cy_a"]


def test_read_dag_file_empty(tmp_path: Path):
    """An empty tdp_lib_dag file is rejected with a ValidationError."""
    empty_dag_file = tmp_path / "dag_file.yml"
    empty_dag_file.write_text("")
    # parse_dag_file is a generator: nothing is parsed until it is consumed.
    operations = parse_dag_file(empty_dag_file)
    with pytest.raises(ValidationError):
        list(operations)


def test_read_dag_file_with_additional_props(tmp_path: Path):
    """Unknown keys on an operation are ignored instead of failing validation."""
    dag_file_path = tmp_path / "dag_file.yml"
    # `foo` sits at the operation level, next to `name` and `depends_on`;
    # the nesting has to be spelled out for the YAML to load as a list.
    dag_file_path.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
  foo: bar
"""
    )
    operations = list(parse_dag_file(dag_file_path))
    assert len(operations) == 1
    assert operations[0].name == "s1_c1_a"
    assert operations[0].depends_on == ["sx_cx_a"]


def test_get_collection_dag_nodes(tmp_path: Path):
    """parse_dag_directory gathers the nodes of every DAG file in a directory."""
    collection_path = tmp_path / "collection"
    dag_directory = "dag"
    dag_directory_path = collection_path / dag_directory
    dag_directory_path.mkdir(parents=True, exist_ok=True)
    dag_file_1 = dag_directory_path / "dag1.yml"
    dag_file_2 = dag_directory_path / "dag2.yml"
    # `depends_on` must be nested under its operation for the YAML to load
    # as a list of operations.
    dag_file_1.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
"""
    )
    dag_file_2.write_text(
        """---
- name: s2_c2_a
  depends_on:
    - s1_c1_a
"""
    )
    dag_nodes = list(parse_dag_directory(dag_directory_path))
    assert len(dag_nodes) == 2
    # Membership checks only: the test does not rely on file iteration order.
    assert any(
        node.name == "s1_c1_a" and node.depends_on == ["sx_cx_a"] for node in dag_nodes
    )
    assert any(
        node.name == "s2_c2_a" and node.depends_on == ["s1_c1_a"] for node in dag_nodes
    )
86 changes: 1 addition & 85 deletions tests/unit/core/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
from pathlib import Path

import pytest
from pydantic import ValidationError

from tdp.core.collection import (
from tdp.core.collection.collection import (
Collection,
MissingMandatoryDirectoryError,
PathDoesNotExistsError,
PathIsNotADirectoryError,
check_collection_structure,
get_collection_playbooks,
parse_dag_directory,
parse_dag_file,
read_hosts_from_playbook,
)
from tdp.core.constants import (
Expand Down Expand Up @@ -144,84 +141,3 @@ def test_check_collection_structure_valid_collection(tmp_path: Path):
):
(collection_path / mandatory_directory).mkdir(parents=True, exist_ok=True)
assert check_collection_structure(collection_path) is None


def test_read_dag_file(tmp_path: Path):
    """parse_dag_file yields every operation of a file, in file order."""
    dag_file_path = tmp_path / "dag_file.yml"
    # Nested keys must be indented under their list item; flattened to
    # column 0 the document is no longer a list of operations.
    dag_file_path.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
- name: s2_c2_a
  depends_on:
    - s1_c1_a
- name: s3_c3_a
  depends_on:
    - sx_cx_a
    - sy_cy_a
"""
    )
    operations = list(parse_dag_file(dag_file_path))
    assert len(operations) == 3
    assert operations[0].name == "s1_c1_a"
    assert operations[0].depends_on == ["sx_cx_a"]
    assert operations[1].name == "s2_c2_a"
    assert operations[1].depends_on == ["s1_c1_a"]
    assert operations[2].name == "s3_c3_a"
    assert operations[2].depends_on == ["sx_cx_a", "sy_cy_a"]


def test_read_dag_file_empty(tmp_path: Path):
    """An empty tdp_lib_dag file is rejected with a ValidationError."""
    empty_dag_file = tmp_path / "dag_file.yml"
    empty_dag_file.write_text("")
    # parse_dag_file is a generator: nothing is parsed until it is consumed.
    operations = parse_dag_file(empty_dag_file)
    with pytest.raises(ValidationError):
        list(operations)


def test_read_dag_file_with_additional_props(tmp_path: Path):
    """Unknown keys on an operation are ignored instead of failing validation."""
    dag_file_path = tmp_path / "dag_file.yml"
    # `foo` sits at the operation level, next to `name` and `depends_on`;
    # the nesting has to be spelled out for the YAML to load as a list.
    dag_file_path.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
  foo: bar
"""
    )
    operations = list(parse_dag_file(dag_file_path))
    assert len(operations) == 1
    assert operations[0].name == "s1_c1_a"
    assert operations[0].depends_on == ["sx_cx_a"]


def test_get_collection_dag_nodes(tmp_path: Path):
    """parse_dag_directory gathers the nodes of every DAG file in a directory."""
    collection_path = tmp_path / "collection"
    dag_directory = "dag"
    dag_directory_path = collection_path / dag_directory
    dag_directory_path.mkdir(parents=True, exist_ok=True)
    dag_file_1 = dag_directory_path / "dag1.yml"
    dag_file_2 = dag_directory_path / "dag2.yml"
    # `depends_on` must be nested under its operation for the YAML to load
    # as a list of operations.
    dag_file_1.write_text(
        """---
- name: s1_c1_a
  depends_on:
    - sx_cx_a
"""
    )
    dag_file_2.write_text(
        """---
- name: s2_c2_a
  depends_on:
    - s1_c1_a
"""
    )
    dag_nodes = list(parse_dag_directory(dag_directory_path))
    assert len(dag_nodes) == 2
    # Membership checks only: the test does not rely on file iteration order.
    assert any(
        node.name == "s1_c1_a" and node.depends_on == ["sx_cx_a"] for node in dag_nodes
    )
    assert any(
        node.name == "s2_c2_a" and node.depends_on == ["s1_c1_a"] for node in dag_nodes
    )

0 comments on commit b3c28bb

Please sign in to comment.