Skip to content

Commit

Permalink
Remove runtime patterns after run (#4429)
Browse files Browse the repository at this point in the history
* Added method to remove runtime patterns

Signed-off-by: Elena Khaustova <[email protected]>

* Added test for removing patterns

Signed-off-by: Elena Khaustova <[email protected]>

* Updated test_run_twice_giving_same_result test to check mutation

Signed-off-by: Elena Khaustova <[email protected]>

* Updated release notes

Signed-off-by: Elena Khaustova <[email protected]>

---------

Signed-off-by: Elena Khaustova <[email protected]>
  • Loading branch information
ElenaKhaustova authored Jan 27, 2025
1 parent a960558 commit 3d18096
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 3 deletions.
8 changes: 6 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -211,9 +215,9 @@
"filename": "tests/io/test_kedro_data_catalog.py",
"hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70",
"is_verified": false,
"line_number": 482
"line_number": 491
}
]
},
"generated_at": "2025-01-08T12:21:43Z"
"generated_at": "2025-01-20T11:39:50Z"
}
2 changes: 1 addition & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* Remove lowercase transformation in regex validation.
* Moved `kedro-catalog` JSON schema to `kedro-datasets`.
* Updated `Partitioned dataset lazy saving` docs page.

* Fixed `KedroDataCatalog` mutation after pipeline run.

## Breaking changes to the API
## Documentation changes
Expand Down
7 changes: 7 additions & 0 deletions kedro/io/catalog_config_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,10 @@ def add_runtime_patterns(self, dataset_patterns: Patterns) -> None:
"""Add new runtime patterns and re-sort them."""
self._runtime_patterns = {**self._runtime_patterns, **dataset_patterns}
self._runtime_patterns = self._sort_patterns(self._runtime_patterns)

def remove_runtime_patterns(self, dataset_patterns: Patterns) -> None:
    """Drop the given runtime patterns and re-sort whatever is left.

    Only the names obtained by iterating ``dataset_patterns`` are used.
    Names that are not currently registered as runtime patterns are
    silently ignored.

    Args:
        dataset_patterns: Patterns whose names should be removed from
            the runtime patterns.
    """
    registered = self._runtime_patterns
    for name in dataset_patterns:
        # A ``None`` default makes the removal a no-op for unknown names.
        registered.pop(name, None)
    self._runtime_patterns = self._sort_patterns(registered)
7 changes: 7 additions & 0 deletions kedro/runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def run(
)

# Register the default dataset pattern with the catalog
# TODO: replace with catalog.config_resolver.add_runtime_patterns() when removing old catalog
catalog = catalog.shallow_copy(
extra_dataset_patterns=self._extra_dataset_patterns
)
Expand Down Expand Up @@ -136,6 +137,12 @@ def run(

run_output = {ds_name: catalog.load(ds_name) for ds_name in free_outputs}

# Remove runtime patterns after run, so they do not affect further runs
if self._extra_dataset_patterns:
catalog.config_resolver.remove_runtime_patterns(
self._extra_dataset_patterns
)

return run_output

def run_only_missing(
Expand Down
9 changes: 9 additions & 0 deletions tests/io/test_kedro_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,15 @@ def test_get_dataset_matching_pattern(self, data_catalog):
ds = data_catalog.get_dataset(match_pattern_ds)
assert isinstance(ds, MemoryDataset)

def test_remove_runtime_pattern(self, data_catalog):
    """Check a name is in the catalog only while the runtime pattern is registered."""
    ds_name = "match_pattern_ds"
    catch_all_pattern = {"{default}": {"type": "MemoryDataset"}}

    # With the runtime pattern registered, the name is reported as present.
    data_catalog.config_resolver.add_runtime_patterns(catch_all_pattern)
    assert ds_name in data_catalog

    # Once the same pattern is removed, the name is no longer present.
    data_catalog.config_resolver.remove_runtime_patterns(catch_all_pattern)
    assert ds_name not in data_catalog

def test_release(self, data_catalog):
"""Test release is called without errors"""
data_catalog.release("test")
Expand Down
4 changes: 4 additions & 0 deletions tests/runner/test_sequential_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ def test_log_not_using_async(self, fan_out_fan_in, catalog, caplog):

def test_run_twice_giving_same_result(self, fan_out_fan_in, catalog):
catalog.add_feed_dict({"A": 42})
patterns_before_run = catalog.config_resolver.list_patterns()
result_first_run = SequentialRunner().run(
fan_out_fan_in, catalog, hook_manager=_create_hook_manager()
)
# Check runtime patterns removed after run, so catalog is not mutated
assert patterns_before_run == catalog.config_resolver.list_patterns()

result_second_run = SequentialRunner().run(
fan_out_fan_in, catalog, hook_manager=_create_hook_manager()
)
Expand Down

0 comments on commit 3d18096

Please sign in to comment.