From 3d1809613260671158a3f581a5472dc885865cc0 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Mon, 27 Jan 2025 18:41:12 +0000 Subject: [PATCH] Remove runtime patterns after run (#4429) * Added method to remove runtime patterns Signed-off-by: Elena Khaustova * Added test for removing patterns Signed-off-by: Elena Khaustova * Updated test_run_twice_giving_same_result test to check mutation Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- .secrets.baseline | 8 ++++++-- RELEASE.md | 2 +- kedro/io/catalog_config_resolver.py | 7 +++++++ kedro/runner/runner.py | 7 +++++++ tests/io/test_kedro_data_catalog.py | 9 +++++++++ tests/runner/test_sequential_runner.py | 4 ++++ 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 965f48cbac..697179efc5 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -90,6 +90,10 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -211,9 +215,9 @@ "filename": "tests/io/test_kedro_data_catalog.py", "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", "is_verified": false, - "line_number": 482 + "line_number": 491 } ] }, - "generated_at": "2025-01-08T12:21:43Z" + "generated_at": "2025-01-20T11:39:50Z" } diff --git a/RELEASE.md b/RELEASE.md index f186c81d3d..c1d23d843c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -16,7 +16,7 @@ * Remove lowercase transformation in regex validation. * Moved `kedro-catalog` JSON schema to `kedro-datasets`. * Updated `Partitioned dataset lazy saving` docs page. - +* Fixed `KedroDataCatalog` mutation after pipeline run. ## Breaking changes to the API ## Documentation changes diff --git a/kedro/io/catalog_config_resolver.py b/kedro/io/catalog_config_resolver.py index d4582d8e25..5004d3b57f 100644 --- a/kedro/io/catalog_config_resolver.py +++ b/kedro/io/catalog_config_resolver.py @@ -332,3 +332,10 @@ def add_runtime_patterns(self, dataset_patterns: Patterns) -> None: """Add new runtime patterns and re-sort them.""" self._runtime_patterns = {**self._runtime_patterns, **dataset_patterns} self._runtime_patterns = self._sort_patterns(self._runtime_patterns) + + def remove_runtime_patterns(self, dataset_patterns: Patterns) -> None: + """Remove runtime patterns and re-sort them.""" + for pattern_name in dataset_patterns: + if pattern_name in self._runtime_patterns: + del self._runtime_patterns[pattern_name] + self._runtime_patterns = self._sort_patterns(self._runtime_patterns) diff --git a/kedro/runner/runner.py b/kedro/runner/runner.py index f882e37249..beb0b73021 100644 --- a/kedro/runner/runner.py +++ b/kedro/runner/runner.py @@ -104,6 +104,7 @@ def run( ) # Register the default dataset pattern with the catalog + # TODO: replace with catalog.config_resolver.add_runtime_patterns() when removing old catalog catalog = catalog.shallow_copy( extra_dataset_patterns=self._extra_dataset_patterns ) @@ -136,6 +137,12 @@ def run( run_output = {ds_name: catalog.load(ds_name) for ds_name in free_outputs} + # Remove runtime patterns after run, so they do not affect further runs + if self._extra_dataset_patterns: + catalog.config_resolver.remove_runtime_patterns( + self._extra_dataset_patterns + ) + return run_output def run_only_missing( diff --git a/tests/io/test_kedro_data_catalog.py b/tests/io/test_kedro_data_catalog.py index e6ffbf88aa..0fac96b8c1 100644 --- a/tests/io/test_kedro_data_catalog.py +++ b/tests/io/test_kedro_data_catalog.py @@ -291,6 +291,15 @@ def test_get_dataset_matching_pattern(self, data_catalog): ds = data_catalog.get_dataset(match_pattern_ds) assert isinstance(ds, MemoryDataset) + def test_remove_runtime_pattern(self, data_catalog): + runtime_pattern = {"{default}": {"type": "MemoryDataset"}} + data_catalog.config_resolver.add_runtime_patterns(runtime_pattern) + match_pattern_ds = "match_pattern_ds" + assert match_pattern_ds in data_catalog + + data_catalog.config_resolver.remove_runtime_patterns(runtime_pattern) + assert match_pattern_ds not in data_catalog + def test_release(self, data_catalog): """Test release is called without errors""" data_catalog.release("test") diff --git a/tests/runner/test_sequential_runner.py b/tests/runner/test_sequential_runner.py index e2424fbec6..8f0dffbbe4 100644 --- a/tests/runner/test_sequential_runner.py +++ b/tests/runner/test_sequential_runner.py @@ -42,9 +42,13 @@ def test_log_not_using_async(self, fan_out_fan_in, catalog, caplog): def test_run_twice_giving_same_result(self, fan_out_fan_in, catalog): catalog.add_feed_dict({"A": 42}) + patterns_before_run = catalog.config_resolver.list_patterns() result_first_run = SequentialRunner().run( fan_out_fan_in, catalog, hook_manager=_create_hook_manager() ) + # Check runtime patterns removed after run, so catalog is not mutated + assert patterns_before_run == catalog.config_resolver.list_patterns() + result_second_run = SequentialRunner().run( fan_out_fan_in, catalog, hook_manager=_create_hook_manager() )