Skip to content

Commit

Permalink
Remove dataset factory discovery (#1688)
Browse files Browse the repository at this point in the history
Dataset Factory Pattern discovery was introduced to discover datasets (mostly tracking datasets used in Experiment Tracking) before populating the Kedro-Viz data repositories.
Because of this discovery step, datasets that users do not have access to either time out or raise exceptions, which causes Kedro-Viz itself to time out or fail.
This PR removes the dataset factory pattern discovery implementation as a temporary fix. As a consequence, users can no longer use dataset factory patterns for Experiment Tracking datasets.
  • Loading branch information
ravi-kumar-pilla authored Dec 19, 2023
1 parent a25cff0 commit 99b84e4
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 146 deletions.
33 changes: 5 additions & 28 deletions package/kedro_viz/data_access/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,40 +69,17 @@ def set_db_session(self, db_session_class: sessionmaker):
"""Set db session on repositories that need it."""
self.runs.set_db_session(db_session_class)

def resolve_dataset_factory_patterns(
self, catalog: DataCatalog, pipelines: Dict[str, KedroPipeline]
):
"""Resolve dataset factory patterns in data catalog by matching
them against the datasets in the pipelines.
"""
for pipeline in pipelines.values():
if hasattr(pipeline, "data_sets"):
# Support for Kedro 0.18.x
datasets = pipeline.data_sets()
else:
datasets = pipeline.datasets()

for dataset_name in datasets:
try:
catalog.exists(dataset_name)
# pylint: disable=broad-except
except Exception as exc: # pragma: no cover
logger.warning(
"'%s' does not exist. Full exception: %s: %s",
dataset_name,
type(exc).__name__,
exc,
)

def add_catalog(self, catalog: DataCatalog, pipelines: Dict[str, KedroPipeline]):
def add_catalog(self, catalog: DataCatalog):
"""Resolve dataset factory patterns, add the catalog to the CatalogRepository
and relevant tracking datasets to TrackingDatasetRepository.
Args:
catalog: The DataCatalog instance to add.
pipelines: A dictionary which holds project pipelines
"""
self.resolve_dataset_factory_patterns(catalog, pipelines)

# TODO: Implement dataset factory pattern discovery for
# experiment tracking datasets.

self.catalog.set_catalog(catalog)

for dataset_name, dataset in self.catalog.as_dict().items():
Expand Down
2 changes: 1 addition & 1 deletion package/kedro_viz/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def populate_data(
session_class = make_db_session_factory(session_store.location)
data_access_manager.set_db_session(session_class)

data_access_manager.add_catalog(catalog, pipelines)
data_access_manager.add_catalog(catalog)

# add dataset stats before adding pipelines as the data nodes
# need stats information and they are created during add_pipelines
Expand Down
27 changes: 6 additions & 21 deletions package/tests/test_api/test_graphql/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,8 @@ def test_run_tracking_data_query(
client,
example_tracking_catalog,
data_access_manager_with_runs,
example_pipelines,
):
data_access_manager_with_runs.add_catalog(
example_tracking_catalog, example_pipelines
)
data_access_manager_with_runs.add_catalog(example_tracking_catalog)
example_run_id = example_run_ids[0]

response = client.post(
Expand Down Expand Up @@ -173,15 +170,9 @@ def test_run_tracking_data_query(
assert response.json() == expected_response

def test_metrics_data(
self,
client,
example_tracking_catalog,
data_access_manager_with_runs,
example_pipelines,
self, client, example_tracking_catalog, data_access_manager_with_runs
):
data_access_manager_with_runs.add_catalog(
example_tracking_catalog, example_pipelines
)
data_access_manager_with_runs.add_catalog(example_tracking_catalog)

response = client.post(
"/graphql",
Expand Down Expand Up @@ -295,11 +286,8 @@ def test_graphql_run_tracking_data(
data_access_manager_with_runs,
show_diff,
expected_response,
example_pipelines,
):
data_access_manager_with_runs.add_catalog(
example_multiple_run_tracking_catalog, example_pipelines
)
data_access_manager_with_runs.add_catalog(example_multiple_run_tracking_catalog)

response = client.post(
"/graphql",
Expand Down Expand Up @@ -355,11 +343,9 @@ def test_graphql_run_tracking_data_at_least_one_empty_run(
data_access_manager_with_runs,
show_diff,
expected_response,
example_pipelines,
):
data_access_manager_with_runs.add_catalog(
example_multiple_run_tracking_catalog_at_least_one_empty_run,
example_pipelines,
example_multiple_run_tracking_catalog_at_least_one_empty_run
)

response = client.post(
Expand Down Expand Up @@ -393,10 +379,9 @@ def test_graphql_run_tracking_data_all_empty_runs(
data_access_manager_with_runs,
show_diff,
expected_response,
example_pipelines,
):
data_access_manager_with_runs.add_catalog(
example_multiple_run_tracking_catalog_all_empty_runs, example_pipelines
example_multiple_run_tracking_catalog_all_empty_runs
)

response = client.post(
Expand Down
Loading

0 comments on commit 99b84e4

Please sign in to comment.