diff --git a/demo-project/src/demo_project/requirements.in b/demo-project/src/demo_project/requirements.in index dd53d43f3a..d035f9438e 100644 --- a/demo-project/src/demo_project/requirements.in +++ b/demo-project/src/demo_project/requirements.in @@ -6,7 +6,7 @@ jupyter~=1.0 jupyter_client>=5.1, <7.0 jupyterlab~=3.0 kedro~=0.18.0 -kedro-datasets[pandas.CSVDataset,pandas.ExcelDataset, pandas.ParquetDataset, plotly.PlotlyDataset]<=2.0.0 +git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets nbstripout~=0.4 pytest-cov~=2.5 pytest-mock>=1.7.1, <2.0 diff --git a/demo-project/src/docker_requirements.txt b/demo-project/src/docker_requirements.txt index 1934ea38d1..46b3b18fda 100644 --- a/demo-project/src/docker_requirements.txt +++ b/demo-project/src/docker_requirements.txt @@ -1,5 +1,5 @@ kedro>=0.18.0 -kedro-datasets[pandas.CSVDataset,pandas.ExcelDataset, pandas.ParquetDataset, plotly.PlotlyDataset, matplotlib.MatplotlibWriter]<=2.0.0 +git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets scikit-learn~=1.0 pillow~=9.0 seaborn~=0.11.2 diff --git a/package/kedro_viz/api/rest/responses.py b/package/kedro_viz/api/rest/responses.py index 885fdd8142..08fb3c76bd 100644 --- a/package/kedro_viz/api/rest/responses.py +++ b/package/kedro_viz/api/rest/responses.py @@ -131,11 +131,9 @@ class Config: class DataNodeMetadataAPIResponse(BaseAPIResponse): filepath: Optional[str] type: str - plot: Optional[Dict] - image: Optional[str] - tracking_data: Optional[Dict] run_command: Optional[str] - preview: Optional[Dict] + preview: Optional[Union[Dict, str]] + preview_type: Optional[str] stats: Optional[Dict] class Config: diff --git a/package/kedro_viz/data_access/repositories/tracking_datasets.py b/package/kedro_viz/data_access/repositories/tracking_datasets.py index e4471c3d2c..d8d06cb9a0 100644 --- a/package/kedro_viz/data_access/repositories/tracking_datasets.py +++ b/package/kedro_viz/data_access/repositories/tracking_datasets.py @@ -8,8 +8,8 @@ TRACKING_DATASET_GROUPS, TrackingDatasetGroup, TrackingDatasetModel, - get_dataset_type, ) +from kedro_viz.models.utils import get_dataset_type if TYPE_CHECKING: try: diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 8e242c09a9..3ffd608bca 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -3,9 +3,7 @@ load data from projects created in a range of Kedro versions. """ # pylint: disable=import-outside-toplevel, protected-access -# pylint: disable=missing-function-docstring -import base64 import json import logging from pathlib import Path @@ -14,24 +12,7 @@ from kedro import __version__ from kedro.framework.session import KedroSession from kedro.framework.session.store import BaseSessionStore - -try: - from kedro_datasets import ( # isort:skip - json as json_dataset, - matplotlib, - plotly, - tracking, - ) -except ImportError: # kedro_datasets is not installed. - from kedro.extras.datasets import ( # Safe since ImportErrors are suppressed within kedro. - json as json_dataset, - matplotlib, - plotly, - tracking, - ) - from kedro.io import DataCatalog -from kedro.io.core import get_filepath_str from kedro.pipeline import Pipeline logger = logging.getLogger(__name__) @@ -129,50 +110,3 @@ def load_data( stats_dict = _get_dataset_stats(project_path) return catalog, pipelines_dict, session_store, stats_dict - - -# Try to access the attribute to trigger the import of dependencies, only modify the _load -# if dependencies are installed. -# These datasets do not have _load methods defined (tracking and matplotlib) or do not -# load to json (plotly), hence the need to define _load here. -try: - getattr(matplotlib, "MatplotlibWriter") # Trigger the lazy import - - def matplotlib_writer_load(dataset: matplotlib.MatplotlibWriter) -> str: - load_path = get_filepath_str(dataset._get_load_path(), dataset._protocol) - with dataset._fs.open(load_path, mode="rb") as img_file: - base64_bytes = base64.b64encode(img_file.read()) - return base64_bytes.decode("utf-8") - - matplotlib.MatplotlibWriter._load = matplotlib_writer_load -except (ImportError, AttributeError): - pass - -try: - getattr(plotly, "JSONDataset") # Trigger import - plotly.JSONDataset._load = json_dataset.JSONDataset._load -except (ImportError, AttributeError): - getattr(plotly, "JSONDataSet") # Trigger import - plotly.JSONDataSet._load = json_dataset.JSONDataSet._load - - -try: - getattr(plotly, "PlotlyDataset") # Trigger import - plotly.PlotlyDataset._load = json_dataset.JSONDataset._load -except (ImportError, AttributeError): - getattr(plotly, "PlotlyDataSet") # Trigger import - plotly.PlotlyDataSet._load = json_dataset.JSONDataSet._load - -try: - getattr(tracking, "JSONDataset") # Trigger import - tracking.JSONDataset._load = json_dataset.JSONDataset._load -except (ImportError, AttributeError): - getattr(tracking, "JSONDataSet") # Trigger import - tracking.JSONDataSet._load = json_dataset.JSONDataSet._load - -try: - getattr(tracking, "MetricsDataset") # Trigger import - tracking.MetricsDataset._load = json_dataset.JSONDataset._load -except (ImportError, AttributeError): - getattr(tracking, "MetricsDataSet") # Trigger import - tracking.MetricsDataSet._load = json_dataset.JSONDataSet._load diff --git a/package/kedro_viz/models/experiment_tracking.py b/package/kedro_viz/models/experiment_tracking.py index 96e61a586e..7512984621 100644 --- a/package/kedro_viz/models/experiment_tracking.py +++ b/package/kedro_viz/models/experiment_tracking.py @@ -63,7 +63,7 @@ class TrackingDatasetGroup(str, Enum): JSON = "json" -# Map dataset types (as produced by get_dataset_type) to their group +# Map dataset types to their group TRACKING_DATASET_GROUPS = { "plotly.plotly_dataset.PlotlyDataset": TrackingDatasetGroup.PLOT, "plotly.json_dataset.JSONDataset": TrackingDatasetGroup.PLOT, @@ -110,9 +110,11 @@ def load_tracking_data(self, run_id: str): try: if TRACKING_DATASET_GROUPS[self.dataset_type] is TrackingDatasetGroup.PLOT: - self.runs[run_id] = {self.dataset._filepath.name: self.dataset.load()} + self.runs[run_id] = { + self.dataset._filepath.name: self.dataset.preview() # type: ignore + } else: - self.runs[run_id] = self.dataset.load() + self.runs[run_id] = self.dataset.preview() # type: ignore except Exception as exc: # pylint: disable=broad-except # pragma: no cover logger.warning( "'%s' with version '%s' could not be loaded. Full exception: %s: %s", diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py index 4212b727c2..8e1f04fa4e 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart.py @@ -1,4 +1,5 @@ """`kedro_viz.models.flowchart` defines data models to represent Kedro entities in a viz graph.""" + # pylint: disable=protected-access, missing-function-docstring import abc import hashlib @@ -586,52 +587,16 @@ def set_viz_metadata(cls, _, values): return None - # TODO: improve this scheme. - def is_plot_node(self): - """Check if the current node is a plot node. - Currently it only recognises one underlying dataset as a plot node. - In the future, we might want to make this generic. - """ - return self.dataset_type in ( - "plotly.plotly_dataset.PlotlyDataset", - "plotly.json_dataset.JSONDataset", - "plotly.plotly_dataset.PlotlyDataSet", - "plotly.json_dataset.JSONDataSet", - ) - - def is_image_node(self): - """Check if the current node is a matplotlib image node.""" - return self.dataset_type == "matplotlib.matplotlib_writer.MatplotlibWriter" - - def is_metric_node(self): - """Check if the current node is a metrics node.""" - return self.dataset_type in ( - "tracking.metrics_dataset.MetricsDataset", - "tracking.metrics_dataset.MetricsDataSet", - ) - - def is_json_node(self): - """Check if the current node is a JSONDataset node.""" - return self.dataset_type in ( - "tracking.json_dataset.JSONDataset", - "tracking.json_dataset.JSONDataSet", - ) - - def is_tracking_node(self): - """Checks if the current node is a tracking data node""" - return self.is_json_node() or self.is_metric_node() - - def is_preview_node(self): - """Checks if the current node has a preview""" - if not (self.viz_metadata and self.viz_metadata.get("preview_args", None)): - return False - - return True - def get_preview_args(self): """Gets the preview arguments for a dataset""" return self.viz_metadata.get("preview_args", None) + def is_preview_disabled(self): + """Checks if the dataset has a preview disabled""" + return ( + self.viz_metadata is not None and self.viz_metadata.get("preview") is False + ) + class TranscodedDataNode(GraphNode): """Represent a graph node of type data @@ -718,24 +683,15 @@ class DataNodeMetadata(GraphNodeMetadata): # The path to the actual data file for the underlying dataset filepath: Optional[str] - plot: Optional[Dict] = Field( - None, description="The optional plot data if the underlying dataset has a plot" - ) - - # The image data if the underlying dataset has a image - # currently only applicable for matplotlib.MatplotlibWriter - image: Optional[str] = Field( - None, description="The image data if the underlying dataset has a image" - ) - tracking_data: Optional[Dict] = Field( - None, - description="The tracking data if the underlying dataset has a tracking dataset", - ) run_command: Optional[str] = Field( None, description="Command to run the pipeline to this node" ) - preview: Optional[Dict] = Field( - None, description="Preview data for the underlying datanode" + preview: Optional[Union[Dict, str]] = Field( + None, description="Preview data for the underlying data node" + ) + + preview_type: Optional[str] = Field( + None, description="Type of preview for the dataset" ) stats: Optional[Dict] = Field(None, description="The statistics for the data node.") @@ -769,35 +725,39 @@ def set_run_command(cls, _): return f"kedro run --to-outputs={cls.data_node.name}" return None - @validator("plot", always=True) - def set_plot(cls, _): - if cls.data_node.is_plot_node(): - return cls.data_node.kedro_obj.load() - return None + @validator("preview", always=True) + def set_preview(cls, _): + if cls.data_node.is_preview_disabled() or not hasattr(cls.dataset, "preview"): + return None - @validator("image", always=True) - def set_image(cls, _): - if cls.data_node.is_image_node(): - return cls.data_node.kedro_obj.load() - return None + try: + preview_args = ( + cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None + ) + if preview_args is None: + return cls.dataset.preview() + return cls.dataset.preview(**preview_args) - @validator("tracking_data", always=True) - def set_tracking_data(cls, _): - if cls.data_node.is_tracking_node(): - return cls.data_node.kedro_obj.load() - return None + except Exception as exc: # pylint: disable=broad-except + logger.warning( + "'%s' could not be previewed. Full exception: %s: %s", + cls.data_node.name, + type(exc).__name__, + exc, + ) + return None - @validator("preview", always=True) - def set_preview(cls, _): - if not (cls.data_node.is_preview_node() and hasattr(cls.dataset, "_preview")): + @validator("preview_type", always=True) + def set_preview_type(cls, _): + if cls.data_node.is_preview_disabled() or not hasattr(cls.dataset, "preview"): return None try: - return cls.dataset._preview(**cls.data_node.get_preview_args()) + return inspect.signature(cls.dataset.preview).return_annotation.__name__ except Exception as exc: # pylint: disable=broad-except # pragma: no cover logger.warning( - "'%s' could not be previewed. Full exception: %s: %s", + "'%s' did not have preview type. Full exception: %s: %s", cls.data_node.name, type(exc).__name__, exc, diff --git a/package/test_requirements.txt b/package/test_requirements.txt index 6db009dbeb..5fa8487c9b 100644 --- a/package/test_requirements.txt +++ b/package/test_requirements.txt @@ -1,7 +1,7 @@ -r requirements.txt kedro >=0.18.0 -kedro-datasets[pandas.ParquetDataset, pandas.CSVDataset, pandas.ExcelDataset, plotly.JSONDataset]<=2.0.0 +git+https://github.com/kedro-org/kedro-plugins.git@main#egg=kedro-datasets[pandas.ParquetDataset,pandas.CSVDataset,pandas.ExcelDataset,plotly.JSONDataset]&subdirectory=kedro-datasets # temporary pin until the next release of kedro-datasets kedro-telemetry>=0.1.1 # for testing telemetry integration bandit~=1.7 behave~=1.2 diff --git a/package/tests/conftest.py b/package/tests/conftest.py index 2a7e793ea7..e788bfc389 100644 --- a/package/tests/conftest.py +++ b/package/tests/conftest.py @@ -267,11 +267,17 @@ def json(self): @pytest.fixture def example_data_frame(): data = { - "id": ["35029", "30292"], - "company_rating": ["100%", "67%"], - "company_location": ["Niue", "Anguilla"], - "total_fleet_count": ["4.0", "6.0"], - "iata_approved": ["f", "f"], + "id": ["35029", "30292", "12345", "67890", "54321", "98765", "11111"], + "company_rating": ["100%", "67%", "80%", "95%", "72%", "88%", "75%"], + "company_location": [ + "Niue", + "Anguilla", + "Barbados", + "Fiji", + "Grenada", + "Jamaica", + "Trinidad and Tobago", + ], } yield pd.DataFrame(data) @@ -292,10 +298,32 @@ def example_csv_dataset(tmp_path, example_data_frame): @pytest.fixture -def example_data_node(): +def example_csv_filepath(tmp_path, example_data_frame): + csv_file_path = tmp_path / "temporary_test_data.csv" + example_data_frame.to_csv(csv_file_path, index=False) + yield csv_file_path + + +@pytest.fixture +def example_data_node(example_csv_filepath): dataset_name = "uk.data_science.model_training.dataset" metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}} - kedro_dataset = CSVDataset(filepath="test.csv", metadata=metadata) + kedro_dataset = CSVDataset(filepath=example_csv_filepath, metadata=metadata) + data_node = GraphNode.create_data_node( + dataset_name=dataset_name, + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 5, "file_size": 1024}, + ) + + yield data_node + + +@pytest.fixture +def example_data_node_without_viz_metadata(example_csv_filepath): + dataset_name = "uk.data_science.model_training.dataset" + kedro_dataset = CSVDataset(filepath=example_csv_filepath) data_node = GraphNode.create_data_node( dataset_name=dataset_name, layer="raw", diff --git a/package/tests/test_api/test_apps.py b/package/tests/test_api/test_apps.py index ac64bb5318..310c2b679f 100644 --- a/package/tests/test_api/test_apps.py +++ b/package/tests/test_api/test_apps.py @@ -82,7 +82,11 @@ class TestNodeMetadataEndpoint: ( "13399a82", 200, - {"filepath": "raw_data.csv", "type": "pandas.csv_dataset.CSVDataset"}, + { + "filepath": "raw_data.csv", + "preview_type": "TablePreview", + "type": "pandas.csv_dataset.CSVDataset", + }, ), ], ) diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py index d1ea4794ad..68c655847d 100644 --- a/package/tests/test_api/test_rest/test_responses.py +++ b/package/tests/test_api/test_rest/test_responses.py @@ -630,6 +630,7 @@ def test_data_node_metadata(self, client): assert response.json() == { "filepath": "model_inputs.csv", "type": "pandas.csv_dataset.CSVDataset", + "preview_type": "TablePreview", "run_command": "kedro run --to-outputs=model_inputs", "stats": {"columns": 12, "rows": 29768}, } @@ -638,6 +639,7 @@ def test_data_node_metadata_for_free_input(self, client): response = client.get("/api/nodes/13399a82") assert response.json() == { "filepath": "raw_data.csv", + "preview_type": "TablePreview", "type": "pandas.csv_dataset.CSVDataset", } diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart.py index fb40ccb47f..87cc505701 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart.py @@ -1,5 +1,3 @@ -# pylint: disable=too-many-public-methods -import base64 from functools import partial from pathlib import Path from textwrap import dedent @@ -125,11 +123,6 @@ def test_create_data_node(self, dataset_name, expected_modular_pipelines): assert data_node.stats["rows"] == 10 assert data_node.stats["columns"] == 5 assert data_node.stats["file_size"] == 1024 - assert not data_node.is_plot_node() - assert not data_node.is_metric_node() - assert not data_node.is_image_node() - assert not data_node.is_json_node() - assert not data_node.is_tracking_node() @pytest.mark.parametrize( "transcoded_dataset_name, original_name", @@ -379,76 +372,82 @@ def test_data_node_metadata(self): ) data_node_metadata = DataNodeMetadata(data_node=data_node) assert data_node_metadata.type == "pandas.csv_dataset.CSVDataset" + assert data_node_metadata.preview_type == "TablePreview" assert data_node_metadata.filepath == "/tmp/dataset.csv" assert data_node_metadata.run_command == "kedro run --to-outputs=dataset" assert data_node_metadata.stats["rows"] == 10 assert data_node_metadata.stats["columns"] == 2 - def test_preview_args_not_exist(self): - metadata = {"kedro-viz": {"something": 3}} + def test_get_preview_args(self): + metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}} dataset = CSVDataset(filepath="test.csv", metadata=metadata) data_node = GraphNode.create_data_node( dataset_name="dataset", tags=set(), layer=None, dataset=dataset, stats=None ) - assert not data_node.is_preview_node() + assert data_node.get_preview_args() == {"nrows": 3} - def test_get_preview_args(self): - metadata = {"kedro-viz": {"preview_args": {"nrows": 3}}} + def test_is_preview_disabled(self): + metadata = {"kedro-viz": {"preview": False}} dataset = CSVDataset(filepath="test.csv", metadata=metadata) data_node = GraphNode.create_data_node( dataset_name="dataset", tags=set(), layer=None, dataset=dataset, stats=None ) - assert data_node.is_preview_node() - assert data_node.get_preview_args() == {"nrows": 3} + assert data_node.is_preview_disabled() is True - def test_preview_data_node_metadata(self, example_data_node, mocker): - mock_preview_data = { + def test_preview_data_node_metadata(self, example_data_node): + expected_preview_data = { "columns": ["id", "company_rating", "company_location"], "index": [0, 1, 2], "data": [ - [1, "90%", "London"], - [2, "80%", "Paris"], - [3, "40%", "Milan"], + [35029, "100%", "Niue"], + [30292, "67%", "Anguilla"], + [12345, "80%", "Barbados"], ], } + preview_node_metadata = DataNodeMetadata(data_node=example_data_node) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=True - ) - mocker.patch( - "kedro_datasets.pandas.CSVDataset._preview", return_value=mock_preview_data + assert preview_node_metadata.preview == expected_preview_data + + def test_preview_data_node_metadata_exception(self, caplog): + empty_dataset = CSVDataset(filepath="temp.csv") + dataset_name = "dataset" + empty_data_node = GraphNode.create_data_node( + dataset_name=dataset_name, + tags=set(), + layer=None, + dataset=empty_dataset, + stats=None, ) - preview_node_metadata = DataNodeMetadata(data_node=example_data_node) + DataNodeMetadata(data_node=empty_data_node) - assert preview_node_metadata.preview == mock_preview_data + assert f" '{dataset_name}' could not be previewed" in caplog.text - def test_preview_data_node_metadata_not_exist(self, example_data_node, mocker): - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=True + def test_preview_default_data_node_metadata( + self, example_data_node_without_viz_metadata + ): + expected_preview_data = { + "columns": ["id", "company_rating", "company_location"], + "index": [0, 1, 2, 3, 4], + "data": [ + [35029, "100%", "Niue"], + [30292, "67%", "Anguilla"], + [12345, "80%", "Barbados"], + [67890, "95%", "Fiji"], + [54321, "72%", "Grenada"], + ], + } + preview_node_metadata = DataNodeMetadata( + data_node=example_data_node_without_viz_metadata ) - mocker.patch("kedro_datasets.pandas.CSVDataset._preview", return_value=False) + + assert preview_node_metadata.preview == expected_preview_data + + def test_preview_data_node_metadata_not_exist(self, example_data_node, mocker): + mocker.patch("kedro_datasets.pandas.CSVDataset.preview", return_value=False) preview_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert preview_node_metadata.plot is None + assert preview_node_metadata.preview is False def test_transcoded_data_node_metadata(self): dataset = CSVDataset(filepath="/tmp/dataset.csv") @@ -489,171 +488,6 @@ def test_partitioned_data_node_metadata(self): data_node_metadata = DataNodeMetadata(data_node=data_node) assert data_node_metadata.filepath == "partitioned/" - # TODO: these test should ideally use a "real" catalog entry to create actual rather - # than mock DataNode. Or if the loading functionality is tested elsewhere, - # perhaps they are not needed at all. At the moment they don't actually test much. - def test_plotly_data_node_metadata(self, example_data_node, mocker): - mock_plot_data = { - "data": [ - { - "x": ["giraffes", "orangutans", "monkeys"], - "y": [20, 14, 23], - "type": "bar", - } - ] - } - - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=mock_plot_data) - - plotly_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert plotly_node_metadata.plot == mock_plot_data - - def test_plotly_data_node_dataset_not_exist(self, example_data_node, mocker): - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=None) - - plotly_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert plotly_node_metadata.plot is None - - # @patch("base64.b64encode") - def test_image_data_node_metadata(self, example_data_node, mocker): - mock_image_data = base64.b64encode( - b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAA" - b"AAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=" - ) - - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=mock_image_data) - - image_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert image_node_metadata.image == mock_image_data - - def test_image_data_node_dataset_not_exist(self, example_data_node, mocker): - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=None) - - image_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert image_node_metadata.image is None - - def test_tracking_data_node_metadata_exist(self, example_data_node, mocker): - mock_tracking_data = { - "recommendations": "test string", - "recommended_controls": False, - "projected_optimization": 0.0013902, - } - - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_metric_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch( - "kedro.io.core.AbstractDataset.load", return_value=mock_tracking_data - ) - - tracking_node_metadata = DataNodeMetadata(data_node=example_data_node) - - assert tracking_node_metadata.tracking_data == mock_tracking_data - assert tracking_node_metadata.plot is None - - def test_tracking_data_node_metadata_not_exist(self, example_data_node, mocker): - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_tracking_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_metric_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=None) - - tracking_node_metadata = DataNodeMetadata(data_node=example_data_node) - - assert tracking_node_metadata.tracking_data is None - assert tracking_node_metadata.plot is None - - def test_metrics_data_node_metadata_dataset_not_exist( - self, example_data_node, mocker - ): - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_metric_node", return_value=True - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_image_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_plot_node", return_value=False - ) - mocker.patch( - "kedro_viz.models.flowchart.DataNode.is_preview_node", return_value=False - ) - mocker.patch("kedro.io.core.AbstractDataset.load", return_value=None) - - metrics_node_metadata = DataNodeMetadata(data_node=example_data_node) - assert metrics_node_metadata.plot is None - def test_parameters_metadata_all_parameters(self): parameters = {"test_split_ratio": 0.3, "num_epochs": 1000} parameters_dataset = MemoryDataset(data=parameters) diff --git a/src/components/metadata-modal/metadata-modal.js b/src/components/metadata-modal/metadata-modal.js index 747616ff16..56f8f6e561 100644 --- a/src/components/metadata-modal/metadata-modal.js +++ b/src/components/metadata-modal/metadata-modal.js @@ -11,11 +11,11 @@ import { getClickedNodeMetaData } from '../../selectors/metadata'; import './metadata-modal.scss'; const MetadataModal = ({ metadata, onToggle, visible }) => { - const hasPlot = Boolean(metadata?.plot); - const hasImage = Boolean(metadata?.image); - const hasPreview = Boolean(metadata?.preview); + const hasPlot = metadata?.previewType === 'PlotlyPreview'; + const hasImage = metadata?.previewType === 'ImagePreview'; + const hasTable = metadata?.previewType === 'TablePreview'; - if (!visible.metadataModal || (!hasPlot && !hasImage && !hasPreview)) { + if (!visible.metadataModal || (!hasPlot && !hasImage && !hasTable)) { return null; } @@ -44,16 +44,16 @@ const MetadataModal = ({ metadata, onToggle, visible }) => { {metadata.name} - {hasPreview && ( + {hasTable && (
- Previewing first {metadata.preview.data.length} rows + Previewing first {metadata.preview.length} rows
)} {hasPlot && ( )} @@ -63,17 +63,17 @@ const MetadataModal = ({ metadata, onToggle, visible }) => { Matplotlib rendering )} - {hasPreview && ( + {hasTable && (
)} - {!hasPreview && ( + {!hasTable && (
@@ -277,7 +290,7 @@ const MetaData = ({ Matplotlib rendering )} - {hasPreviewData && ( + {hasTable && ( <>
state.node.outputs, (state) => state.node.code, (state) => state.node.parameters, - (state) => state.node.plot, - (state) => state.node.image, - (state) => state.node.trackingData, (state) => state.node.datasetType, (state) => state.node.originalType, (state) => state.node.transcodedTypes, (state) => state.node.runCommand, (state) => state.node.preview, + (state) => state.node.previewType, (state) => state.node.stats, (state) => state.isPrettyName, ], @@ -59,27 +57,18 @@ export const getClickedNodeMetaData = createSelector( nodeOutputs, nodeCodes, nodeParameters, - nodePlot, - nodeImage, - nodeTrackingData, nodeDatasetTypes, nodeOriginalTypes, nodeTranscodedTypes, nodeRunCommand, preview, + previewType, stats, isPrettyName ) => { if (!nodeId || Object.keys(nodeType).length === 0) { return null; } - //rounding of tracking data - nodeTrackingData[nodeId] && - Object.entries(nodeTrackingData[nodeId]).forEach(([key, value]) => { - if (typeof value === 'number') { - nodeTrackingData[nodeId][key] = Math.round(value * 100) / 100; - } - }); const metadata = { id: nodeId, @@ -95,9 +84,6 @@ export const getClickedNodeMetaData = createSelector( runCommand: nodeRunCommand[nodeId], code: nodeCodes[nodeId], filepath: nodeFilepaths[nodeId], - plot: nodePlot[nodeId], - image: nodeImage[nodeId], - trackingData: nodeTrackingData[nodeId], datasetType: nodeDatasetTypes[nodeId], originalType: nodeOriginalTypes[nodeId], transcodedTypes: nodeTranscodedTypes[nodeId], @@ -116,6 +102,7 @@ export const getClickedNodeMetaData = createSelector( : nodeOutputs[nodeId] && nodeOutputs[nodeId].map((nodeOutput) => stripNamespace(nodeOutput)), preview: preview && preview[nodeId], + previewType: previewType && previewType[nodeId], stats: stats && stats[nodeId], }; diff --git a/src/store/normalize-data.js b/src/store/normalize-data.js index d5908b163c..1546526930 100644 --- a/src/store/normalize-data.js +++ b/src/store/normalize-data.js @@ -151,8 +151,6 @@ const addNode = (state) => (node) => { state.node.code[id] = node.code; state.node.parameters[id] = node.parameters; state.node.filepath[id] = node.filepath; - state.node.plot[id] = node.plot; - state.node.image[id] = node.image; state.node.datasetType[id] = node.dataset_type; state.node.originalType[id] = node.original_type; state.node.transcodedTypes[id] = node.transcoded_types; diff --git a/src/utils/data/node_json_data.mock.json b/src/utils/data/node_json_data.mock.json index 42e0f4ec52..3568ae8aed 100644 --- a/src/utils/data/node_json_data.mock.json +++ b/src/utils/data/node_json_data.mock.json @@ -1,7 +1,8 @@ { "filepath": "/Users/Documents/project-src/test/data/01_raw/iris.csv", "type": "tracking.json_dataset.JSONDataset", - "tracking_data": { + "preview_type": "JSONTrackingPreview", + "preview": { "recommendations": "dummy_recommendation", "recommended_controls": 0.2701227292578884, "projected_optimization": 0.32414727510946606 diff --git a/src/utils/data/node_metrics_data.mock.json b/src/utils/data/node_metrics_data.mock.json index 575fc8c4f8..c311053f31 100644 --- a/src/utils/data/node_metrics_data.mock.json +++ b/src/utils/data/node_metrics_data.mock.json @@ -1,7 +1,8 @@ { "filepath": "/Users/Documents/project-src/test/data/01_raw/iris.csv", "type": "tracking.metrics_dataset.MetricsDataset", - "tracking_data": { + "preview_type": "MetricsTrackingPreview", + "preview": { "recommendations": 0.2160981834063107, "recommended_controls": 0.2701227292578884, "projected_optimization": 0.32414727510946606 diff --git a/src/utils/data/node_plot.mock.json b/src/utils/data/node_plot.mock.json index b9388935e1..1298df306e 100644 --- a/src/utils/data/node_plot.mock.json +++ b/src/utils/data/node_plot.mock.json @@ -1,6 +1,7 @@ { "type": "plotly.plotly_dataset.PlotlyDataset", - "plot": { + "preview_type": "PlotlyPreview", + "preview": { "data": [ { "uid": "babced",