Skip to content

Commit

Permalink
Conditionally move session store and stats file to .viz directory (#1915
Browse files Browse the repository at this point in the history
)

* merge main from remote

Signed-off-by: ravi-kumar-pilla <[email protected]>

* initial draft - testing basic shift

Signed-off-by: ravi-kumar-pilla <[email protected]>

* initial draft - testing basic shift

Signed-off-by: ravi-kumar-pilla <[email protected]>

* format check

Signed-off-by: ravi-kumar-pilla <[email protected]>

* stats moved

Signed-off-by: ravi-kumar-pilla <[email protected]>

* session store path change

Signed-off-by: ravi-kumar-pilla <[email protected]>

* add test coverage and lint fix

Signed-off-by: ravi-kumar-pilla <[email protected]>

* docs update

Signed-off-by: ravi-kumar-pilla <[email protected]>

* update release note

Signed-off-by: ravi-kumar-pilla <[email protected]>

* create dir reuse method

Signed-off-by: ravi-kumar-pilla <[email protected]>

* address PR comments

Signed-off-by: ravi-kumar-pilla <[email protected]>

---------

Signed-off-by: ravi-kumar-pilla <[email protected]>
  • Loading branch information
ravi-kumar-pilla authored Jun 25, 2024
1 parent 4005b58 commit f82eb87
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 30 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Please follow the established format:
- Enable/disable preview for all the datasets when publishing Kedro-Viz from CLI. (#1894)
- Enable/disable preview for all the datasets when publishing Kedro-Viz from UI. (#1895)
- Display published URLs. (#1907)
- Conditionally move session store and stats file to .viz directory. (#1915)

## Bug fixes and other changes

Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions docs/source/experiment_tracking.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ This specifies the creation of the `SQLiteStore` under the `data` subfolder, usi

This step is crucial to enable experiment tracking features on Kedro-Viz, as it is the database used to serve all run data to the Kedro-Viz front-end. Once this step is complete, you can either proceed to [set up the tracking datasets](#set-up-experiment-tracking-datasets) or [set up your nodes and pipelines to log metrics](#modify-your-nodes-and-pipelines-to-log-metrics); these two activities are interchangeable, but both should be completed to get a working experiment tracking setup.

```{note}
Starting from Kedro-Viz 9.2.0, if the user does not provide `SESSION_STORE_ARGS` in the project settings, a default directory `.viz` will be created at the root of your Kedro project and used for `SQLiteStore`.
```

## Collaborative experiment tracking

Expand Down
Binary file added docs/source/images/pipeline_dataset_stats.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions docs/source/kedro-viz_visualisation.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ The next step is optional, but useful to check that all is working. Run the full
```bash
kedro run
```
Kedro-Viz provides stats related to datasets under the metadata panel

![](./images/pipeline_dataset_stats.png)

These stats are generated by hooks. If you have Kedro-Viz installed and execute `kedro run`, the hooks will generate the stats by default. To disable this, you can disable Kedro-Viz hooks in the settings file of your project.

```{note}
Starting from Kedro-Viz 9.2.0, the dataset stats file `stats.json` will be moved to `.viz` directory at the root of your Kedro project in-case you have `kedro-viz` installed and had not disabled hooks for `kedro-viz` plugin.
```

To start Kedro-Viz, type the following into your terminal from the project directory:

Expand Down
3 changes: 3 additions & 0 deletions package/kedro_viz/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@
"for users with kedro-datasets >= 2.1.0",
},
}

VIZ_SESSION_STORE_ARGS = {"path": ".viz"}
VIZ_METADATA_ARGS = {"path": ".viz"}
4 changes: 3 additions & 1 deletion package/kedro_viz/integrations/kedro/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline

from kedro_viz.constants import VIZ_METADATA_ARGS

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -49,7 +51,7 @@ def _get_dataset_stats(project_path: Path) -> Dict:
project_path: the path where the Kedro project is located.
"""
try:
stats_file_path = project_path / "stats.json"
stats_file_path = project_path / f"{VIZ_METADATA_ARGS['path']}/stats.json"

if not stats_file_path.exists():
return {}
Expand Down
17 changes: 15 additions & 2 deletions package/kedro_viz/integrations/kedro/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import json
import logging
from collections import defaultdict
from pathlib import PurePosixPath
from pathlib import Path, PurePosixPath
from typing import Any, Union

from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.io.core import get_filepath_str

from kedro_viz.constants import VIZ_METADATA_ARGS
from kedro_viz.launchers.utils import _find_kedro_project
from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -73,7 +75,18 @@ def after_pipeline_run(self):
"""
try:
with open("stats.json", "w", encoding="utf8") as file:
kedro_project_path = _find_kedro_project(Path.cwd())

if not kedro_project_path:
logger.warning("Could not find a Kedro project to create stats file")
return

stats_file_path = Path(
f"{kedro_project_path}/{VIZ_METADATA_ARGS['path']}/stats.json"
)
stats_file_path.parent.mkdir(parents=True, exist_ok=True)

with stats_file_path.open("w", encoding="utf8") as file:
sorted_stats_data = {
dataset_name: self.format_stats(stats)
for dataset_name, stats in self._stats.items()
Expand Down
21 changes: 20 additions & 1 deletion package/kedro_viz/integrations/kedro/sqlite_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""kedro_viz.intergrations.kedro.sqlite_store is a child of BaseSessionStore
which stores sessions data in the SQLite database"""

# pylint: disable=no-member, broad-exception-caught

import getpass
Expand All @@ -10,12 +11,15 @@
from typing import Any, Optional

import fsspec
from kedro.framework.project import settings
from kedro.framework.session.store import BaseSessionStore
from kedro.io.core import get_protocol_and_path
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from kedro_viz.constants import VIZ_SESSION_STORE_ARGS
from kedro_viz.database import make_db_session_factory
from kedro_viz.launchers.utils import _find_kedro_project
from kedro_viz.models.experiment_tracking import RunModel

logger = logging.getLogger(__name__)
Expand All @@ -33,6 +37,15 @@ def _is_json_serializable(obj: Any):
return False


def _get_session_path(session_path: str) -> str:
"""Returns the session path by creating its parent directory
if unavailable.
"""
session_file_path = Path(session_path)
session_file_path.parent.mkdir(parents=True, exist_ok=True)
return str(session_file_path)


class SQLiteStore(BaseSessionStore):
"""Stores the session data on the sqlite db."""

Expand All @@ -49,7 +62,13 @@ def __init__(self, *args, remote_path: Optional[str] = None, **kwargs):
@property
def location(self) -> str:
"""Returns location of the sqlite_store database"""
return str(Path(self._path) / "session_store.db")
if "path" not in settings.SESSION_STORE_ARGS:
kedro_project_path = _find_kedro_project(Path.cwd()) or self._path
return _get_session_path(
f"{kedro_project_path}/{VIZ_SESSION_STORE_ARGS['path']}/session_store.db"
)

return _get_session_path(f"{self._path}/session_store.db")

@property
def remote_location(self) -> Optional[str]:
Expand Down
7 changes: 7 additions & 0 deletions package/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
from kedro_viz.server import populate_data


@pytest.fixture
def setup_kedro_project(tmp_path):
"""Fixture to setup a temporary Kedro project directory structure."""
kedro_project_path = tmp_path / "kedro_project"
return kedro_project_path


@pytest.fixture
def data_access_manager():
yield DataAccessManager()
Expand Down
48 changes: 31 additions & 17 deletions package/tests/test_integrations/test_hooks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from collections import defaultdict
from unittest.mock import mock_open, patch

import pytest
from kedro.io import MemoryDataset
Expand Down Expand Up @@ -84,31 +84,45 @@ def test_after_dataset_saved(

@pytest.mark.parametrize("dataset_name", ["companies", "companies@pandas1"])
def test_after_pipeline_run(
dataset_name, example_dataset_stats_hook_obj, example_data_frame
dataset_name,
example_dataset_stats_hook_obj,
example_data_frame,
mocker,
setup_kedro_project,
caplog,
):
mocker.patch(
"kedro_viz.integrations.kedro.hooks._find_kedro_project",
return_value=setup_kedro_project,
)
stats_dataset_name = example_dataset_stats_hook_obj.get_stats_dataset_name(
dataset_name
)
stats_json = {
example_dataset_stats_hook_obj._stats = {
stats_dataset_name: {
"rows": int(example_data_frame.shape[0]),
"columns": int(example_data_frame.shape[1]),
}
}
# Create a mock_open context manager
with patch("builtins.open", mock_open()) as mock_file, patch(
"json.dump"
) as mock_json_dump:
example_dataset_stats_hook_obj.after_dataset_loaded(
dataset_name, example_data_frame
)
example_dataset_stats_hook_obj.after_pipeline_run()

# Assert that the file was opened with the correct filename
mock_file.assert_called_once_with("stats.json", "w", encoding="utf8")

# Assert that json.dump was called with the expected arguments
mock_json_dump.assert_called_once_with(stats_json, mock_file.return_value)
expected_stats_file_path = setup_kedro_project / ".viz/stats.json"
example_dataset_stats_hook_obj.after_pipeline_run()

# Check if stats.json file is created
assert expected_stats_file_path.exists()

# Verify the contents of the stats.json file
with expected_stats_file_path.open("r", encoding="utf8") as file:
data = json.load(file)

assert data == example_dataset_stats_hook_obj._stats

# stats file should not get created if it is not a valid kedro project path
mocker.patch(
"kedro_viz.integrations.kedro.hooks._find_kedro_project",
return_value=None,
)
example_dataset_stats_hook_obj.after_pipeline_run()
assert "Could not find a Kedro project to create stats file" in caplog.text


@pytest.mark.parametrize(
Expand Down
56 changes: 47 additions & 9 deletions package/tests/test_integrations/test_sqlite_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,40 @@


@pytest.fixture
def store_path(tmp_path):
return Path(tmp_path)
def parametrize_session_store_args(request):
"""Fixture to parameterize has_session_store_args."""

# This fixture sets a class attribute has_session_store_args
# based on the parameter passed
request.cls.has_session_store_args = request.param


@pytest.fixture
def mock_session_store_args(request, mocker, setup_kedro_project):
"""Fixture to mock SESSION_STORE_ARGS and _find_kedro_project."""

# This fixture uses the class attribute has_session_store_args
# to apply the appropriate mocks.
if request.cls.has_session_store_args:
mocker.patch.dict(
"kedro_viz.integrations.kedro.sqlite_store.settings.SESSION_STORE_ARGS",
{"path": "some_path"},
clear=True,
)
else:
mocker.patch(
"kedro_viz.integrations.kedro.sqlite_store._find_kedro_project",
return_value=setup_kedro_project,
)


@pytest.fixture
def store_path(request, tmp_path, setup_kedro_project):
if request.cls.has_session_store_args:
return Path(tmp_path)
session_store_path = Path(tmp_path / setup_kedro_project / ".viz")
session_store_path.mkdir(parents=True, exist_ok=True)
return session_store_path


@pytest.fixture
Expand Down Expand Up @@ -54,24 +86,24 @@ def remote_path():


@pytest.fixture
def mock_db1(tmp_path):
database_loc = str(tmp_path / "db1.db")
def mock_db1(store_path):
database_loc = str(store_path / "db1.db")
with make_db_session_factory(database_loc).begin() as session:
session.add(RunModel(id="1", blob="blob1"))
yield Path(database_loc)


@pytest.fixture
def mock_db2(tmp_path):
database_loc = str(tmp_path / "db2.db")
def mock_db2(store_path):
database_loc = str(store_path / "db2.db")
with make_db_session_factory(database_loc).begin() as session:
session.add(RunModel(id="2", blob="blob2"))
yield Path(database_loc)


@pytest.fixture
def mock_db3_with_db2_data(tmp_path):
database_loc = str(tmp_path / "db3.db")
def mock_db3_with_db2_data(store_path):
database_loc = str(store_path / "db3.db")
with make_db_session_factory(database_loc).begin() as session:
session.add(RunModel(id="2", blob="blob2"))
yield Path(database_loc)
Expand Down Expand Up @@ -147,6 +179,8 @@ def test_get_dbname_without_env_var(mocker):
assert dbname == "computer_user_name.db"


@pytest.mark.usefixtures("parametrize_session_store_args", "mock_session_store_args")
@pytest.mark.parametrize("parametrize_session_store_args", [True, False], indirect=True)
class TestSQLiteStore:
def test_empty(self, store_path):
sqlite_store = SQLiteStore(store_path, next(session_id()))
Expand Down Expand Up @@ -230,7 +264,11 @@ def test_upload_to_s3_fail(self, mocker, store_path, remote_path, caplog):
assert "Upload failed: Connection error" in caplog.text

def test_download_from_s3_success(
self, mocker, store_path, remote_path, mocked_db_in_s3, tmp_path
self,
mocker,
store_path,
remote_path,
mocked_db_in_s3,
):
mocker.patch("fsspec.filesystem")
sqlite_store = SQLiteStore(
Expand Down

0 comments on commit f82eb87

Please sign in to comment.