Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace watchgod library with watchfiles #2134

Merged
merged 29 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

- Improve `kedro viz build` usage documentation (#2126)
- Fix unserializable parameters value (#2122)
- Replace `watchgod` library with `watchfiles` and improve autoreload file watching filter (#2134)

Check warning on line 19 in RELEASE.md

View workflow job for this annotation

GitHub Actions / vale

[vale] RELEASE.md#L19

[Kedro-viz.Spellings] Did you really mean 'autoreload'?
Raw output
{"message": "[Kedro-viz.Spellings] Did you really mean 'autoreload'?", "location": {"path": "RELEASE.md", "range": {"start": {"line": 19, "column": 60}}}, "severity": "WARNING"}
- Display full dataset type with library prefix in metadata panel (#2136)
- Enable SQLite WAL mode for Azure ML to fix database locking issues (#2131)

Expand Down
3 changes: 2 additions & 1 deletion package/features/steps/lower_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ fastapi==0.100.0
fsspec==2021.4
aiofiles==22.1.0
uvicorn[standard]==0.22.0
watchgod==0.8.2
watchfiles==0.24.0
plotly==4.8
packaging==23.0
pandas==1.3; python_version < '3.10'
Expand All @@ -16,3 +16,4 @@ secure==0.3.0
# numpy 2.0 breaks with old versions of pandas and this
# could be removed when the lowest version supported is updated
numpy==1.26.4
pathspec==0.12.1
88 changes: 88 additions & 0 deletions package/kedro_viz/autoreload_file_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
This module provides a custom file filter for autoreloading. It keeps only files with allowed
file extensions and excludes files that match patterns specified in a .gitignore file.
"""

import logging
from pathlib import Path
from typing import Optional, Set

from pathspec import PathSpec
jitu5 marked this conversation as resolved.
Show resolved Hide resolved
from watchfiles import Change, DefaultFilter

logger = logging.getLogger(__name__)


class AutoreloadFileFilter(DefaultFilter):
    """
    Custom file filter for autoreloading that extends DefaultFilter.

    A change is reported only when it passes ``DefaultFilter``, has one of the
    allowed file extensions, lies under the base path and is not matched by the
    ``.gitignore`` file found in the base path.
    """

    allowed_extensions: Set[str] = {".py", ".yml", ".yaml", ".json"}

    def __init__(self, base_path: Optional[Path] = None):
        """
        Initialize the AutoreloadFileFilter.

        Args:
            base_path (Optional[Path]): The directory whose ``.gitignore`` is loaded
                and against which changed paths are made relative. Defaults to the
                current working directory.
        """
        self.cwd = Path(base_path or Path.cwd())

        # Call the superclass constructor
        super().__init__()

        # Resolve once here; __call__ runs for every file-system event
        self._resolved_cwd = self.cwd.resolve()

        # Load .gitignore patterns; any failure disables .gitignore filtering
        # instead of preventing the watcher from starting
        self.gitignore_spec: Optional[PathSpec] = None
        gitignore_path = self.cwd / ".gitignore"
        try:
            with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
                ignore_patterns = gitignore_file.read().splitlines()
            self.gitignore_spec = PathSpec.from_lines("gitwildmatch", ignore_patterns)
        except FileNotFoundError:
            logger.debug("No .gitignore found at: %s", gitignore_path)
        except (OSError, ValueError) as exc:
            # OSError: unreadable file (permissions, is a directory, ...)
            # ValueError: undecodable content (UnicodeDecodeError); presumably also
            # covers pattern errors raised by pathspec - TODO confirm
            logger.warning(
                "Could not load %s, .gitignore patterns are not applied: %s",
                gitignore_path,
                exc,
            )

    def __call__(self, change: Change, path: str) -> bool:
        """
        Determine whether a file change should be processed.

        Args:
            change (Change): The type of change detected.
            path (str): The path to the file that changed.

        Returns:
            bool: True if the file should be processed, False otherwise.
        """
        if not super().__call__(change, path):
            logger.debug("Filtered out by DefaultFilter: %s", path)
            return False

        path_obj = Path(path)

        # Include only files with allowed extensions. Checked first because it is
        # the cheapest test and must hold even when .gitignore matching fails.
        if path_obj.suffix not in self.allowed_extensions:
            logger.debug("Filtered out by allowed_extensions: %s", path_obj.suffix)
            return False

        # Exclude files that live outside the base path
        try:
            relative_path = path_obj.resolve().relative_to(self._resolved_cwd)
        except ValueError:
            logger.debug("Path not relative to CWD: %s", path)
            return False

        # Exclude files matching .gitignore patterns
        if self.gitignore_spec is not None:
            try:
                ignored = self.gitignore_spec.match_file(str(relative_path))
            # pylint: disable=broad-exception-caught
            except Exception as exc:
                # Best effort: a matching failure must not block the reload
                logger.debug("Exception during .gitignore matching: %s", exc)
                ignored = False
            if ignored:
                logger.debug("Filtered out by .gitignore: %s", relative_path)
                return False

        logger.debug("Allowed file: %s", path)
        return True
19 changes: 12 additions & 7 deletions package/kedro_viz/launchers/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from kedro.framework.cli.project import PARAMS_ARG_HELP
from kedro.framework.cli.utils import _split_params

from kedro_viz.autoreload_file_filter import AutoreloadFileFilter
from kedro_viz.constants import DEFAULT_HOST, DEFAULT_PORT
from kedro_viz.launchers.cli.main import viz

Expand Down Expand Up @@ -163,21 +164,25 @@ def run(
"extra_params": params,
"is_lite": lite,
}

process_context = multiprocessing.get_context("spawn")
if autoreload:
from watchgod import RegExpWatcher, run_process
from watchfiles import run_process

run_process_args = [str(kedro_project_path)]
ravi-kumar-pilla marked this conversation as resolved.
Show resolved Hide resolved
run_process_kwargs = {
"path": kedro_project_path,
"target": run_server,
"kwargs": run_server_kwargs,
"watcher_cls": RegExpWatcher,
"watcher_kwargs": {"re_files": r"^.*(\.yml|\.yaml|\.py|\.json)$"},
"watch_filter": AutoreloadFileFilter(),
}
viz_process = multiprocessing.Process(
target=run_process, daemon=False, kwargs={**run_process_kwargs}
viz_process = process_context.Process(
target=run_process,
daemon=False,
args=run_process_args,
kwargs={**run_process_kwargs},
)
else:
viz_process = multiprocessing.Process(
viz_process = process_context.Process(
target=run_server, daemon=False, kwargs={**run_server_kwargs}
)

Expand Down
13 changes: 8 additions & 5 deletions package/kedro_viz/launchers/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import IPython
from IPython.display import HTML, display
from kedro.framework.project import PACKAGE_NAME
from watchgod import RegExpWatcher, run_process
from watchfiles import run_process

from kedro_viz.autoreload_file_filter import AutoreloadFileFilter
from kedro_viz.launchers.utils import _check_viz_up, _wait_for
from kedro_viz.server import DEFAULT_HOST, DEFAULT_PORT, run_server

Expand Down Expand Up @@ -148,15 +149,17 @@ def run_viz( # pylint: disable=too-many-locals
}
process_context = multiprocessing.get_context("spawn")
if autoreload:
run_process_args = [str(project_path)]
run_process_kwargs = {
"path": project_path,
"target": run_server,
"kwargs": run_server_kwargs,
"watcher_cls": RegExpWatcher,
"watcher_kwargs": {"re_files": r"^.*(\.yml|\.yaml|\.py|\.json)$"},
"watch_filter": AutoreloadFileFilter(),
}
viz_process = process_context.Process(
target=run_process, daemon=False, kwargs={**run_process_kwargs}
target=run_process,
daemon=False,
args=run_process_args,
kwargs={**run_process_kwargs},
)
else:
viz_process = process_context.Process(
Expand Down
17 changes: 11 additions & 6 deletions package/kedro_viz/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from kedro.pipeline import Pipeline

from kedro_viz.api.rest.responses import save_api_responses_to_fs
from kedro_viz.autoreload_file_filter import AutoreloadFileFilter
from kedro_viz.constants import DEFAULT_HOST, DEFAULT_PORT
from kedro_viz.data_access import DataAccessManager, data_access_manager
from kedro_viz.database import make_db_session_factory
Expand Down Expand Up @@ -142,7 +143,7 @@ def run_server(
import argparse
import multiprocessing

from watchgod import RegExpWatcher, run_process
from watchfiles import run_process

parser = argparse.ArgumentParser(description="Launch a development viz server")
parser.add_argument("project_path", help="Path to a Kedro project")
Expand All @@ -156,20 +157,24 @@ def run_server(

project_path = (Path.cwd() / args.project_path).absolute()

run_process_args = [str(project_path)]
run_process_kwargs = {
"path": project_path,
"target": run_server,
"kwargs": {
"host": args.host,
"port": args.port,
"project_path": str(project_path),
},
"watcher_cls": RegExpWatcher,
"watcher_kwargs": {"re_files": r"^.*(\.yml|\.yaml|\.py|\.json)$"},
"watch_filter": AutoreloadFileFilter(),
}

viz_process = multiprocessing.Process(
target=run_process, daemon=False, kwargs={**run_process_kwargs}
process_context = multiprocessing.get_context("spawn")

viz_process = process_context.Process(
target=run_process,
daemon=False,
args=run_process_args,
kwargs={**run_process_kwargs},
)

print("Starting Kedro Viz ...")
Expand Down
3 changes: 2 additions & 1 deletion package/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ secure>=0.3.0
sqlalchemy>=1.4, <3
strawberry-graphql>=0.192.0, <1.0
uvicorn[standard]>=0.30.0, <1.0
watchgod>=0.8.2, <1.0
watchfiles>=0.24.0
pathspec>=0.12.1
1 change: 1 addition & 0 deletions package/test_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ sqlalchemy-stubs~=0.4
strawberry-graphql[cli]>=0.99.0, <1.0
trufflehog~=2.2
httpx~=0.27.0
pathspec>=0.12.1

# mypy
types-aiofiles==0.1.3
Expand Down
128 changes: 128 additions & 0 deletions package/tests/test_autoreload_file_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import logging
import shutil
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest
from watchfiles import Change, DefaultFilter

from kedro_viz.autoreload_file_filter import AutoreloadFileFilter

logger = logging.getLogger(__name__)


@pytest.fixture
def test_environment():
    """Yield the path of a fresh temporary directory and delete it afterwards."""
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    # Teardown: drop the directory together with everything the test created in it
    shutil.rmtree(scratch_dir)


@pytest.fixture
def file_filter(test_environment):
    """Return a filter rooted at the temp directory whose .gitignore lists ``ignored.py``."""
    project_root = Path(test_environment)

    # The filter reads .gitignore in its constructor, so write it first
    (project_root / ".gitignore").write_text("ignored.py\n")

    return AutoreloadFileFilter(base_path=project_root)


def test_no_gitignore(test_environment):
    """Without a .gitignore file, an allowed file passes the filter."""
    project_root = Path(test_environment)
    candidate = project_root / "ignored.py"
    candidate.touch()

    # Make sure no .gitignore is present before the filter is built
    gitignore = project_root / ".gitignore"
    if gitignore.exists():
        gitignore.unlink()
    unfiltered = AutoreloadFileFilter(base_path=project_root)

    outcome = unfiltered(Change.modified, str(candidate))
    assert outcome, "File should pass the filter when .gitignore is missing"


def test_gitignore_exception(file_filter, test_environment):
    """An allowed file still passes when .gitignore matching raises."""
    candidate = Path(test_environment) / "test.py"
    candidate.touch()

    # Force every .gitignore match attempt to fail
    failing_match = patch(
        "pathspec.PathSpec.match_file", side_effect=Exception("Mocked exception")
    )
    with failing_match:
        outcome = file_filter(Change.modified, str(candidate))

    assert outcome, "Filter should pass the file if .gitignore matching fails"


def test_allowed_file(file_filter, test_environment):
    """A ``.py`` file that is not gitignored passes the filter."""
    candidate = Path(test_environment) / "test.py"
    candidate.touch()

    outcome = file_filter(Change.modified, str(candidate))
    assert outcome, "Allowed file should pass the filter"


def test_disallowed_file(file_filter, test_environment):
    """A ``.txt`` file is rejected because its extension is not allowed."""
    candidate = Path(test_environment) / "test.txt"
    candidate.touch()

    outcome = file_filter(Change.modified, str(candidate))
    assert not outcome, "Disallowed file should not pass the filter"


def test_gitignored_file(file_filter, test_environment):
    """A file listed in .gitignore is rejected despite its allowed extension."""
    candidate = Path(test_environment) / "ignored.py"
    candidate.touch()

    outcome = file_filter(Change.modified, str(candidate))
    assert not outcome, "Gitignored file should not pass the filter"


def test_non_relative_path(file_filter, test_environment):
    """A file outside the filter's base path is rejected despite an allowed extension."""
    # Use a second, independent temporary directory instead of writing into the
    # parent of the process CWD: it lies outside the filter's base path, is
    # always writable, and is removed even when the filter call raises.
    with tempfile.TemporaryDirectory() as other_dir:
        outside_file = Path(other_dir) / "outside.py"
        outside_file.touch()

        result = file_filter(Change.modified, str(outside_file))

    assert not result, "File outside the CWD should not pass the filter"


def test_no_allowed_extension(file_filter, test_environment):
    """A file with no extension at all is rejected."""
    candidate = Path(test_environment) / "no_extension"
    candidate.touch()

    outcome = file_filter(Change.modified, str(candidate))
    assert not outcome, "File without allowed extension should not pass the filter"


def test_directory_path(file_filter, test_environment):
    """A change reported for a directory named ``some_directory`` is rejected."""
    folder = Path(test_environment) / "some_directory"
    folder.mkdir()

    outcome = file_filter(Change.modified, str(folder))
    assert not outcome, "Directories should not pass the filter"


def test_filtered_out_by_default_filter(file_filter, test_environment, mocker):
    """A rejection by ``DefaultFilter`` is final, even for an otherwise allowed file."""
    candidate = Path(test_environment) / "filtered.py"
    candidate.touch()

    # Make the parent class reject every change so only that branch is exercised
    mocker.patch.object(DefaultFilter, "__call__", return_value=False)

    outcome = file_filter(Change.modified, str(candidate))
    assert not outcome, "File should be filtered out by DefaultFilter"
Loading
Loading