Skip to content

Commit

Permalink
initial draft
Browse files Browse the repository at this point in the history
Signed-off-by: ravi_kumar_pilla <[email protected]>
  • Loading branch information
ravi-kumar-pilla committed Jan 15, 2025
1 parent e9542ad commit dc31929
Show file tree
Hide file tree
Showing 6 changed files with 417 additions and 3 deletions.
283 changes: 283 additions & 0 deletions demo-project/Untitled.ipynb

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions package/kedro_viz/api/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,37 @@ async def set_secure_headers(request, call_next):

return app

def create_api_app_for_notebook() -> FastAPI:
"""Create an API app for notebook users without full blown kedro project.
Returns:
The FastAPI app
"""
app = _create_base_api_app()
app.include_router(rest_router)

# Check for html directory existence.
if Path(_HTML_DIR).is_dir():
# The html is needed when kedro_viz is used in cli but not required when running
# frontend e2e tests via Cypress
app.mount("/static", StaticFiles(directory=_HTML_DIR / "static"), name="static")

# every time the server reloads, a new app with a new timestamp will be created.
# this is used as an etag embedded in the frontend for client to use when making requests.
app_etag = _create_etag()

# Serve the favicon.ico file from the "html" directory
@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
return FileResponse(_HTML_DIR / "favicon.ico")

@app.get("/")
async def index():
html_content = (_HTML_DIR / "index.html").read_text(encoding="utf-8")
return HTMLResponse(html_content)

return app


def create_api_app_from_project(
project_path: Path, autoreload: bool = False
Expand Down
15 changes: 15 additions & 0 deletions package/kedro_viz/integrations/kedro/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from kedro_viz.integrations.kedro.lite_parser import LiteParser
from kedro_viz.integrations.utils import _VizNullPluginManager
from kedro_viz.models.metadata import Metadata
from kedro_viz.utils import NotebookUser

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -113,6 +114,20 @@ def _load_data_helper(
return catalog, pipelines_dict, session_store, stats_dict


def load_data_for_notebook_users(notebook_user: NotebookUser) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]:

# Create a dummy data catalog with all datasets as memory datasets
catalog = DataCatalog()
session_store = None
stats_dict = {}

# create a dictionary of pipelines, for now we will have single pipeline
# i.e., a __default__ pipeline

pipelines_dict = {"__default__": notebook_user.pipeline[0]}
return catalog, pipelines_dict, session_store, stats_dict


def load_data(
project_path: Path,
env: Optional[str] = None,
Expand Down
65 changes: 65 additions & 0 deletions package/kedro_viz/launchers/experimental_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import multiprocessing
from typing import Dict

from IPython.display import HTML, IFrame, display
from kedro.io.data_catalog import DataCatalog
from kedro.pipeline import Pipeline

from kedro_viz.constants import DEFAULT_HOST, DEFAULT_PORT
from kedro_viz.launchers.jupyter import _allocate_port
from kedro_viz.launchers.utils import _check_viz_up, _wait_for
from kedro_viz.server import run_server
from kedro_viz.utils import NotebookUser

_VIZ_PROCESSES: Dict[str, int] = {}

class KedroVizNotebook:
def visualize(self, pipeline: Pipeline, catalog: DataCatalog = None, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT, embed_in_notebook=True):
"""
Show the visualization either in a browser or embedded in a notebook.
Args:
pipeline: Kedro Pipeline to visualize
catalog: Data Catalog for the pipeline
host: the host to launch the webserver
port: the port to launch the webserver
embed_in_notebook (bool): Whether to embed the visualization in the notebook.
Raises:
RuntimeError: If the server is not running.
"""

print("The pipeline we get::", pipeline)

# Allocate port
port = _allocate_port(host, start_at=port)

# Terminate existing process if needed
if port in _VIZ_PROCESSES and _VIZ_PROCESSES[port].is_alive():
_VIZ_PROCESSES[port].terminate()

notebook_user = NotebookUser(pipeline=pipeline, catalog=catalog)

run_server_kwargs = {
"host": host,
"port": port,
"notebook_user": notebook_user
}

process_context = multiprocessing.get_context("fork")
viz_process = process_context.Process(
target=run_server, daemon=True, kwargs={**run_server_kwargs}
)

viz_process.start()
_VIZ_PROCESSES[port] = viz_process

_wait_for(func=_check_viz_up, host=host, port=port)

url = f"http://{host}:{port}/"

if embed_in_notebook:
display(IFrame(src=url, width=900, height=600))
else:
link_html = f'<a href="{url}" target="_blank">Open Kedro-Viz</a>'
display(HTML(link_html))
18 changes: 15 additions & 3 deletions package/kedro_viz/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from kedro_viz.integrations.kedro import data_loader as kedro_data_loader
from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore
from kedro_viz.launchers.utils import _check_viz_up, _wait_for, display_cli_message
from kedro_viz.utils import NotebookUser

DEV_PORT = 4142

Expand Down Expand Up @@ -44,6 +45,13 @@ def populate_data(
data_access_manager.add_pipelines(pipelines)


def load_and_populate_data_for_notebook_users(notebook_user: NotebookUser):
"""Loads pipeline data and populates Kedro Viz Repositories"""
catalog, pipelines, session_store, stats_dict = kedro_data_loader.load_data_for_notebook_users(notebook_user)
# Creates data repositories which are used by Kedro Viz Backend APIs
populate_data(data_access_manager, catalog, pipelines, session_store, stats_dict)


def load_and_populate_data(
path: Path,
env: Optional[str] = None,
Expand Down Expand Up @@ -83,6 +91,7 @@ def run_server(
package_name: Optional[str] = None,
extra_params: Optional[Dict[str, Any]] = None,
is_lite: bool = False,
notebook_user: NotebookUser = None
):
"""Run a uvicorn server with a FastAPI app that either launches API response data from a file
or from reading data from a real Kedro project.
Expand Down Expand Up @@ -117,7 +126,12 @@ def run_server(

path = Path(project_path) if project_path else Path.cwd()

if load_file is None:
if notebook_user:
load_and_populate_data_for_notebook_users(notebook_user)
app = apps.create_api_app_for_notebook()
elif load_file:
app = apps.create_api_app_from_file(f"{path}/{load_file}/api")
else:
load_and_populate_data(
path, env, include_hooks, package_name, pipeline_name, extra_params, is_lite
)
Expand All @@ -131,8 +145,6 @@ def run_server(
save_api_responses_to_fs(save_file, fsspec.filesystem("file"), True)

app = apps.create_api_app_from_project(path, autoreload)
else:
app = apps.create_api_app_from_file(f"{path}/{load_file}/api")

uvicorn.run(app, host=host, port=port, log_config=None)

Expand Down
8 changes: 8 additions & 0 deletions package/kedro_viz/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import hashlib
from typing import Tuple

from kedro.io.data_catalog import DataCatalog
from kedro.pipeline import Pipeline

TRANSCODING_SEPARATOR = "@"


Expand Down Expand Up @@ -57,3 +60,8 @@ def _strip_transcoding(element: str) -> str:
def is_dataset_param(dataset_name: str) -> bool:
"""Return whether a dataset is a parameter"""
return dataset_name.lower().startswith("params:") or dataset_name == "parameters"

class NotebookUser:
def __init__(self, pipeline: Pipeline = None, catalog: DataCatalog = None):
self.pipeline = pipeline,
self.catalog = catalog

0 comments on commit dc31929

Please sign in to comment.