Skip to content
This repository has been archived by the owner on Mar 3, 2023. It is now read-only.

Update for Kedro 0.17.0 #12

Merged
merged 4 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions kedro-exercises/spaceflight/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,16 @@ conf/**/*credentials*
# ignore everything in the following folders
data/**
logs/**
references/**
results/**

# except their sub-folders
!data/**/
!logs/**/
!references/**/
!results/**/

# also keep all .gitkeep files
!.gitkeep

# keep also the example dataset
!data/01_raw/iris.csv
!data/01_raw/*


##########################
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
import logging.config
import sys
from pathlib import Path
from typing import Any, Dict

from IPython.core.magic import register_line_magic, needs_local_scope
from IPython.core.magic import needs_local_scope, register_line_magic

# Find the project root (./../../../)
from kedro.framework.startup import _get_project_metadata

startup_error = None
project_path = Path(__file__).parents[3].resolve()


@register_line_magic
def reload_kedro(path, line=None):
def reload_kedro(path, line=None, env: str = None, extra_params: Dict[str, Any] = None):
"""Line magic which reloads all Kedro default variables."""
global startup_error
global context
global catalog
global session

try:
import kedro.config.default_logger
from kedro.framework.context import load_context
from kedro.framework.hooks import get_hook_manager
from kedro.framework.session import KedroSession
from kedro.framework.session.session import _activate_session
from kedro.framework.cli.jupyter import collect_line_magic
except ImportError:
logging.error(
Expand All @@ -30,23 +36,32 @@ def reload_kedro(path, line=None):
try:
path = path or project_path

# clear hook manager
hook_manager = get_hook_manager()
name_plugin_pairs = hook_manager.list_name_plugin()
for name, plugin in name_plugin_pairs:
hook_manager.unregister(name=name, plugin=plugin)

# remove cached user modules
context = load_context(path)
to_remove = [mod for mod in sys.modules if mod.startswith(context.package_name)]
metadata = _get_project_metadata(path)
to_remove = [
mod for mod in sys.modules if mod.startswith(metadata.package_name)
]
# `del` is used instead of `reload()` because: If the new version of a module does not
# define a name that was defined by the old version, the old definition remains.
for module in to_remove:
del sys.modules[module]

session = KedroSession.create(
metadata.package_name, path, env=env, extra_params=extra_params
)
_activate_session(session, force=True)
logging.debug("Loading the context from %s", str(path))
# Reload context to fix `pickle` related error (it is unable to serialize reloaded objects)
# Some details can be found here:
# https://modwsgi.readthedocs.io/en/develop/user-guides/issues-with-pickle-module.html#packing-and-script-reloading
context = load_context(path)
context = session.load_context()
catalog = context.catalog

logging.info("** Kedro project %s", str(context.project_name))
logging.info("Defined global variable `context` and `catalog`")
logging.info("** Kedro project %s", str(metadata.project_name))
logging.info("Defined global variable `context`, `session` and `catalog`")

for line_magic in collect_line_magic():
register_line_magic(needs_local_scope(line_magic))
Expand All @@ -59,4 +74,4 @@ def reload_kedro(path, line=None):
raise err


reload_kedro(project_path)
reload_kedro(project_path)
6 changes: 0 additions & 6 deletions kedro-exercises/spaceflight/.kedro.yml

This file was deleted.

27 changes: 0 additions & 27 deletions kedro-exercises/spaceflight/LICENSE.md

This file was deleted.

124 changes: 117 additions & 7 deletions kedro-exercises/spaceflight/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,121 @@
# What is `kedro-exercises/spaceflight`?
# kedro_tutorial

This repo contains exercise code to help you learn how to use [Kedro](https://github.com/quantumblacklabs/kedro) 0.16.6 using spaceflight tutorial.
## Overview

You are supposed to create Kedro pipelines by writing code in the following Python files:
- `src/kedro_tutorial/pipelines/data_engineering/pipeline.py`
- `src/kedro_tutorial/pipelines/data_science/pipeline.py`
- `src/kedro_tutorial/pipeline.py`
This is your new Kedro project, which was generated using `Kedro 0.17.0`, with the completed version of the [Space Flights tutorial](https://kedro.readthedocs.io/en/stable/03_tutorial/01_spaceflights_tutorial.html) and the data necessary to run the project.

The complete code can be found in `/kedro-tutorial` directory.
Take a look at the [Kedro documentation](https://kedro.readthedocs.io) to get started.

## Rules and guidelines

In order to get the best out of the template:

* Don't remove any lines from the `.gitignore` file we provide
* Make sure your results can be reproduced by following a [data engineering convention](https://kedro.readthedocs.io/en/stable/11_faq/01_faq.html#what-is-data-engineering-convention)
* Don't commit data to your repository
* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/`

## How to install dependencies

Declare any dependencies in `src/requirements.txt` for `pip` installation and `src/environment.yml` for `conda` installation.

To install them, run:

```
kedro install
```

## How to run Kedro

You can run your Kedro project with:

```
kedro run
```

## How to test your Kedro project

Have a look at the file `src/tests/test_run.py` for instructions on how to write your tests. You can run your tests as follows:

```
kedro test
```

To configure the coverage threshold, look at the `.coveragerc` file.


## Project dependencies

To generate or update the dependency requirements for your project:

```
kedro build-reqs
```

This will copy the contents of `src/requirements.txt` into a new file `src/requirements.in` which will be used as the source for [`pip-compile`](https://github.com/jazzband/pip-tools#example-usage-for-pip-compile). You can see the output of the resolution by opening `src/requirements.txt`.

After this, if you'd like to update your project requirements, please update `src/requirements.in` and re-run `kedro build-reqs`.

[Further information about project dependencies](https://kedro.readthedocs.io/en/stable/04_kedro_project_setup/01_dependencies.html#project-specific-dependencies)

## How to work with Kedro and notebooks

> Note: Using `kedro jupyter` or `kedro ipython` to run your notebook provides these variables in scope: `context`, `catalog`, `session`, and `startup_error`.

### Jupyter
To use Jupyter notebooks in your Kedro project, you need to install Jupyter:

```
pip install jupyter
```

After installing Jupyter, you can start a local notebook server:

```
kedro jupyter notebook
```

### JupyterLab
To use JupyterLab, you need to install it:

```
pip install jupyterlab
```

After installing JupyterLab, you can start it with:

```
kedro jupyter lab
```

### IPython
To run an IPython session in your Kedro project:

```
kedro ipython
```

### How to convert notebook cells to nodes in a Kedro project
You can move notebook code over into a Kedro project structure using a mixture of [cell tagging](https://jupyter-notebook.readthedocs.io/en/stable/changelog.html#cell-tags) and Kedro CLI commands.

By adding the `node` tag to a cell and running the command below, the cell's source code will be copied over to a Python file within `src/<package_name>/nodes/`:

```
kedro jupyter convert <filepath_to_my_notebook>
```
> *Note:* The name of the Python file matches the name of the original notebook.

Alternatively, you may want to transform all your notebooks in one go. Run the following command to convert all notebook files found in the project root directory and under any of its sub-folders:

```
kedro jupyter convert --all
```

### How to ignore notebook output cells in `git`
To automatically strip out all output cell contents before committing to `git`, you can run `kedro activate-nbstripout`. This will add a hook in `.git/config` which will run `nbstripout` before anything is committed to `git`.

> *Note:* Your output cells will be retained locally.

## Package your Kedro project

[Further information about building project documentation and packaging your project](https://kedro.readthedocs.io/en/stable/03_tutorial/05_package_a_project.html)
18 changes: 0 additions & 18 deletions kedro-exercises/spaceflight/conf/base/credentials.yml

This file was deleted.

14 changes: 10 additions & 4 deletions kedro-exercises/spaceflight/docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2020 QuantumBlack Visual Analytics Limited
# Copyright 2021 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -47,15 +47,15 @@
#
import re

from kedro.cli.utils import find_stylesheets
from kedro.framework.cli.utils import find_stylesheets
from recommonmark.transform import AutoStructify

from kedro_tutorial import __version__ as release

# -- Project information -----------------------------------------------------

project = "kedro_tutorial"
copyright = "2020, QuantumBlack Visual Analytics Limited"
copyright = "2021, QuantumBlack Visual Analytics Limited"
author = "QuantumBlack"

# The short X.Y version.
Expand Down Expand Up @@ -189,7 +189,13 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, "kedro_tutorial", "kedro_tutorial Documentation", [author], 1,)
(
master_doc,
"kedro_tutorial",
"kedro_tutorial Documentation",
[author],
1,
)
]

# -- Options for Texinfo output ----------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions kedro-exercises/spaceflight/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[tool.kedro]
package_name = "kedro_tutorial"
project_name = "kedro_tutorial"
project_version = "0.17.0"

[tool.isort]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 88
known_third_party = "kedro"

[tool.pytest.ini_options]
addopts = """
--cov-report term-missing \
--cov src/kedro_tutorial -ra"""
6 changes: 3 additions & 3 deletions kedro-exercises/spaceflight/setup.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[tool:pytest]
addopts=--cov-report term-missing
--cov src/kedro_tutorial -ra
[flake8]
max-line-length=88
extend-ignore=E203
4 changes: 2 additions & 2 deletions kedro-exercises/spaceflight/src/kedro_tutorial/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 QuantumBlack Visual Analytics Limited
# Copyright 2021 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,7 @@
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""kedro-tutorial
"""kedro_tutorial
"""

__version__ = "0.1"
Loading