From 4cad93ed508a0eafb8ad0082fdb49e2dbed1d1e5 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 15:24:03 +0000 Subject: [PATCH 01/10] Bump kedro-datasets to 2.0.0 Signed-off-by: Merel Theisen --- docs/source/conf.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 743d5771bd..49258acee9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -68,7 +68,7 @@ intersphinx_mapping = { "kedro-viz": ("https://docs.kedro.org/projects/kedro-viz/en/v6.6.1/", None), - "kedro-datasets": ("https://docs.kedro.org/projects/kedro-datasets/en/kedro-datasets-1.8.0/", None), + "kedro-datasets": ("https://docs.kedro.org/projects/kedro-datasets/en/kedro-datasets-2.0.0/", None), } # The suffix(es) of source filenames. diff --git a/pyproject.toml b/pyproject.toml index 20256a1341..f5ab2df827 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,7 @@ docs = [ "sphinxcontrib-mermaid~=0.7.1", "myst-parser~=1.0.0", "Jinja2<3.1.0", - "kedro-datasets[all]~=1.8.0", + "kedro-datasets[all]~=2.0.0", ] all = [ "kedro[test,docs]" ] From 1cd7d610eb30639d8cf9758902c5a4737f9926be Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 16:12:56 +0000 Subject: [PATCH 02/10] Bump python version used to build RTD Signed-off-by: Merel Theisen --- .readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 037b2ed597..3492645d89 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -8,7 +8,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.8" + python: "3.9" nodejs: "19" apt_packages: - libasound2 From db2c58b1213a9fc674f75692873a188807f37809 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 16:38:16 +0000 Subject: [PATCH 03/10] Bump s3fs to be compatible with 2.0.0 Signed-off-by: Merel Theisen --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f5ab2df827..d2adc4f1cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ test = [ "pytest-mock>=1.7.1, <2.0", "pytest-xdist[psutil]~=2.2.1", "pytest~=7.2", - "s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem. + "s3fs<2024.1 and >=2021.4", "semver", "trufflehog~=2.1", ] From 65f68047cdfd70b2482392b5728af1b94e97442c Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 16:41:03 +0000 Subject: [PATCH 04/10] Bump s3fs to be compatible with 2.0.0 Signed-off-by: Merel Theisen --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d2adc4f1cd..06e0694d22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ test = [ "pytest-mock>=1.7.1, <2.0", "pytest-xdist[psutil]~=2.2.1", "pytest~=7.2", - "s3fs<2024.1 and >=2021.4", + "s3fs>=2021.4, <2024.1" # Upper bound set arbitrarily, to be reassessed in early 2024 "semver", "trufflehog~=2.1", ] From b34a67b3a57d48bc7130e5b51448e8415b697480 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 16:42:52 +0000 Subject: [PATCH 05/10] Fix lint Signed-off-by: Merel Theisen --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 06e0694d22..36731b7557 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ test = [ "pytest-mock>=1.7.1, <2.0", "pytest-xdist[psutil]~=2.2.1", "pytest~=7.2", - "s3fs>=2021.4, <2024.1" # Upper bound set arbitrarily, to be reassessed in early 2024 + "s3fs>=2021.4, <2024.1", # Upper bound set arbitrarily, to be reassessed in early 2024 "semver", "trufflehog~=2.1", ] From 56e91518ed01a7306802805cc5def7f7d366a394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 11 Dec 2023 18:51:37 +0100 Subject: [PATCH 06/10] Replace outdated kedro-datasets links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- docs/source/data/advanced_data_catalog_usage.md | 2 +- docs/source/data/data_catalog.md | 2 +- docs/source/data/how_to_create_a_custom_dataset.md | 2 +- docs/source/data/index.md | 2 +- docs/source/extend_kedro/common_use_cases.md | 4 ++-- docs/source/faq/faq.md | 2 +- docs/source/get_started/kedro_concepts.md | 2 +- docs/source/tutorial/set_up_data.md | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/data/advanced_data_catalog_usage.md b/docs/source/data/advanced_data_catalog_usage.md index 9242a0be9e..d61c969abd 100644 --- a/docs/source/data/advanced_data_catalog_usage.md +++ b/docs/source/data/advanced_data_catalog_usage.md @@ -11,7 +11,7 @@ From version **`2.0.0`** of `kedro-datasets`, all dataset names have changed to To use the `DataCatalog` API, construct a `DataCatalog` object programmatically in a file like `catalog.py`. -In the following code, we use several pre-built data loaders documented in the {doc}`kedro-datasets documentation`. +In the following code, we use several pre-built data loaders documented in the {py:mod}`kedro-datasets documentation `. ```python from kedro.io import DataCatalog diff --git a/docs/source/data/data_catalog.md b/docs/source/data/data_catalog.md index b55f16151f..f37e5c458e 100644 --- a/docs/source/data/data_catalog.md +++ b/docs/source/data/data_catalog.md @@ -34,7 +34,7 @@ shuttles: Kedro supports a range of connectors, for CSV files, Excel spreadsheets, Parquet files, Feather files, HDF5 files, JSON documents, pickled objects, SQL tables, SQL queries, and more. They are supported using libraries such as pandas, PySpark, NetworkX, and Matplotlib. -{doc}`The kedro-datasets package documentation` contains a comprehensive list of all available file types. +{py:mod}`The kedro-datasets package documentation ` contains a comprehensive list of all available file types. ### Dataset `filepath` diff --git a/docs/source/data/how_to_create_a_custom_dataset.md b/docs/source/data/how_to_create_a_custom_dataset.md index 4976dc732a..b3e67f39ef 100644 --- a/docs/source/data/how_to_create_a_custom_dataset.md +++ b/docs/source/data/how_to_create_a_custom_dataset.md @@ -1,6 +1,6 @@ # Advanced: Tutorial to create a custom dataset -{doc}`Kedro supports many datasets` out of the box, but you may find that you need to create a custom dataset. For example, you may need to handle a proprietary data format or filesystem in your pipeline, or perhaps you have found a particular use case for a dataset that Kedro does not support. This tutorial explains how to create a custom dataset to read and save image data. +{py:mod}`Kedro supports many datasets ` out of the box, but you may find that you need to create a custom dataset. For example, you may need to handle a proprietary data format or filesystem in your pipeline, or perhaps you have found a particular use case for a dataset that Kedro does not support. This tutorial explains how to create a custom dataset to read and save image data. ## AbstractDataset diff --git a/docs/source/data/index.md b/docs/source/data/index.md index 70108da95a..18edfc1ab9 100644 --- a/docs/source/data/index.md +++ b/docs/source/data/index.md @@ -3,7 +3,7 @@ In a Kedro project, the Data Catalog is a registry of all data sources available for use by the project. The catalog is stored in a YAML file (`catalog.yml`) that maps the names of node inputs and outputs as keys in the `DataCatalog` class. -The {doc}`kedro-datasets` package offers built-in datasets for common file types and file systems. +The {py:mod}`kedro-datasets ` package offers built-in datasets for common file types and file systems. We first introduce the basic sections of `catalog.yml`, which is the file used to register data sources for a Kedro project. diff --git a/docs/source/extend_kedro/common_use_cases.md b/docs/source/extend_kedro/common_use_cases.md index 0714ea209e..b57910b509 100644 --- a/docs/source/extend_kedro/common_use_cases.md +++ b/docs/source/extend_kedro/common_use_cases.md @@ -4,7 +4,7 @@ Kedro has a few built-in mechanisms for you to extend its behaviour. This docume ## Use Case 1: How to add extra behaviour to Kedro's execution timeline -The execution timeline of a Kedro pipeline can be thought of as a sequence of actions performed by various Kedro library components, such as the {doc}`datasets`, [DataCatalog](/kedro.io.DataCatalog), [Pipeline](/kedro.pipeline.Pipeline), [Node](/kedro.pipeline.node.Node) and [KedroContext](/kedro.framework.context.KedroContext). +The execution timeline of a Kedro pipeline can be thought of as a sequence of actions performed by various Kedro library components, such as the {py:mod}`datasets `, [DataCatalog](/kedro.io.DataCatalog), [Pipeline](/kedro.pipeline.Pipeline), [Node](/kedro.pipeline.node.Node) and [KedroContext](/kedro.framework.context.KedroContext). At different points in the lifecycle of these components, you might want to add extra behaviour: for example, you could add extra computation for profiling purposes _before_ and _after_ a node runs, or _before_ and _after_ the I/O actions of a dataset, namely the `load` and `save` actions. @@ -12,7 +12,7 @@ This can now achieved by using [Hooks](../hooks/introduction.md), to define the ## Use Case 2: How to integrate Kedro with additional data sources -You can use {doc}`datasets` to interface with various different data sources. If the data source you plan to use is not supported out of the box by Kedro, you can [create a custom dataset](../data/how_to_create_a_custom_dataset.md). +You can use {py:mod}`datasets ` to interface with various different data sources. If the data source you plan to use is not supported out of the box by Kedro, you can [create a custom dataset](../data/how_to_create_a_custom_dataset.md). ## Use Case 3: How to add or modify CLI commands diff --git a/docs/source/faq/faq.md b/docs/source/faq/faq.md index 99b8f235b9..0e361bb3cb 100644 --- a/docs/source/faq/faq.md +++ b/docs/source/faq/faq.md @@ -10,7 +10,7 @@ This is a growing set of technical FAQs. The [product FAQs on the Kedro website] ## Kedro documentation * {doc}`Where can I find the documentation about Kedro-Viz`? -* {doc}`Where can I find the documentation for Kedro's datasets`? +* {py:mod}`Where can I find the documentation for Kedro's datasets `? ## Working with Jupyter diff --git a/docs/source/get_started/kedro_concepts.md b/docs/source/get_started/kedro_concepts.md index 59f6bdffaf..ffe602a7e2 100644 --- a/docs/source/get_started/kedro_concepts.md +++ b/docs/source/get_started/kedro_concepts.md @@ -57,7 +57,7 @@ greeting_pipeline = pipeline([return_greeting_node, join_statements_node]) The Kedro Data Catalog is the registry of all data sources that the project can use to manage loading and saving data. It maps the names of node inputs and outputs as keys in a `DataCatalog`, a Kedro class that can be specialised for different types of data storage. -{doc}`Kedro provides different built-in datasets` for numerous file types and file systems, so you don’t have to write the logic for reading/writing data. +{py:mod}`Kedro provides different built-in datasets ` for numerous file types and file systems, so you don’t have to write the logic for reading/writing data. ## Kedro project directory structure diff --git a/docs/source/tutorial/set_up_data.md b/docs/source/tutorial/set_up_data.md index 3064066a2a..5b2c1a327f 100644 --- a/docs/source/tutorial/set_up_data.md +++ b/docs/source/tutorial/set_up_data.md @@ -118,7 +118,7 @@ When you have finished, close `ipython` session with `exit()`. ### Custom data -{doc}`Kedro supports numerous datasets` out of the box, but you can also add support for any proprietary data format or filesystem. +{py:mod}`Kedro supports numerous datasets ` out of the box, but you can also add support for any proprietary data format or filesystem. You can find further information about [how to add support for custom datasets](../data/how_to_create_a_custom_dataset.md) in specific documentation covering advanced usage. From 473974f2f33df30d603a9d97ced794d718a15654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 11 Dec 2023 19:26:00 +0100 Subject: [PATCH 07/10] Remove kedro-datasets from docs dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 36731b7557..32f9f01b1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,6 @@ docs = [ "sphinxcontrib-mermaid~=0.7.1", "myst-parser~=1.0.0", "Jinja2<3.1.0", - "kedro-datasets[all]~=2.0.0", ] all = [ "kedro[test,docs]" ] From d274cbee2360153e1f421ef1310be1e1e300fc68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 11 Dec 2023 19:37:49 +0100 Subject: [PATCH 08/10] Restore simple RTD build process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- .readthedocs.yml | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 3492645d89..886b5ceba7 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -15,13 +15,6 @@ build: jobs: post_create_environment: - npm install -g @mermaid-js/mermaid-cli - pre_install: - # pip==23.2 breaks pip-tools<7.0, and pip-tools>=7.0 does not support Python 3.7 - # pip==23.3 breaks dependency resolution - - python -m pip install -U "pip>=21.2,<23.2" - # These are technically installation steps, due to RTD's limit we need to inject the installation earlier. - - python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext - - python -m pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir .[docs,test] pre_build: - pip freeze - python -m sphinx -WETan -j auto -D language=en -b linkcheck -d _build/doctrees docs/source _build/linkcheck @@ -32,15 +25,9 @@ sphinx: configuration: docs/source/conf.py fail_on_warning: true -# Build documentation with MkDocs -# mkdocs: -# configuration: mkdocs.yml - -# Optionally set the version of Python and requirements required to build your docs -# python: -# install: -# - method: pip -# path: . -# extra_requirements: -# - docs -# - test +python: + install: + - method: pip + path: . + extra_requirements: + - docs From 1d69b1e4871602403bf78bb5bfefd280b389b831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 11 Dec 2023 19:39:47 +0100 Subject: [PATCH 09/10] Update docutils version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Juan Luis Cano Rodríguez --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32f9f01b1a..4a7079a89b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,9 +84,7 @@ test = [ "trufflehog~=2.1", ] docs = [ - # docutils>=0.17 changed the HTML - # see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115 - "docutils==0.16", + "docutils<0.18", "sphinx~=5.3.0", "sphinx_rtd_theme==1.2.0", # Regression on sphinx-autodoc-typehints 1.21 From 8854b54e5be61949ba7909e297ca0fe86dcab541 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 11 Dec 2023 18:48:11 +0000 Subject: [PATCH 10/10] Update release notes Signed-off-by: Merel Theisen --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index f3abbbadef..f70a1c7438 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -17,6 +17,7 @@ * Removed `pip-tools` as a dependency. * Accepted path-like filepaths more broadly for datasets. * Removed support for defining the `layer` attribute at top-level within DataCatalog. +* Bumped `kedro-datasets` to latest `2.0.0`. ## Breaking changes to the API * Renamed the `data_sets` argument and the `_data_sets` attribute in `Catalog` and their references to `datasets` and `_datasets` respectively.