From ab35173da5d5f769666f8df20a9c8eb55cf1a571 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:10:14 +0000 Subject: [PATCH 1/9] Remove checkout branch check when not an official starter (#4389) * Remove branch check when not a starter Signed-off-by: Ankita Katiyar * fix docslink + tests Signed-off-by: Ankita Katiyar * fix docslink + tests Signed-off-by: Ankita Katiyar * no cover Signed-off-by: Ankita Katiyar * Add release notes Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar --- RELEASE.md | 1 + .../deployment/databricks/databricks_deployment_workflow.md | 4 ++-- kedro/framework/cli/starters.py | 3 +-- tests/framework/cli/test_starters.py | 6 ------ 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0cc0fdf013..f3651ec992 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -5,6 +5,7 @@ ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. +* Fixed a bug in project creation when using a custom starter template offline. ## Breaking changes to the API ## Documentation changes diff --git a/docs/source/deployment/databricks/databricks_deployment_workflow.md b/docs/source/deployment/databricks/databricks_deployment_workflow.md index 2ebe21c48e..cf5ede1589 100644 --- a/docs/source/deployment/databricks/databricks_deployment_workflow.md +++ b/docs/source/deployment/databricks/databricks_deployment_workflow.md @@ -253,8 +253,8 @@ The Databricks API enables you to programmatically interact with Databricks serv 1. [Set up your Kedro project for deployment on Databricks](#set-up-your-project-for-deployment-to-databricks) 2. Create a JSON file containing your job's configuration. -3. Use the Jobs API's [`/create` endpoint](https://docs.databricks.com/workflows/jobs/jobs-api-updates.html#create) to create a new job. -4. 
Use the Jobs API's [`/runs/submit` endpoint](https://docs.databricks.com/workflows/jobs/jobs-api-updates.html#runs-submit) to run your newly created job. +3. Use the Jobs API's [`/create` endpoint](https://docs.databricks.com/en/reference/jobs-2.0-api.html#create) to create a new job. +4. Use the Jobs API's [`/runs/submit` endpoint](https://docs.databricks.com/en/reference/jobs-2.0-api.html#runs-submit) to run your newly created job. ### How to use the Databricks CLI to automatically deploy a Kedro project diff --git a/kedro/framework/cli/starters.py b/kedro/framework/cli/starters.py index fbafbb56c1..bdbfb1e512 100644 --- a/kedro/framework/cli/starters.py +++ b/kedro/framework/cli/starters.py @@ -345,7 +345,6 @@ def new( # noqa: PLR0913 checkout = _select_checkout_branch_for_cookiecutter(checkout) elif starter_alias is not None: template_path = starter_alias - checkout = _select_checkout_branch_for_cookiecutter(checkout) else: template_path = str(TEMPLATE_PATH) @@ -520,7 +519,7 @@ def _get_available_tags(template_path: str) -> list: # tags: ['/tags/version', '/tags/version^{}'] # unique_tags: {'version'} - except git.GitCommandError: + except git.GitCommandError: # pragma: no cover return [] return sorted(unique_tags) diff --git a/tests/framework/cli/test_starters.py b/tests/framework/cli/test_starters.py index 7f2641da10..87c434dfd1 100644 --- a/tests/framework/cli/test_starters.py +++ b/tests/framework/cli/test_starters.py @@ -861,9 +861,6 @@ def test_git_repo(self, fake_kedro_cli, mock_determine_repo_dir, mock_cookiecutt "template": "git+https://github.com/fake/fake.git", "directory": None, } - starters_version = mock_determine_repo_dir.call_args[1].pop("checkout", None) - - assert starters_version in [version, "main"] assert kwargs.items() <= mock_determine_repo_dir.call_args[1].items() del kwargs["directory"] assert kwargs.items() <= mock_cookiecutter.call_args[1].items() @@ -910,9 +907,6 @@ def test_git_repo_custom_directory( "template": 
"git+https://github.com/fake/fake.git", "directory": "my_directory", } - starters_version = mock_determine_repo_dir.call_args[1].pop("checkout", None) - - assert starters_version in [version, "main"] assert kwargs.items() <= mock_determine_repo_dir.call_args[1].items() assert kwargs.items() <= mock_cookiecutter.call_args[1].items() From 70734ce00ee46b58c85b4cf04afbe89d32c06758 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:37:58 +0000 Subject: [PATCH 2/9] Added node import to the pipeline template (#4395) * Added node import to template Signed-off-by: Elena Khaustova * Fixed docs Signed-off-by: Elena Khaustova * Added node import to template Signed-off-by: Elena Khaustova * Added release notes Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- RELEASE.md | 1 + .../pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index f3651ec992..7af26f6ca4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,6 +6,7 @@ ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. * Fixed a bug in project creation when using a custom starter template offline. +* Added `node` import to the pipeline template. 
## Breaking changes to the API ## Documentation changes diff --git a/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py b/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py index 587123c64c..b932c16531 100644 --- a/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py +++ b/kedro/templates/pipeline/{{ cookiecutter.pipeline_name }}/pipeline.py @@ -3,7 +3,7 @@ generated using Kedro {{ cookiecutter.kedro_version }} """ -from kedro.pipeline import Pipeline, pipeline +from kedro.pipeline import node, Pipeline, pipeline # noqa def create_pipeline(**kwargs) -> Pipeline: From 057de3457a778698001d9fd2f5797ca3b4a9ba3a Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Fri, 3 Jan 2025 12:38:45 -0600 Subject: [PATCH 3/9] Improve OmegaConfigLoader performance (#4367) * improve global and runtime param resolver * testing with slack suggestions * fix type lint issue * update release note Signed-off-by: ravi_kumar_pilla --------- Signed-off-by: ravi_kumar_pilla --- RELEASE.md | 1 + kedro/config/omegaconf_config.py | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 7af26f6ca4..c19418f0ca 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,7 @@ ## Major features and improvements * Implemented `KedroDataCatalog.to_config()` method that converts the catalog instance into a configuration format suitable for serialization. +* Improve OmegaConfigLoader performance ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. 
diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index 8d82ebf360..3f40624a4b 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -126,7 +126,8 @@ def __init__( # noqa: PLR0913 self.base_env = base_env or "" self.default_run_env = default_run_env or "" self.merge_strategy = merge_strategy or {} - + self._globals_oc: DictConfig | None = None + self._runtime_params_oc: DictConfig | None = None self.config_patterns = { "catalog": ["catalog*", "catalog*/**", "**/catalog*"], "parameters": ["parameters*", "parameters*/**", "**/parameters*"], @@ -346,12 +347,11 @@ def load_and_merge_dir_config( OmegaConf.merge(*aggregate_config, self.runtime_params), resolve=True ) + merged_config_container = OmegaConf.to_container( + OmegaConf.merge(*aggregate_config), resolve=True + ) return { - k: v - for k, v in OmegaConf.to_container( - OmegaConf.merge(*aggregate_config), resolve=True - ).items() - if not k.startswith("_") + k: v for k, v in merged_config_container.items() if not k.startswith("_") } @staticmethod @@ -436,9 +436,12 @@ def _get_globals_value(self, variable: str, default_value: Any = _NO_VALUE) -> A raise InterpolationResolutionError( "Keys starting with '_' are not supported for globals." 
) - globals_oc = OmegaConf.create(self._globals) + + if not self._globals_oc: + self._globals_oc = OmegaConf.create(self._globals) + interpolated_value = OmegaConf.select( - globals_oc, variable, default=default_value + self._globals_oc, variable, default=default_value ) if interpolated_value != _NO_VALUE: return interpolated_value @@ -449,9 +452,11 @@ def _get_globals_value(self, variable: str, default_value: Any = _NO_VALUE) -> A def _get_runtime_value(self, variable: str, default_value: Any = _NO_VALUE) -> Any: """Return the runtime params values to the resolver""" - runtime_oc = OmegaConf.create(self.runtime_params) + if not self._runtime_params_oc: + self._runtime_params_oc = OmegaConf.create(self.runtime_params) + interpolated_value = OmegaConf.select( - runtime_oc, variable, default=default_value + self._runtime_params_oc, variable, default=default_value ) if interpolated_value != _NO_VALUE: return interpolated_value From 5721edada43f99e0d4f8f34d133d5d677fdf71c8 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Mon, 6 Jan 2025 13:20:16 -0600 Subject: [PATCH 4/9] Safeguard hooks when user incorrectly registers a hook class in settings.py (#4392) * add error message for hook reg * update release note Signed-off-by: ravi_kumar_pilla --------- Signed-off-by: ravi_kumar_pilla --- RELEASE.md | 1 + kedro/framework/hooks/manager.py | 6 ++++++ tests/framework/hooks/test_manager.py | 31 ++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index c19418f0ca..4f2ac76362 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,7 @@ * Added validation to ensure dataset versions consistency across catalog. * Fixed a bug in project creation when using a custom starter template offline. * Added `node` import to the pipeline template. +* Safeguard hooks when user incorrectly registers a hook class in settings.py. 
## Breaking changes to the API ## Documentation changes diff --git a/kedro/framework/hooks/manager.py b/kedro/framework/hooks/manager.py index ceec064246..cc1e49d822 100644 --- a/kedro/framework/hooks/manager.py +++ b/kedro/framework/hooks/manager.py @@ -4,6 +4,7 @@ import logging from collections.abc import Iterable +from inspect import isclass from typing import Any from pluggy import PluginManager @@ -48,6 +49,11 @@ def _register_hooks(hook_manager: PluginManager, hooks: Iterable[Any]) -> None: # case hooks have already been registered, so we perform a simple check # here to avoid an error being raised and break user's workflow. if not hook_manager.is_registered(hooks_collection): + if isclass(hooks_collection): + raise TypeError( + "KedroSession expects hooks to be registered as instances. " + "Have you forgotten the `()` when registering a hook class ?" + ) hook_manager.register(hooks_collection) diff --git a/tests/framework/hooks/test_manager.py b/tests/framework/hooks/test_manager.py index 42dc3e9f64..6120ba256f 100644 --- a/tests/framework/hooks/test_manager.py +++ b/tests/framework/hooks/test_manager.py @@ -1,6 +1,11 @@ import pytest +from pluggy import PluginManager -from kedro.framework.hooks.manager import _create_hook_manager, _NullPluginManager +from kedro.framework.hooks.manager import ( + _create_hook_manager, + _NullPluginManager, + _register_hooks, +) from kedro.framework.hooks.specs import ( DataCatalogSpecs, DatasetSpecs, @@ -10,6 +15,10 @@ ) +class ExampleHook: + pass + + @pytest.mark.parametrize( "hook_specs,hook_name,hook_params", [ @@ -73,3 +82,23 @@ def test_null_plugin_manager_returns_none_when_called(): assert ( plugin_manager.hook.before_dataset_saved(dataset_name="mock", data=[]) is None ) + + +@pytest.mark.parametrize( + "hooks, should_raise", + [ + ([ExampleHook], True), + ([ExampleHook()], False), + ], +) +def test_register_hooks(hooks, should_raise): + mock_hook_manager = PluginManager("test_project") + + if should_raise: + with 
pytest.raises( + TypeError, match="KedroSession expects hooks to be registered as instances" + ): + _register_hooks(mock_hook_manager, hooks) + else: + _register_hooks(mock_hook_manager, hooks) + assert mock_hook_manager.is_registered(hooks[0]) From 0bb470d4917b7e56abfbed4a9431ad5bb2ecbd83 Mon Sep 17 00:00:00 2001 From: Ravi Kumar Pilla Date: Mon, 6 Jan 2025 13:51:37 -0600 Subject: [PATCH 5/9] Update error message when executing kedro run without pipeline (#4391) * improve err message for empty project kedro run * Update docs/source/get_started/new_project.md Co-authored-by: Yury Fedotov <102987839+yury-fedotov@users.noreply.github.com> Signed-off-by: Ravi Kumar Pilla * update suggestion * update release note Signed-off-by: ravi_kumar_pilla --------- Signed-off-by: Ravi Kumar Pilla Signed-off-by: ravi_kumar_pilla Co-authored-by: Yury Fedotov <102987839+yury-fedotov@users.noreply.github.com> --- RELEASE.md | 1 + docs/source/get_started/new_project.md | 4 ++++ kedro/framework/cli/starters.py | 8 ++++++++ kedro/pipeline/pipeline.py | 3 ++- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 4f2ac76362..552fa27f41 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,7 @@ * Added validation to ensure dataset versions consistency across catalog. * Fixed a bug in project creation when using a custom starter template offline. * Added `node` import to the pipeline template. +* Update error message when executing kedro run without pipeline. * Safeguard hooks when user incorrectly registers a hook class in settings.py. ## Breaking changes to the API diff --git a/docs/source/get_started/new_project.md b/docs/source/get_started/new_project.md index 10410cee1b..de1e56b77a 100644 --- a/docs/source/get_started/new_project.md +++ b/docs/source/get_started/new_project.md @@ -132,6 +132,10 @@ Now run the project: kedro run ``` +```{warning} +`kedro run` requires at least one pipeline with nodes. 
Please define a pipeline before running this command and ensure it is registered in `pipeline_registry.py`. +``` + ```{note} The first time you type a `kedro` command in a new project, you will be asked whether you wish to opt into [usage analytics](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-telemetry). Your decision is recorded in the `.telemetry` file so that subsequent calls to `kedro` in this project do not ask this question again. ``` diff --git a/kedro/framework/cli/starters.py b/kedro/framework/cli/starters.py index bdbfb1e512..f7082311db 100644 --- a/kedro/framework/cli/starters.py +++ b/kedro/framework/cli/starters.py @@ -13,6 +13,7 @@ import stat import sys import tempfile +import warnings from itertools import groupby from pathlib import Path from typing import TYPE_CHECKING, Any, Callable @@ -272,6 +273,13 @@ def _print_selection_and_prompt_info( "It has been created with an example pipeline.", fg="green", ) + else: + warnings.warn( + "Your project does not contain any pipelines with nodes. " + "Please ensure that at least one pipeline has been defined before " + "executing 'kedro run'.", + UserWarning, + ) # Give hint for skipping interactive flow if interactive: diff --git a/kedro/pipeline/pipeline.py b/kedro/pipeline/pipeline.py index 749eea8548..826acd1b13 100644 --- a/kedro/pipeline/pipeline.py +++ b/kedro/pipeline/pipeline.py @@ -772,7 +772,8 @@ def filter( # noqa: PLR0913 if not filtered_pipeline.nodes: raise ValueError( - "Pipeline contains no nodes after applying all provided filters" + "Pipeline contains no nodes after applying all provided filters. " + "Please ensure that at least one pipeline with nodes has been defined." 
) return filtered_pipeline From 847c433304d4233da1311c92a04bb75bf76a5b8f Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:21:58 +0000 Subject: [PATCH 6/9] Document the convention for filepath related attributes in custom dataset creation (#4403) * Document the magic behaviour of filepath attributes Signed-off-by: Ankita Katiyar * pin vale Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar --- .github/workflows/docs-language-linter.yml | 1 + docs/source/data/how_to_create_a_custom_dataset.md | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/docs-language-linter.yml b/.github/workflows/docs-language-linter.yml index 93e1ce9c67..ebdf5c3528 100644 --- a/.github/workflows/docs-language-linter.yml +++ b/.github/workflows/docs-language-linter.yml @@ -15,3 +15,4 @@ jobs: - uses: errata-ai/vale-action@reviewdog with: reporter: github-pr-check + version: 3.9.2 # temp diff --git a/docs/source/data/how_to_create_a_custom_dataset.md b/docs/source/data/how_to_create_a_custom_dataset.md index 7f39987dd7..43536fbf82 100644 --- a/docs/source/data/how_to_create_a_custom_dataset.md +++ b/docs/source/data/how_to_create_a_custom_dataset.md @@ -40,6 +40,10 @@ This typing is optional however, and defaults to `Any` type. The `_EPHEMERAL` boolean attribute in `AbstractDataset` indicates if a dataset is persistent. For example, in the case of {py:class}`~kedro.io.MemoryDataset`, which is not persistent, it is set to True. By default, `_EPHEMERAL` is set to False. +```{note} +The parameter to specify the location of the data file/folder must be called either `filename`, `filepath`, or `path` in the constructor function of the custom dataset class to comply with the Kedro convention. +``` + Here is an example skeleton for `ImageDataset`:
From c6bc8d5a94ff6b4b8c730deb9098c5f3c5a03aa8 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:23:04 +0000 Subject: [PATCH 7/9] Remove outdated telemetry notification (#4397) Signed-off-by: Dmitry Sorokin Signed-off-by: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com> --- docs/source/get_started/new_project.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/get_started/new_project.md b/docs/source/get_started/new_project.md index de1e56b77a..c22b6b50b5 100644 --- a/docs/source/get_started/new_project.md +++ b/docs/source/get_started/new_project.md @@ -136,10 +136,6 @@ kedro run `kedro run` requires at least one pipeline with nodes. Please define a pipeline before running this command and ensure it is registered in `pipeline_registry.py`. ``` -```{note} -The first time you type a `kedro` command in a new project, you will be asked whether you wish to opt into [usage analytics](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-telemetry). Your decision is recorded in the `.telemetry` file so that subsequent calls to `kedro` in this project do not ask this question again. -``` - ## Visualise a Kedro project This section swiftly introduces project visualisation using Kedro-Viz. See the {doc}`Kedro-Viz documentation` for more detail. 
From 396a1f52e4770595bf300a644eff409dba7cdf00 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:57:51 +0000 Subject: [PATCH 8/9] Replace trufflehog with detect-secrets (#4404) * Added secret-scan Signed-off-by: Elena Khaustova * Removed trufflehog Signed-off-by: Elena Khaustova * Removed secret scan pre-commit hook Signed-off-by: Elena Khaustova * Added action to detect secrets Signed-off-by: Elena Khaustova * Renamed scan command for GH action Signed-off-by: Elena Khaustova * Detect secrets test Signed-off-by: Elena Khaustova * Removed test check Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- .github/workflows/all-checks.yml | 12 +- .github/workflows/detect-secrets.yml | 38 +++++ .pre-commit-config.yaml | 10 +- .secrets.baseline | 219 +++++++++++++++++++++++++++ Makefile | 3 - RELEASE.md | 3 +- pyproject.toml | 2 +- trufflehog-ignore.txt | 13 -- 8 files changed, 275 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/detect-secrets.yml create mode 100644 .secrets.baseline delete mode 100644 trufflehog-ignore.txt diff --git a/.github/workflows/all-checks.yml b/.github/workflows/all-checks.yml index c108d1ad42..3b8dbab421 100644 --- a/.github/workflows/all-checks.yml +++ b/.github/workflows/all-checks.yml @@ -54,7 +54,6 @@ jobs: python-version: ${{ matrix.python-version }} branch: ${{ inputs.branch }} - pip-compile: strategy: matrix: @@ -65,3 +64,14 @@ jobs: os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} branch: ${{ inputs.branch }} + + detect-secrets: + strategy: + matrix: + os: [ ubuntu-latest ] + python-version: [ "3.11" ] + uses: ./.github/workflows/detect-secrets.yml + with: + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} + branch: ${{ inputs.branch }} diff --git a/.github/workflows/detect-secrets.yml b/.github/workflows/detect-secrets.yml new file 
mode 100644 index 0000000000..15c3d77524 --- /dev/null +++ b/.github/workflows/detect-secrets.yml @@ -0,0 +1,38 @@ +name: Detect secrets on Kedro + +on: + workflow_call: + inputs: + os: + type: string + python-version: + type: string + branch: + type: string + default: '' + +jobs: + lint: + runs-on: ${{ inputs.os }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ inputs.branch }} + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Cache python packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{inputs.os}}-python-${{inputs.python-version}} + - name: Install dependencies + run: | + make install-test-requirements + make install-pre-commit + - name: pip freeze + run: uv pip freeze --system + - name: Scan all tracked files + run: git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aea1855741..fd0e331184 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,11 +40,9 @@ repos: pass_filenames: false entry: lint-imports - - repo: local + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 hooks: - - id: secret_scan - name: "Secret scan" - language: system + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] exclude: ^features/steps/test_starter - pass_filenames: false - entry: make secret-scan diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000000..965f48cbac --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,219 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, 
+ { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": { + "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [ + { + "type": "Secret Keyword", + "filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": 
"a62f2225bf70bfaccbc7f1ef2a397836717377de", + "is_verified": false, + "line_number": 8 + }, + { + "type": "Secret Keyword", + "filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "is_verified": false, + "line_number": 16 + } + ], + "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [ + { + "type": "Secret Keyword", + "filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", + "is_verified": false, + "line_number": 9 + }, + { + "type": "Secret Keyword", + "filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "is_verified": false, + "line_number": 18 + } + ], + "tests/config/test_omegaconf_config.py": [ + { + "type": "Basic Auth Credentials", + "filename": "tests/config/test_omegaconf_config.py", + "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", + "is_verified": false, + "line_number": 39 + } + ], + "tests/framework/context/test_context.py": [ + { + "type": "Basic Auth Credentials", + "filename": "tests/framework/context/test_context.py", + "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", + "is_verified": false, + "line_number": 63 + } + ], + "tests/io/conftest.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 71 + }, + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "3c3b274d119ff5a5ec6c1e215c1cb794d9973ac1", + "is_verified": false, + "line_number": 117 + }, + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 131 
+ } + ], + "tests/io/test_data_catalog.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/test_data_catalog.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 529 + } + ], + "tests/io/test_kedro_data_catalog.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/test_kedro_data_catalog.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 482 + } + ] + }, + "generated_at": "2025-01-08T12:21:43Z" +} diff --git a/Makefile b/Makefile index ccc8b5a63e..bfc0f3b47d 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,6 @@ e2e-tests-fast: pip-compile: pip-compile -q -o - -secret-scan: - trufflehog --max_depth 1 --exclude_paths trufflehog-ignore.txt . - build-docs: uv pip install -e ".[docs]" ./docs/build-docs.sh "docs" diff --git a/RELEASE.md b/RELEASE.md index 552fa27f41..f522b029b8 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,7 +2,8 @@ ## Major features and improvements * Implemented `KedroDataCatalog.to_config()` method that converts the catalog instance into a configuration format suitable for serialization. -* Improve OmegaConfigLoader performance +* Improve OmegaConfigLoader performance. +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. 
diff --git a/pyproject.toml b/pyproject.toml index 0881b6627b..f98d2ebef5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ dynamic = ["readme", "version"] test = [ "behave==1.2.6", "coverage[toml]", + "detect-secrets~=1.5.0", "import-linter==2.1", "ipylab>=1.0.0", "ipython~=8.10", @@ -72,7 +73,6 @@ test = [ "pytest>=7.2,<9.0", "s3fs>=2021.4, <2025.1", # Upper bound set arbitrarily, to be reassessed in late 2024 "requests_mock", - "trufflehog~=2.1", # mypy related dependencies "pandas-stubs", "types-PyYAML", diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt deleted file mode 100644 index cb5551a327..0000000000 --- a/trufflehog-ignore.txt +++ /dev/null @@ -1,13 +0,0 @@ -docs/package.json -docs/package-lock.json -docs/source/meta/images/KedroArchitecture.drawio -docs/source/nodes_and_pipelines/nodes.md -static/img/kedro_gitflow.svg -.idea/ -.git/ -.mypy_cache/ -.coverage.* -.*\.log -.*\.iml -tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py -docs/source/meta/images/kedro_gitflow.svg From 71650a0e8d90a8b8beb522ccff291c8bc2e14137 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:49:11 +0000 Subject: [PATCH 9/9] Switch to linkcode extension (#4410) * Switch to linkcode extension Signed-off-by: Ankita Katiyar * remove os Signed-off-by: Ankita Katiyar * remove os Signed-off-by: Ankita Katiyar * remove os Signed-off-by: Ankita Katiyar * Refactor Signed-off-by: Ankita Katiyar * use os Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar --- docs/source/conf.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index cea2a550f8..16798fd9e6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,6 +14,8 @@ from __future__ import annotations import importlib +import inspect +import os import re import sys from inspect import getmembers, isclass, isfunction 
@@ -21,6 +23,7 @@ from click import secho, style +import kedro from kedro import __version__ as release # -- Project information ----------------------------------------------------- @@ -47,7 +50,7 @@ "sphinx_autodoc_typehints", "sphinx.ext.doctest", "sphinx.ext.ifconfig", - "sphinx.ext.viewcode", + "sphinx.ext.linkcode", "sphinx_copybutton", "myst_parser", "notfound.extension", @@ -534,3 +537,26 @@ def setup(app): myst_heading_anchors = 5 myst_enable_extensions = ["colon_fence"] + +def linkcode_resolve(domain, info): + """Resolve a GitHub URL corresponding to a Python object.""" + if domain != 'py': + return None + + try: + mod = sys.modules[info['module']] + obj = mod + for attr in info['fullname'].split('.'): + obj = getattr(obj, attr) + obj = inspect.unwrap(obj) + + filename = inspect.getsourcefile(obj) + source, lineno = inspect.getsourcelines(obj) + relpath = os.path.relpath(filename, start=os.path.dirname( + kedro.__file__)) + + return 'https://github.com/kedro-org/kedro/blob/main/kedro/%s#L%d-L%d' % ( + relpath, lineno, lineno + len(source) - 1 + ) + except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError): + return None