Skip to content

Commit

Permalink
Merge branch 'main' into click-validation-options
Browse files Browse the repository at this point in the history
  • Loading branch information
lrcouto authored Jan 13, 2025
2 parents 48934d5 + 71650a0 commit 3f184a8
Show file tree
Hide file tree
Showing 20 changed files with 378 additions and 50 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/all-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ jobs:
python-version: ${{ matrix.python-version }}
branch: ${{ inputs.branch }}


pip-compile:
strategy:
matrix:
Expand All @@ -65,3 +64,14 @@ jobs:
os: ${{ matrix.os }}
python-version: ${{ matrix.python-version }}
branch: ${{ inputs.branch }}

# Runs the reusable secret-scanning workflow for the requested branch.
detect-secrets:
  strategy:
    matrix:
      # A single OS / Python combination is used for this job.
      os: [ ubuntu-latest ]
      python-version: [ "3.11" ]
  uses: ./.github/workflows/detect-secrets.yml
  with:
    os: ${{ matrix.os }}
    python-version: ${{ matrix.python-version }}
    branch: ${{ inputs.branch }}
38 changes: 38 additions & 0 deletions .github/workflows/detect-secrets.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Reusable workflow (triggered via `workflow_call`) that scans every tracked
# file in the repository against the `.secrets.baseline` file.
name: Detect secrets on Kedro

on:
  workflow_call:
    inputs:
      # Runner label; used for `runs-on` and as part of the cache key.
      os:
        type: string
      # Python version installed on the runner; also part of the cache key.
      python-version:
        type: string
      # Git ref passed to the checkout step; empty by default.
      branch:
        type: string
        default: ''

jobs:
  lint:
    runs-on: ${{ inputs.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.branch }}
      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}
      # NOTE(review): a later step calls `uv`; confirm that ~/.cache/pip is the
      # cache the install targets actually populate. The key has no dependency
      # hash, so a stored cache is presumably never invalidated - verify.
      - name: Cache python packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{inputs.os}}-python-${{inputs.python-version}}
      - name: Install dependencies
        run: |
          make install-test-requirements
          make install-pre-commit
      # Print the installed package set to the job log.
      - name: pip freeze
        run: uv pip freeze --system
      # NUL-delimited file list (`-z` / `-0`) keeps paths containing spaces
      # intact when they are handed to the hook.
      - name: Scan all tracked files
        run: git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline
1 change: 1 addition & 0 deletions .github/workflows/docs-language-linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ jobs:
- uses: errata-ai/vale-action@reviewdog
with:
reporter: github-pr-check
version: 3.9.2 # temp
10 changes: 4 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,9 @@ repos:
pass_filenames: false
entry: lint-imports

- repo: local
- repo: https://github.com/Yelp/detect-secrets
rev: v1.5.0
hooks:
- id: secret_scan
name: "Secret scan"
language: system
- id: detect-secrets
args: ['--baseline', '.secrets.baseline']
exclude: ^features/steps/test_starter
pass_filenames: false
entry: make secret-scan
219 changes: 219 additions & 0 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
{
"version": "1.5.0",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "AzureStorageKeyDetector"
},
{
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
},
{
"name": "CloudantDetector"
},
{
"name": "DiscordBotTokenDetector"
},
{
"name": "GitHubTokenDetector"
},
{
"name": "GitLabTokenDetector"
},
{
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
},
{
"name": "IbmCosHmacDetector"
},
{
"name": "IPPublicDetector"
},
{
"name": "JwtTokenDetector"
},
{
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "OpenAIDetector"
},
{
"name": "PrivateKeyDetector"
},
{
"name": "PypiTokenDetector"
},
{
"name": "SendGridDetector"
},
{
"name": "SlackDetector"
},
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TelegramBotTokenDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_lock_file"
},
{
"path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_swagger_file"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {
"features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [
{
"type": "Secret Keyword",
"filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml",
"hashed_secret": "a62f2225bf70bfaccbc7f1ef2a397836717377de",
"is_verified": false,
"line_number": 8
},
{
"type": "Secret Keyword",
"filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml",
"hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997",
"is_verified": false,
"line_number": 16
}
],
"kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [
{
"type": "Secret Keyword",
"filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml",
"hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4",
"is_verified": false,
"line_number": 9
},
{
"type": "Secret Keyword",
"filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml",
"hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997",
"is_verified": false,
"line_number": 18
}
],
"tests/config/test_omegaconf_config.py": [
{
"type": "Basic Auth Credentials",
"filename": "tests/config/test_omegaconf_config.py",
"hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
"is_verified": false,
"line_number": 39
}
],
"tests/framework/context/test_context.py": [
{
"type": "Basic Auth Credentials",
"filename": "tests/framework/context/test_context.py",
"hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
"is_verified": false,
"line_number": 63
}
],
"tests/io/conftest.py": [
{
"type": "Secret Keyword",
"filename": "tests/io/conftest.py",
"hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e",
"is_verified": false,
"line_number": 71
},
{
"type": "Secret Keyword",
"filename": "tests/io/conftest.py",
"hashed_secret": "3c3b274d119ff5a5ec6c1e215c1cb794d9973ac1",
"is_verified": false,
"line_number": 117
},
{
"type": "Secret Keyword",
"filename": "tests/io/conftest.py",
"hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70",
"is_verified": false,
"line_number": 131
}
],
"tests/io/test_data_catalog.py": [
{
"type": "Secret Keyword",
"filename": "tests/io/test_data_catalog.py",
"hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70",
"is_verified": false,
"line_number": 529
}
],
"tests/io/test_kedro_data_catalog.py": [
{
"type": "Secret Keyword",
"filename": "tests/io/test_kedro_data_catalog.py",
"hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70",
"is_verified": false,
"line_number": 482
}
]
},
"generated_at": "2025-01-08T12:21:43Z"
}
3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ e2e-tests-fast:
pip-compile:
pip-compile -q -o -

secret-scan:
trufflehog --max_depth 1 --exclude_paths trufflehog-ignore.txt .

build-docs:
uv pip install -e ".[docs]"
./docs/build-docs.sh "docs"
Expand Down
6 changes: 6 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@

## Major features and improvements
* Implemented `KedroDataCatalog.to_config()` method that converts the catalog instance into a configuration format suitable for serialization.
* Improved `OmegaConfigLoader` performance.
* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base.

## Bug fixes and other changes
* Added validation to ensure dataset versions consistency across catalog.
* Fixed a bug in project creation when using a custom starter template offline.
* Added `node` import to the pipeline template.
* Updated the error message shown when executing `kedro run` without a pipeline.
* Safeguarded hooks against a user incorrectly registering a hook class in `settings.py`.

## Breaking changes to the API
## Documentation changes
Expand Down
28 changes: 27 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@
from __future__ import annotations

import importlib
import inspect
import os
import re
import sys
from inspect import getmembers, isclass, isfunction
from pathlib import Path

from click import secho, style

import kedro
from kedro import __version__ as release

# -- Project information -----------------------------------------------------
Expand All @@ -47,7 +50,7 @@
"sphinx_autodoc_typehints",
"sphinx.ext.doctest",
"sphinx.ext.ifconfig",
"sphinx.ext.viewcode",
"sphinx.ext.linkcode",
"sphinx_copybutton",
"myst_parser",
"notfound.extension",
Expand Down Expand Up @@ -534,3 +537,26 @@ def setup(app):

myst_heading_anchors = 5
myst_enable_extensions = ["colon_fence"]

def linkcode_resolve(domain, info):
    """Resolve the GitHub URL of the source code for a documented Python object.

    This is the resolver hook used by the ``sphinx.ext.linkcode`` extension.

    Args:
        domain: Sphinx domain of the documented object; only ``'py'`` is handled.
        info: Mapping holding the ``'module'`` and dotted ``'fullname'`` of the
            object being documented.

    Returns:
        A URL pointing at the object's line range on the ``main`` branch of
        ``kedro-org/kedro``, or ``None`` when the domain is not ``'py'``, the
        object or its source cannot be located, or the source file lives
        outside the ``kedro`` package.
    """
    if domain != 'py':
        return None

    try:
        obj = sys.modules[info['module']]
        for attr in info['fullname'].split('.'):
            obj = getattr(obj, attr)
        # Follow ``__wrapped__`` chains so decorated callables link to the
        # underlying implementation rather than to the decorator.
        obj = inspect.unwrap(obj)

        filename = inspect.getsourcefile(obj)
        source, lineno = inspect.getsourcelines(obj)
        relpath = os.path.relpath(filename, start=os.path.dirname(
            kedro.__file__))
    except (KeyError, ImportError, AttributeError, TypeError, OSError, ValueError):
        return None

    # Objects re-exported from outside the package resolve to a path that
    # climbs out of it; such a path has no counterpart in the repository.
    if relpath.split(os.sep, 1)[0] == os.pardir:
        return None

    # GitHub line ranges are written ``#L<start>-L<end>``; URLs always use
    # forward slashes, whatever separator the local OS uses.
    return 'https://github.com/kedro-org/kedro/blob/main/kedro/%s#L%d-L%d' % (
        relpath.replace(os.sep, '/'), lineno, lineno + len(source) - 1
    )
4 changes: 4 additions & 0 deletions docs/source/data/how_to_create_a_custom_dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ This typing is optional however, and defaults to `Any` type.

The `_EPHEMERAL` boolean attribute in `AbstractDataset` indicates whether a dataset is ephemeral, that is, not persistent. For example, {py:class}`~kedro.io.MemoryDataset` is not persistent, so its `_EPHEMERAL` is set to `True`. By default, `_EPHEMERAL` is set to `False`.

```{note}
The parameter that specifies the location of the data file or folder must be named `filename`, `filepath`, or `path` in the constructor of the custom dataset class to comply with the Kedro convention.
```

Here is an example skeleton for `ImageDataset`:

<details>
Expand Down
Loading

0 comments on commit 3f184a8

Please sign in to comment.