Merge branch 'main' into edgarrmondragon/feat/singer-decimal
edgarrmondragon committed Jan 30, 2025
2 parents b0b807d + 1ea1520 commit e27b663
Showing 108 changed files with 2,356 additions and 1,418 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug.yml
@@ -15,7 +15,7 @@ body:
attributes:
label: Singer SDK Version
description: Version of the library you are using
placeholder: "0.42.1"
placeholder: "0.44.1"
validations:
required: true
- type: checkboxes
6 changes: 6 additions & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -3,3 +3,9 @@ contact_links:
- name: Meltano Community
url: https://meltano.com/slack
about: Join us on Slack.
- name: Start a discussion
url: https://github.com/meltano/sdk/discussions/new
about: Start a GitHub discussion.
- name: Singer SDK Documentation
url: https://sdk.meltano.com
about: Learn more about the Singer SDK.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
@@ -17,7 +17,7 @@ updates:
- "patch"
versioning-strategy: increase-if-necessary
- package-ecosystem: pip
directory: "/.github/workflows"
directory: "/.github/workflows/resources"
schedule:
interval: weekly
time: "12:00"
2 changes: 1 addition & 1 deletion .github/workflows/api-changes.yml
@@ -34,7 +34,7 @@ jobs:

- name: Install tools
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
python -Im pip install -U pip
pipx install griffe nox
9 changes: 8 additions & 1 deletion .github/workflows/codspeed.yml
@@ -1,4 +1,4 @@
name: codspeed
name: Performance Testing with CodSpeed 🐇

on:
push:
@@ -24,6 +24,13 @@ on:
# performance analysis in order to generate initial data.
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
FORCE_COLOR: "1"

jobs:
benchmarks:
runs-on: ubuntu-latest
5 changes: 0 additions & 5 deletions .github/workflows/constraints.txt

This file was deleted.

22 changes: 9 additions & 13 deletions .github/workflows/cookiecutter-e2e.yml
@@ -30,36 +30,32 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Upgrade pip
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
run: |
pip install pip
pip --version

- uses: astral-sh/setup-uv@v5
with:
version: ">=0.4.30"

- name: Install Poetry
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
UV_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pipx install poetry
uv tool install poetry
poetry --version
- uses: actions/setup-python@v5
with:
python-version: 3.x

- uses: astral-sh/setup-uv@v4
with:
version: ">=0.4.30"

- name: Install pre-commit
env:
UV_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
uv tool install --with=pre-commit-uv pre-commit
pre-commit --version
- name: Install Nox
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
UV_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
uv tool install nox
nox --version
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
@@ -40,7 +40,7 @@ jobs:
with:
name: Packages
path: dist
- uses: actions/attest-build-provenance@v1
- uses: actions/attest-build-provenance@v2
id: attest
with:
subject-path: "./dist/singer_sdk*"
@@ -65,7 +65,7 @@ jobs:
name: Packages
path: dist
- name: Publish
uses: pypa/gh-action-pypi-publish@v1.12.2
uses: pypa/gh-action-pypi-publish@v1.12.4

upload-to-release:
name: Upload files to release
5 changes: 5 additions & 0 deletions .github/workflows/resources/requirements.txt
@@ -0,0 +1,5 @@
griffe~=1.5
pip==25.0
poetry==2.0.1
pre-commit==4.1.0
nox==2024.10.9
18 changes: 8 additions & 10 deletions .github/workflows/test.yml
@@ -4,29 +4,27 @@ on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- "cookiecutter/**"
- "samples/**"
- "singer_sdk/**"
- "tests/**"
- "noxfile.py"
- "poetry.lock"
- "pyproject.toml"
- ".github/workflows/test.yml"
- ".github/workflows/constraints.txt"
- ".github/workflows/resources/requirements.txt"
push:
branches:
- main
- v*
paths:
- "cookiecutter/**"
- "samples/**"
- "singer_sdk/**"
- "tests/**"
- "noxfile.py"
- "poetry.lock"
- "pyproject.toml"
- ".github/workflows/test.yml"
- ".github/workflows/constraints.txt"
- ".github/workflows/resources/requirements.txt"
workflow_dispatch:
inputs: {}

@@ -73,14 +71,14 @@ jobs:

- name: Upgrade pip
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pip install pip
pip --version
- name: Install Nox
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pipx install 'nox[uv]'
nox --version
@@ -128,14 +126,14 @@ jobs:

- name: Upgrade pip
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pip install pip
pip --version
- name: Install Nox
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pipx install 'nox[uv]'
nox --version
@@ -161,7 +159,7 @@ jobs:

- name: Upgrade pip
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pip install pip
pip --version
@@ -173,7 +171,7 @@ jobs:

- name: Install Nox
env:
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/constraints.txt
PIP_CONSTRAINT: ${{ github.workspace }}/.github/workflows/resources/requirements.txt
run: |
pipx install 'nox[uv]'
nox --version
2 changes: 1 addition & 1 deletion .github/workflows/version_bump.yml
@@ -51,7 +51,7 @@ jobs:

- name: Bump version
id: cz-bump
uses: commitizen-tools/commitizen-action@0.22.0
uses: commitizen-tools/commitizen-action@0.23.1
with:
increment: ${{ github.event.inputs.bump != 'auto' && github.event.inputs.bump || '' }}
prerelease: ${{ github.event.inputs.prerelease != 'none' && github.event.inputs.prerelease || '' }}
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -37,14 +37,14 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.30.0
rev: 0.31.0
hooks:
- id: check-dependabot
- id: check-github-workflows
- id: check-readthedocs

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.1
rev: v0.9.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
@@ -59,6 +59,6 @@ repos:
)$
- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
rev: 2.0.1
hooks:
- id: poetry-check
2 changes: 1 addition & 1 deletion .readthedocs.yml
@@ -3,7 +3,7 @@ version: 2
build:
os: ubuntu-24.04
tools:
python: "3.12"
python: "3.13"

sphinx:
builder: html
70 changes: 70 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,76 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v0.44.1 (2025-01-29)

### 🐛 Fixes

- [#2847](https://github.com/meltano/sdk/issues/2847) Update Cookiecutter templates
- [#2844](https://github.com/meltano/sdk/issues/2844) Avoid writing an empty state -- _**Thanks @joaopamaral!**_
- [#2843](https://github.com/meltano/sdk/issues/2843) Use SQLAlchemy to generate an insert statement

## v0.44.0 (2025-01-23)

### ✨ New

- [#2830](https://github.com/meltano/sdk/issues/2830) Allow developers to mark stream schema and settings fields as deprecated
- [#2829](https://github.com/meltano/sdk/issues/2829) Support a `x-sql-datatype` JSON Schema annotation to let targets customize SQL type handling
- [#2819](https://github.com/meltano/sdk/issues/2819) Add SHA256 encryption method to inline stream maps -- _**Thanks @ben-schulz-mh!**_

### 📦 Packaging changes

- [#2407](https://github.com/meltano/sdk/issues/2407) Use Poetry support for PEP 621
- [#2822](https://github.com/meltano/sdk/issues/2822) Make paramiko and transitive SSH dependencies optional
- [#2821](https://github.com/meltano/sdk/issues/2821) Require urllib3 < 2 on Python < 3.10

## v0.43.1 (2024-12-10)

### 🐛 Fixes

- [#2807](https://github.com/meltano/sdk/issues/2807) Allow developers to set `RESTStream.http_method`

## v0.43.0 (2024-12-10)

### ✨ New

- [#2482](https://github.com/meltano/sdk/issues/2482) Allow SQL tap developers to auto-skip certain schemas from discovery
- [#2784](https://github.com/meltano/sdk/issues/2784) Added a new built-in setting `activate_version` for targets to optionally disable processing of `ACTIVATE_VERSION` messages
- [#2780](https://github.com/meltano/sdk/issues/2780) Numeric values are now parsed as `decimal.Decimal` in REST and GraphQL stream responses
- [#2775](https://github.com/meltano/sdk/issues/2775) Log a stream's bookmark (if it's available) when its sync starts
- [#2703](https://github.com/meltano/sdk/issues/2703) Targets now emit record count from the built-in batch file processor
- [#2774](https://github.com/meltano/sdk/issues/2774) Accept a `maxLength` limit for VARCHARs
- [#2769](https://github.com/meltano/sdk/issues/2769) Add `versioning-strategy` to dependabot config of Cookiecutter templates
- [#2765](https://github.com/meltano/sdk/issues/2765) The last received Singer message is now logged when the target fails
- [#2762](https://github.com/meltano/sdk/issues/2762) Support other content-types in REST streams

### 🐛 Fixes

- [#2790](https://github.com/meltano/sdk/issues/2790) Ensure the required global folder tap settings are merged into the concrete implementation settings
- [#2785](https://github.com/meltano/sdk/issues/2785) Use FS-specific `listdir` in folder tap
- [#2778](https://github.com/meltano/sdk/issues/2778) The path of the offending field is now printed for config validation errors
- [#2770](https://github.com/meltano/sdk/issues/2770) Respect standard Singer stream metadata `table-key-properties`, `replication-key` and `forced-replication-method`
- [#2755](https://github.com/meltano/sdk/issues/2755) Safely compare UUID replication keys with state bookmarks -- _**Thanks @nikzavada!**_

### ⚙️ Under the Hood

- [#2805](https://github.com/meltano/sdk/issues/2805) Rename setting `activate_version` to `process_activate_version_messages`
- [#2788](https://github.com/meltano/sdk/issues/2788) Fail early if input files to `--catalog` or `--state` do not exist
- [#2781](https://github.com/meltano/sdk/issues/2781) Added a class method to instantiate `SQLToJSONSchema` from the tap configuration
- [#2566](https://github.com/meltano/sdk/issues/2566) Standardize on JSON Schema Draft 2020-12 to validate stream schemas
- [#2751](https://github.com/meltano/sdk/issues/2751) Dropped support for Python 3.8

### ⚡ Performance Improvements

- [#2793](https://github.com/meltano/sdk/issues/2793) Improved discovery performance for SQL taps

### 📚 Documentation Improvements

- [#2796](https://github.com/meltano/sdk/issues/2796) Document how to configure nested stream maps values with environment variables in Meltano

### 📦 Packaging changes

- [#2797](https://github.com/meltano/sdk/issues/2797) SQL taps now require SQLAlchemy 2.0+

## v0.42.1 (2024-11-11)

### 🐛 Fixes
@@ -10,11 +10,12 @@ permissions:
jobs:
build:
runs-on: ubuntu-latest
outputs:
version: {{ '${{ steps.baipp.outputs.package_version }}' }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: hynek/build-and-inspect-python-package@v2
id: baipp

publish:
name: Publish to PyPI
@@ -23,7 +24,13 @@ jobs:
## TODO: optionally provide the name of the environment for the trusted
## publisher on PyPI
## https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
# environment: pypi
# environment:
# name: pypi
{%- if cookiecutter.variant != "None (Skip)" %}
# url: https://pypi.org/project/{{cookiecutter.variant}}-{{cookiecutter.mapper_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- else %}
# url: https://pypi.org/project/{{cookiecutter.mapper_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- endif %}
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/download-artifact@v4
@@ -42,4 +49,4 @@ jobs:
- name: Publish
## TODO: create a trusted publisher on PyPI
## https://docs.pypi.org/trusted-publishers/
uses: pypa/gh-action-pypi-publish@v1.12.2
uses: pypa/gh-action-pypi-publish@v1.12.3
@@ -18,19 +18,19 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.4
rev: 0.31.0
hooks:
- id: check-dependabot
- id: check-github-workflows

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
rev: v0.9.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
rev: v1.14.1
hooks:
- id: mypy
@@ -6,11 +6,11 @@ Built with the [Meltano Mapper SDK](https://sdk.meltano.com) for Singer Mappers.

<!--
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPi repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPI repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
## Installation
Install from PyPi:
Install from PyPI:
```bash
pipx install {{ cookiecutter.mapper_id }}
@@ -1,4 +1,4 @@
[tool.poetry]
[project]
{%- if cookiecutter.variant != "None (Skip)" %}
name = "{{cookiecutter.variant}}-{{cookiecutter.mapper_id}}"
{%- else %}
@@ -7,14 +7,15 @@ name = "{{cookiecutter.mapper_id}}"
version = "0.0.1"
description = "Singer mapper {{cookiecutter.name}}, built with the Meltano Singer SDK."
readme = "README.md"
authors = ["{{ cookiecutter.admin_name }} <{{ cookiecutter.admin_email }}>"]
authors = [{ name = "{{ cookiecutter.admin_name }}", email = "{{ cookiecutter.admin_email }}" }]
keywords = [
"ELT",
"Mapper",
"{{cookiecutter.name}}",
]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
@@ -23,26 +24,32 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
license = "Apache-2.0"
license-files = [ "LICENSE" ]
requires-python = ">=3.9"
dynamic = ["dependencies"]

[tool.poetry]
{%- if cookiecutter.variant != "None (Skip)" %}
packages = [
{ include = "{{cookiecutter.library_name}}" },
]
{%- endif %}

[tool.poetry.dependencies]
python = ">=3.9"
singer-sdk = { version="~=0.42.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
singer-sdk = { version="~=0.44.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
fs-s3fs = { version = "~=1.1.1", optional = true }

[tool.poetry.group.dev.dependencies]
pytest = ">=8"
singer-sdk = { version="~=0.42.1", extras = ["testing"] }
singer-sdk = { version="~=0.44.1", extras = ["testing"] }

[tool.poetry.extras]
s3 = ["fs-s3fs"]

[tool.pytest.ini_options]
addopts = '--durations=10'
addopts = [
"--durations=10",
]

[tool.mypy]
python_version = "3.12"
@@ -53,10 +60,7 @@ target-version = "py39"

[tool.ruff.lint]
ignore = [
"ANN101", # missing-type-self
"ANN102", # missing-type-cls
"COM812", # missing-trailing-comma
"ISC001", # single-line-implicit-string-concatenation
]
select = ["ALL"]

@@ -67,7 +71,7 @@ allow-star-arg-any = true
convention = "google"

[build-system]
requires = ["poetry-core"]
requires = ["poetry-core>=2,<3"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
@@ -6,6 +6,8 @@ requires =
tox>=4.19

[testenv]
pass_env =
{{cookiecutter.mapper_id.replace('-', '_').upper()}}_*
deps =
pytest
commands =
@@ -10,11 +10,12 @@ permissions:
jobs:
build:
runs-on: ubuntu-latest
outputs:
version: {{ '${{ steps.baipp.outputs.package_version }}' }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: hynek/build-and-inspect-python-package@v2
id: baipp

publish:
name: Publish to PyPI
@@ -23,7 +24,13 @@ jobs:
## TODO: optionally provide the name of the environment for the trusted
## publisher on PyPI
## https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
# environment: pypi
# environment:
# name: pypi
{%- if cookiecutter.variant != "None (Skip)" %}
# url: https://pypi.org/project/{{cookiecutter.variant}}-{{cookiecutter.tap_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- else %}
# url: https://pypi.org/project/{{cookiecutter.tap_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- endif %}
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/download-artifact@v4
@@ -42,4 +49,4 @@ jobs:
- name: Publish
## TODO: create a trusted publisher on PyPI
## https://docs.pypi.org/trusted-publishers/
uses: pypa/gh-action-pypi-publish@v1.12.2
uses: pypa/gh-action-pypi-publish@v1.12.3
@@ -18,20 +18,20 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.4
rev: 0.31.0
hooks:
- id: check-dependabot
- id: check-github-workflows

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
rev: v0.9.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
rev: v1.14.1
hooks:
- id: mypy
additional_dependencies:
7 changes: 4 additions & 3 deletions cookiecutter/tap-template/{{cookiecutter.tap_id}}/README.md
@@ -6,11 +6,11 @@ Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps.

<!--
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPi repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPI repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
## Installation
Install from PyPi:
Install from PyPI:
```bash
pipx install {{ cookiecutter.tap_id }}
@@ -121,7 +121,8 @@ Now you can test and orchestrate using Meltano:
```bash
# Test invocation:
meltano invoke {{ cookiecutter.tap_id }} --version
# OR run a test `elt` pipeline:

# OR run a test ELT pipeline:
meltano run {{ cookiecutter.tap_id }} target-jsonl
```

29 changes: 18 additions & 11 deletions cookiecutter/tap-template/{{cookiecutter.tap_id}}/pyproject.toml
@@ -1,4 +1,4 @@
[tool.poetry]
[project]
{%- if cookiecutter.variant != "None (Skip)" %}
name = "{{cookiecutter.variant}}-{{cookiecutter.tap_id}}"
{%- else %}
@@ -7,13 +7,14 @@ name = "{{cookiecutter.tap_id}}"
version = "0.0.1"
description = "Singer tap for {{cookiecutter.source_name}}, built with the Meltano Singer SDK."
readme = "README.md"
authors = ["{{ cookiecutter.admin_name }} <{{ cookiecutter.admin_email }}>"]
authors = [{ name = "{{ cookiecutter.admin_name }}", email = "{{ cookiecutter.admin_email }}" }]
keywords = [
"ELT",
"{{cookiecutter.source_name}}",
]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
@@ -22,36 +23,45 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
license = "Apache-2.0"
license-files = [ "LICENSE" ]
requires-python = ">=3.9"
dynamic = ["dependencies"]

[tool.poetry]
{%- if cookiecutter.variant != "None (Skip)" %}
packages = [
{ include = "{{cookiecutter.library_name}}" },
]
{%- endif %}

[tool.poetry.dependencies]
python = ">=3.9"
singer-sdk = { version="~=0.42.1", extras = [
singer-sdk = { version="~=0.44.1", extras = [
{%- if cookiecutter.auth_method == "JWT" -%}"jwt", {% endif -%}
{%- if cookiecutter.faker_extra -%}"faker",{%- endif -%}
] }
fs-s3fs = { version = "~=1.1.1", optional = true }
{%- if cookiecutter.stream_type in ["REST", "GraphQL"] %}
requests = "~=2.32.3"
{%- endif %}
{%- if cookiecutter.stream_type == "SQL" %}
sqlalchemy = "~=2.0.36"
{%- endif %}

[tool.poetry.group.dev.dependencies]
pytest = ">=8"
{%- if cookiecutter.auth_method == "JWT" %}
singer-sdk = { version="~=0.42.1", extras = ["jwt", "testing"] }
singer-sdk = { version="~=0.44.1", extras = ["jwt", "testing"] }
{%- else %}
singer-sdk = { version="~=0.42.1", extras = ["testing"] }
singer-sdk = { version="~=0.44.1", extras = ["testing"] }
{%- endif %}

[tool.poetry.extras]
s3 = ["fs-s3fs"]

[tool.pytest.ini_options]
addopts = '--durations=10'
addopts = [
"--durations=10",
]

[tool.mypy]
python_version = "3.12"
@@ -67,10 +77,7 @@ target-version = "py39"

[tool.ruff.lint]
ignore = [
"ANN101", # missing-type-self
"ANN102", # missing-type-cls
"COM812", # missing-trailing-comma
"ISC001", # single-line-implicit-string-concatenation
]
select = ["ALL"]

@@ -81,7 +88,7 @@ allow-star-arg-any = true
convention = "google"

[build-system]
requires = ["poetry-core"]
requires = ["poetry-core>=2,<3"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
2 changes: 2 additions & 0 deletions cookiecutter/tap-template/{{cookiecutter.tap_id}}/tox.ini
@@ -6,6 +6,8 @@ requires =
tox>=4.19

[testenv]
pass_env =
{{cookiecutter.tap_id.replace('-', '_').upper()}}_*
deps =
pytest
commands =
@@ -5,7 +5,7 @@
import decimal
import typing as t

import requests # noqa: TCH002
import requests # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

{%- if cookiecutter.auth_method in ("OAuth2", "JWT") %}
@@ -12,29 +12,29 @@
{% if cookiecutter.auth_method == "API Key" -%}
from singer_sdk.authenticators import APIKeyAuthenticator
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

{% elif cookiecutter.auth_method == "Bearer Token" -%}
from singer_sdk.authenticators import BearerTokenAuthenticator
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

{% elif cookiecutter.auth_method == "Basic Auth" -%}
from requests.auth import HTTPBasicAuth
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

{% elif cookiecutter.auth_method == "Custom or N/A" -%}
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

{% elif cookiecutter.auth_method in ("OAuth2", "JWT") -%}
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002
from singer_sdk.pagination import BaseAPIPaginator # noqa: TC002
from singer_sdk.streams import {{ cookiecutter.stream_type }}Stream

from {{ cookiecutter.library_name }}.auth import {{ cookiecutter.source_name }}Authenticator
@@ -7,7 +7,7 @@

import typing as t

import sqlalchemy # noqa: TCH002
import sqlalchemy # noqa: TC002
from singer_sdk import SQLConnector, SQLStream


@@ -10,11 +10,12 @@ permissions:
jobs:
build:
runs-on: ubuntu-latest
outputs:
version: {{ '${{ steps.baipp.outputs.package_version }}' }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: hynek/build-and-inspect-python-package@v2
id: baipp

publish:
name: Publish to PyPI
@@ -23,7 +24,13 @@ jobs:
## TODO: optionally provide the name of the environment for the trusted
## publisher on PyPI
## https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment
# environment: pypi
# environment:
# name: pypi
{%- if cookiecutter.variant != "None (Skip)" %}
# url: https://pypi.org/project/{{cookiecutter.variant}}-{{cookiecutter.target_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- else %}
# url: https://pypi.org/project/{{cookiecutter.target_id}}/{{ '${{ steps.baipp.outputs.package_version }}' }}
{%- endif %}
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/download-artifact@v4
@@ -42,4 +49,4 @@ jobs:
- name: Publish
## TODO: create a trusted publisher on PyPI
## https://docs.pypi.org/trusted-publishers/
uses: pypa/gh-action-pypi-publish@v1.12.2
uses: pypa/gh-action-pypi-publish@v1.12.3
@@ -18,20 +18,20 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.4
rev: 0.31.0
hooks:
- id: check-dependabot
- id: check-github-workflows

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
rev: v0.9.3
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
rev: v1.14.1
hooks:
- id: mypy
additional_dependencies:
@@ -6,11 +6,11 @@ Build with the [Meltano Target SDK](https://sdk.meltano.com).

<!--
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPi repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPI repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
## Installation
Install from PyPi:
Install from PyPI:
```bash
pipx install {{ cookiecutter.target_id }}
@@ -66,8 +66,8 @@ You can easily run `{{ cookiecutter.target_id }}` by itself or in a pipeline usi
```bash
{{ cookiecutter.target_id }} --version
{{ cookiecutter.target_id }} --help
# Test using the "Carbon Intensity" sample:
tap-carbon-intensity | {{ cookiecutter.target_id }} --config /path/to/{{ cookiecutter.target_id }}-config.json
# Test using the "Smoke Test" tap:
tap-smoke-test | {{ cookiecutter.target_id }} --config /path/to/{{ cookiecutter.target_id }}-config.json
```

## Developer Resources
@@ -122,8 +122,9 @@ Now you can test and orchestrate using Meltano:
```bash
# Test invocation:
meltano invoke {{ cookiecutter.target_id }} --version
# OR run a test `elt` pipeline with the Carbon Intensity sample tap:
meltano run tap-carbon-intensity {{ cookiecutter.target_id }}

# OR run a test ELT pipeline with the Smoke Test sample tap:
meltano run tap-smoke-test {{ cookiecutter.target_id }}
```

### SDK Dev Guide
@@ -1,4 +1,4 @@
[tool.poetry]
[project]
{%- if cookiecutter.variant != "None (Skip)" %}
name = "{{cookiecutter.variant}}-{{cookiecutter.target_id}}"
{%- else %}
@@ -7,13 +7,14 @@ name = "{{cookiecutter.target_id}}"
version = "0.0.1"
description = "Singer target for {{cookiecutter.destination_name}}, built with the Meltano Singer SDK."
readme = "README.md"
authors = ["{{ cookiecutter.admin_name }} <{{ cookiecutter.admin_email }}>"]
authors = [{ name = "{{ cookiecutter.admin_name }}", email = "{{ cookiecutter.admin_email }}" }]
keywords = [
"ELT",
"{{cookiecutter.destination_name}}",
]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
@@ -22,30 +23,31 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
license = "Apache-2.0"
license-files = [ "LICENSE" ]
requires-python = ">=3.9"
dynamic = ["dependencies"]

[tool.poetry]
{%- if cookiecutter.variant != "None (Skip)" %}
packages = [
{ include = "{{cookiecutter.library_name}}" },
]
{%- endif %}

[tool.poetry.dependencies]
python = ">=3.9"
singer-sdk = { version="~=0.42.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
singer-sdk = { version="~=0.44.1"{{ ', extras = ["faker"]' if cookiecutter.faker_extra }} }
fs-s3fs = { version = "~=1.1.1", optional = true }
{%- if cookiecutter.serialization_method != "SQL" %}
requests = "~=2.32.3"
{%- endif %}

[tool.poetry.dev-dependencies]
pytest = ">=8"
singer-sdk = { version="~=0.42.1", extras = ["testing"] }
singer-sdk = { version="~=0.44.1", extras = ["testing"] }

[tool.poetry.extras]
s3 = ["fs-s3fs"]

[tool.pytest.ini_options]
addopts = '--durations=10'

[tool.mypy]
python_version = "3.12"
warn_unused_configs = true
@@ -55,10 +57,7 @@ target-version = "py39"

[tool.ruff.lint]
ignore = [
"ANN101", # missing-type-self
"ANN102", # missing-type-cls
"COM812", # missing-trailing-comma
"ISC001", # single-line-implicit-string-concatenation
]
select = ["ALL"]

@@ -69,7 +68,7 @@ allow-star-arg-any = true
convention = "google"

[build-system]
requires = ["poetry-core"]
requires = ["poetry-core>=2,<3"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
@@ -6,6 +6,8 @@ requires =
tox>=4.19

[testenv]
pass_env =
{{cookiecutter.target_id.replace('-', '_').upper()}}_*
deps =
pytest
commands =
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -29,7 +29,7 @@
author = "Meltano Core Team and Contributors"

# The full version, including alpha/beta/rc tags
release = "0.42.1"
release = "0.44.1"


# -- General configuration -------------------------------------------------------------
4 changes: 4 additions & 0 deletions docs/guides/sql-tap.md
@@ -1,5 +1,9 @@
# Building SQL taps

```{warning}
Starting with version `0.43.0`, SQL taps require SQLAlchemy 2.0 or newer.
```

## Mapping SQL types to JSON Schema

Starting with version `0.41.0`, the Meltano Singer SDK provides a clean way to map SQL types to JSON Schema. This is useful when the SQL dialect you are using has custom types that need to be mapped accordingly to JSON Schema.
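The tap-side class for this mapping is `SQLToJSONSchema`, which since [#2781](https://github.com/meltano/sdk/issues/2781) can be instantiated from the tap configuration. A minimal sketch of a custom override, assuming the single-dispatch registration pattern and the connector's `sql_to_jsonschema` property documented by the SDK; the `NUMERIC` → `singer.decimal` mapping is illustrative, not part of this commit:

```python
import functools

import sqlalchemy as sa
from singer_sdk import SQLConnector
from singer_sdk.connectors.sql import SQLToJSONSchema


class CustomSQLToJSONSchema(SQLToJSONSchema):
    @SQLToJSONSchema.to_jsonschema.register
    def _numeric_to_jsonschema(self, column_type: sa.types.NUMERIC) -> dict:
        # Illustrative: emit a string with the `singer.decimal` format
        # instead of the default JSON number type.
        return {"type": ["string"], "format": "singer.decimal"}


class MyConnector(SQLConnector):
    @functools.cached_property
    def sql_to_jsonschema(self):
        # from_config lets the converter read tap settings (#2781).
        return CustomSQLToJSONSchema.from_config(self.config)
```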
53 changes: 53 additions & 0 deletions docs/guides/sql-target.md
@@ -56,3 +56,56 @@ class MyConnector(SQLConnector):
to_sql.register_format_handler("uri", URI)
return to_sql
```

### Use the `x-sql-datatype` JSON Schema extension

You can register new type handlers for the `x-sql-datatype` extension:

```python
from my_sqlalchemy_dialect import URI


class MyConnector(SQLConnector):
@functools.cached_property
def jsonschema_to_sql(self):
to_sql = JSONSchemaToSQL()
to_sql.register_sql_datatype_handler("smallint", sa.types.SMALLINT)
return to_sql
```

Then you can annotate the tap's catalog to specify the SQL type:

````{tab} meltano.yml
```yaml
# https://docs.meltano.com/concepts/plugins/#schema-extra
plugins:
extractors:
- name: tap-example
schema:
addresses:
number:
x-sql-datatype: smallint
```
````

````{tab} JSON catalog
```json
{
"streams": [
{
"stream": "addresses",
"tap_stream_id": "addresses",
"schema": {
"type": "object",
"properties": {
"number": {
"type": "integer",
"x-sql-datatype": "smallint"
}
}
}
}
]
}
```
````
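
As the `register_sql_datatype_handler` docstring (further down, in `singer_sdk/connectors/sql.py`) notes, the handler may also be a callable that takes the property's JSON Schema dict and returns a SQLAlchemy type instance. A small sketch of that variant; the handler body is illustrative:

```python
import sqlalchemy as sa
from singer_sdk.connectors.sql import JSONSchemaToSQL


def smallint_handler(schema: dict) -> sa.types.TypeEngine:
    # The callable form receives the full JSON Schema dict for the
    # property, so it can inspect other keywords before choosing a type.
    return sa.types.SMALLINT()


to_sql = JSONSchemaToSQL()
to_sql.register_sql_datatype_handler("smallint", smallint_handler)
```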
2 changes: 1 addition & 1 deletion docs/python_tips.md
@@ -39,7 +39,7 @@ creating and managing virtual environments, these two tools automate the process
`poetry add -D <pippable-dev-only-ref>`.
- If version conflicts occur, relax the version constraints in `pyproject.toml` for the
libraries where the conflict is reported, then try again.
- Poetry can also publish your libraries to PyPi.
- Poetry can also publish your libraries to PyPI.
- **pipx**: Install pipx once, and then use pipx _instead of_ pip.
- If you are using poetry for development, then all other pip-installables should be
executable tools and programs, which is what pipx is designed for.
39 changes: 34 additions & 5 deletions docs/stream_maps.md
@@ -230,11 +230,12 @@ can be referenced directly by mapping expressions.

The following functions and namespaces are available for use in mapping expressions:

| Function | Description |
| :------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [`md5()`](inv:python:py:module:#hashlib) | Returns an inline MD5 hash of any string, outputting the string representation of the hash's hex digest. This is defined by the SDK internally with native python: [`hashlib.md5(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). |
| [`datetime`](inv:python:py:module:#datetime) | This is the datetime module object from the Python standard library. You can access [`datetime.datetime`](inv:python:py:class:#datetime.datetime), [`datetime.timedelta`](inv:python:py:class:#datetime.timedelta), etc. |
| [`json`](inv:python:py:module:#json) | This is the json module object from the Python standard library. Primarily used for calling [`json.dumps()`](inv:python:py:function:#json.dumps) and [`json.loads()`](inv:python:py:function:#json.loads). |
| Function | Description |
| :--------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [`md5()`](inv:python:py:function:#hashlib.md5) | Returns an inline MD5 hash of any string, outputting the string representation of the hash's hex digest. This is defined by the SDK internally with native python: [`hashlib.md5(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). |
| [`sha256()`](inv:python:py:function:#hashlib.sha256) | Returns an inline SHA256 hash of any string, outputting the string representation of the hash's hex digest. This is defined by the SDK internally with native python: [`hashlib.sha256(<input>.encode("utf-8")).hexdigest()`](inv:python:py:method:#hashlib.hash.hexdigest). |
| [`datetime`](inv:python:py:module:#datetime) | This is the datetime module object from the Python standard library. You can access [`datetime.datetime`](inv:python:py:class:#datetime.datetime), [`datetime.timedelta`](inv:python:py:class:#datetime.timedelta), etc. |
| [`json`](inv:python:py:module:#json) | This is the json module object from the Python standard library. Primarily used for calling [`json.dumps()`](inv:python:py:function:#json.dumps) and [`json.loads()`](inv:python:py:function:#json.loads). |
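
For reference, the new `sha256()` helper corresponds to this plain-Python sketch, mirroring the definition quoted in the table above:

```python
import hashlib


def sha256(value: str) -> str:
    # Same expression the table quotes:
    # hashlib.sha256(<input>.encode("utf-8")).hexdigest()
    return hashlib.sha256(value.encode("utf-8")).hexdigest()


print(sha256("seed" + "user@example.com"))  # 64-character hex digest
```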

#### Built-in Variable Names

@@ -778,3 +779,31 @@ the `key_properties` in an extract-load pipeline. For instance, it is common to
"append-only" loading behavior in certain targets, as may be required for historical reporting. This does not change the
underlying nature of the `primary_key` configuration in the upstream source data, only how it will be landed or deduped
in the downstream source.


### Q: How do I use Meltano environment variables to configure stream maps?

**Answer:** Environment variables in Meltano can be used to configure stream maps, but you first need to add the corresponding settings
to your plugin's `settings` option. For example:

```yaml
plugins:
extractors:
- name: tap-csv
variant: meltanolabs
pip_url: git+https://github.com/MeltanoLabs/tap-csv.git
settings:
- name: stream_maps.customers.email
- name: stream_maps.customers.email_domain
- name: stream_maps.customers.email_hash
- name: stream_maps.customers.__else__
- name: stream_maps.stream_map_config
```

Then, you can set the following environment variables:

```shell
TAP_CSV_STREAM_MAPS_CUSTOMERS_EMAIL_DOMAIN='email.split("@")[-1]'
TAP_CSV_STREAM_MAPS_CUSTOMERS_EMAIL_HASH='md5(config["hash_seed"] + email)'
TAP_CSV_STREAM_MAP_CONFIG_HASH_SEED='01AWZh7A6DzGm6iJZZ2T'
```
2 changes: 1 addition & 1 deletion noxfile.py
@@ -105,7 +105,7 @@ def benches(session: nox.Session) -> None:
@nox.session(name="deps", python=main_python_version)
def dependencies(session: nox.Session) -> None:
"""Check issues with dependencies."""
session.install(".[s3,testing]")
session.install(".[docs,faker,jwt,parquet,s3,ssh,testing]")
session.install("deptry")
session.run("deptry", "singer_sdk", *session.posargs)

2,101 changes: 1,164 additions & 937 deletions poetry.lock


199 changes: 97 additions & 102 deletions pyproject.toml
@@ -1,13 +1,11 @@
[tool.poetry]
[project]
name = "singer-sdk"
version = "0.0.0"
dynamic = [
"version"
]
description = "A framework for building Singer taps"
authors = ["Meltano Team and Contributors <hello@meltano.com>"]
maintainers = ["Meltano Team and Contributors <hello@meltano.com>"]
readme = "README.md"
homepage = "https://sdk.meltano.com/en/latest/"
repository = "https://github.com/meltano/sdk"
documentation = "https://sdk.meltano.com/en/latest/"
authors = [{ name = "Meltano Team and Contributors", email = "hello@meltano.com" }]
maintainers = [{ name = "Meltano Team and Contributors", email = "hello@meltano.com" }]
keywords = [
"Meltano",
"Singer",
@@ -28,101 +26,106 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Application Frameworks",
"Typing :: Typed",
]
license = "Apache-2.0"

[tool.poetry.urls]
"Issue Tracker" = "https://github.com/meltano/sdk/issues"
"Twitter" = "https://twitter.com/meltanodata/"
"Changelog" = "https://github.com/meltano/sdk/blob/main/CHANGELOG.md"
"Slack" = "https://meltano.com/slack"
"Youtube" = "https://www.youtube.com/meltano"

[tool.poetry.dependencies]
python = ">=3.9"
backoff = { version = ">=2.0.0", python = "<4" }
backports-datetime-fromisoformat = { version = ">=2.0.1", python = "<3.11" }
click = "~=8.0"
fs = ">=2.4.16"
fsspec = ">=2024.9.0"
importlib-metadata = { version = "<9.0.0", python = "<3.12" }
importlib-resources = { version = ">=5.12.0,!=6.2.0,!=6.3.0,!=6.3.1", python = "<3.10" }
inflection = ">=0.5.1"
joblib = ">=1.3.0"
jsonpath-ng = ">=1.5.3"
jsonschema = ">=4.16.0"
packaging = ">=23.1"
python-dotenv = ">=0.20"
PyYAML = ">=6.0"
referencing = ">=0.30.0"
requests = ">=2.25.1"
# TODO: remove this constraint once we get rid of the `fs` dependency
# newer setuptools versions are incompatible with some dependencies (fs)
setuptools = "<=70.3.0"
simpleeval = ">=0.9.13,!=1.0.1"
simplejson = ">=3.17.6"
sqlalchemy = ">=1.4,<3.0"
typing-extensions = ">=4.5.0"
readme = "README.md"
license.file = "LICENSE"
requires-python = ">=3.9"

dependencies = [
'backoff>=2.0.0; python_version<"4"',
'backports-datetime-fromisoformat>=2.0.1; python_version<"3.11"',
"click~=8.0",
"fs>=2.4.16",
"fsspec>=2024.9.0",
'importlib-metadata<9.0.0; python_version<"3.12"',
'importlib-resources>=5.12.0,!=6.2.0,!=6.3.0,!=6.3.1; python_version<"3.10"',
"inflection>=0.5.1",
"joblib>=1.3.0",
"jsonpath-ng>=1.5.3",
"jsonschema>=4.16.0",
"packaging>=23.1",
"python-dotenv>=0.20",
"PyYAML>=6.0",
"referencing>=0.30.0",
"requests>=2.25.1",
# TODO: remove this constraint once we get rid of the `fs` dependency
# newer setuptools versions are incompatible with some dependencies (fs)
"setuptools<=70.3.0",
"simpleeval>=0.9.13,!=1.0.1",
"simplejson>=3.17.6",
"sqlalchemy>=1.4,<3.0",
"typing-extensions>=4.5.0",
'urllib3<2; python_version<"3.10"',
'urllib3; python_version>="3.10"',
]

[project.optional-dependencies]
# Sphinx dependencies installed as optional 'docs' extras
# https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-664002569
furo = {version = ">=2024.5.6", optional = true}
myst-parser = {version = ">=3", optional = true}
sphinx = {version = ">=7", optional = true}
sphinx-copybutton = {version = ">=0.5.2", optional = true}
sphinx-inline-tabs = {version = ">=2023.4.21", optional = true}
sphinx-notfound-page = {version = ">=1.0.0", optional = true}
sphinx-reredirects = {version = ">=0.1.5", optional = true}
docs = [
"furo>=2024.5.6",
"myst-parser>=3",
"pytest>=7.2.1",
"sphinx>=7",
"sphinx-copybutton>=0.5.2",
"sphinx-inline-tabs>=2023.4.21",
"sphinx-notfound-page>=1.0.0",
"sphinx-reredirects>=0.1.5",
]

# File storage dependencies installed as optional 'filesystem' extras
fs-s3fs = {version = ">=1.1.1", optional = true}
s3fs = { version = ">=2024.9.0", optional = true }
s3 = [
"fs-s3fs>=1.1.1",
"s3fs>=2024.9.0",
]

# Parquet file dependencies installed as optional 'parquet' extras
# We add Python constraints to force Poetry to add the latest supported Numpy version
# for all Python versions to 'poetry.lock'. If we don't do this, Poetry will add only
# the version of Numpy that is compatible with the earliest Python version supported
# by this project, but that may not be compatible with the latest Python version.
numpy = [
{ version = ">=1.22,<2.1", python = "==3.9", optional = true },
{ version = ">=1.22", python = ">=3.10", optional = true },
parquet = [
"numpy>=1.22,<2.1; python_version=='3.9'",
"numpy>=1.22; python_version>='3.10'",
"pyarrow>=13",
]
pyarrow = { version = ">=13", optional = true }

# Testing dependencies installed as optional 'testing' extras
pytest = {version=">=7.2.1", optional = true}
testing = [
"pytest>=7.2.1",
]

# installed as optional 'faker' extra
faker = {version = ">=22.5", optional = true}
faker = [
"faker>=22.5",
]

# Crypto extras
cryptography = { version = ">=3.4.6", optional = true }
PyJWT = { version = "~=2.4", optional = true }

# SSH extras
paramiko = ">=3.3.0"

[tool.poetry.extras]
jwt = [
"cryptography",
"PyJWT",
]
docs = [
"furo",
"myst-parser",
"pytest",
"sphinx",
"sphinx-copybutton",
"sphinx-inline-tabs",
"sphinx-notfound-page",
"sphinx-reredirects",
"cryptography>=3.4.6",
"PyJWT~=2.4",
]
s3 = ["fs-s3fs", "s3fs"]
ssh = ["paramiko"]
testing = [
"pytest",

# SSH extras
ssh = [
"paramiko>=3.3.0",
]
parquet = ["numpy", "pyarrow"]
faker = ["faker"]

[project.urls]
Homepage = "https://sdk.meltano.com/en/latest/"
Repository = "https://github.com/meltano/sdk"
Documentation = "https://sdk.meltano.com/en/latest/"
"Issue Tracker" = "https://github.com/meltano/sdk/issues"
"Twitter" = "https://twitter.com/meltanodata/"
"Changelog" = "https://github.com/meltano/sdk/blob/main/CHANGELOG.md"
"Slack" = "https://meltano.com/slack"
"Youtube" = "https://www.youtube.com/meltano"

[tool.poetry]
requires-poetry = ">=2.0"
version = "0.0.0"

[tool.poetry.requires-plugins]
poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] }

[tool.poetry.group.dev.dependencies]
coverage = {extras = ["toml"], version = ">=7.4"}
@@ -187,7 +190,7 @@ xfail_strict = false

[tool.commitizen]
name = "cz_version_bump"
version = "0.42.1"
version = "0.44.1"
changelog_merge_prerelease = true
prerelease_offset = 1
tag_format = "v$major.$minor.$patch$prerelease"
@@ -235,39 +238,29 @@ show_missing = true

[tool.deptry]
known_first_party = ["singer_sdk"]
pep621_dev_dependency_groups = ["testing"]
pep621_dev_dependency_groups = ["docs", "testing"]

[tool.deptry.package_module_name_map]
backports-datetime-fromisoformat = "backports"
importlib-metadata = "importlib_metadata"
importlib-resources = "importlib_resources"
PyJWT = "jwt"
types-jsonschema = "jsonschema"
types-pytz = "pytz"
types-PyYAML = "yaml"
types-requests = "requests"

[tool.deptry.per_rule_ignores]
DEP002 = [
# Transitive constraints
"numpy",
"setuptools",
"urllib3",
# Python version-specific dependencies
"backports-datetime-fromisoformat",
# Docs extras
"furo",
"myst-parser",
"sphinx",
"sphinx-copybutton",
"sphinx-inline-tabs",
"sphinx-notfound-page",
"sphinx-reredirects",
# Plugins
"paramiko",
"fs-s3fs",
"s3fs",
]
DEP004 = [
# TODO: Make pytest a runtime dependency?
"pytest",
]

[tool.mypy]
enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
@@ -296,13 +289,16 @@ enable = true
style = "pep440"

[build-system]
requires = ["poetry-core==1.9.0", "poetry-dynamic-versioning==1.4.0"]
requires = [
"poetry-core>=2,<3",
"poetry-dynamic-versioning",
]
build-backend = "poetry_dynamic_versioning.backend"

[tool.poetry.plugins."pytest11"]
[project.entry-points."pytest11"]
singer_testing = "singer_sdk.testing.pytest_plugin"

[tool.poetry.plugins."singer_sdk.batch_encoders"]
[project.entry-points."singer_sdk.batch_encoders"]
jsonl = "singer_sdk.contrib.batch_encoder_jsonl:JSONLinesBatcher"
parquet = "singer_sdk.contrib.batch_encoder_parquet:ParquetBatcher"

@@ -311,7 +307,6 @@ extend-exclude = [
"cookiecutter/*",
]
line-length = 88
target-version = "py39"

[tool.ruff.format]
docstring-code-format = true
22 changes: 0 additions & 22 deletions samples/sample_tap_bigquery/__init__.py
@@ -13,28 +13,6 @@ def get_sqlalchemy_url(self, config: dict) -> str: # noqa: PLR6301
"""Concatenate a SQLAlchemy URL for use in connecting to the source."""
return f"bigquery://{config['project_id']}"

def get_object_names(
self,
engine,
inspected,
schema_name: str,
) -> list[tuple[str, bool]]:
"""Return discoverable object names."""
# Bigquery inspections returns table names in the form
# `schema_name.table_name` which later results in the project name
# override due to specifics in behavior of sqlalchemy-bigquery
#
# Let's strip `schema_name` prefix on the inspection

return [
(table_name.split(".")[-1], is_view)
for (table_name, is_view) in super().get_object_names(
engine,
inspected,
schema_name,
)
]


class BigQueryStream(SQLStream):
"""Stream class for BigQuery streams."""
8 changes: 4 additions & 4 deletions samples/sample_tap_dummy_json/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ ci:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-json
exclude: |
@@ -18,20 +18,20 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.28.6
rev: 0.30.0
hooks:
- id: check-dependabot
- id: check-github-workflows

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
rev: v0.8.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --show-fixes]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.1
rev: v1.13.0
hooks:
- id: mypy
additional_dependencies:
4 changes: 2 additions & 2 deletions samples/sample_tap_dummy_json/README.md
@@ -6,11 +6,11 @@ Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps.

<!--
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPi repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
Developer TODO: Update the below as needed to correctly describe the install procedure. For instance, if you do not have a PyPI repo, or if you want users to directly install from your git repo, you can modify this step as appropriate.
## Installation
Install from PyPi:
Install from PyPI:
```bash
pipx install tap-dummyjson
17 changes: 5 additions & 12 deletions samples/sample_tap_dummy_json/tox.ini
@@ -2,18 +2,11 @@

[tox]
envlist = py3{9,10,11,12,13}
isolated_build = true
requires =
tox>=4.19

[testenv]
allowlist_externals = poetry
deps =
pytest
commands =
poetry install -v
poetry run pytest

[testenv:pytest]
# Run the python tests.
# To execute, run `tox -e pytest`
envlist = py3{8,9,10,11,12,313}
commands =
poetry install -v
poetry run pytest
pytest {posargs}
42 changes: 42 additions & 0 deletions samples/sample_target_sqlite/__init__.py
@@ -2,6 +2,8 @@

from __future__ import annotations

import datetime
import sqlite3
import typing as t

from singer_sdk import SQLConnector, SQLSink, SQLTarget
@@ -10,6 +12,46 @@
DB_PATH_CONFIG = "path_to_db"


def adapt_date_iso(val):
"""Adapt datetime.date to ISO 8601 date."""
return val.isoformat()


def adapt_datetime_iso(val):
"""Adapt datetime.datetime to timezone-naive ISO 8601 date."""
return val.isoformat()


def adapt_datetime_epoch(val):
"""Adapt datetime.datetime to Unix timestamp."""
return int(val.timestamp())


sqlite3.register_adapter(datetime.date, adapt_date_iso)
sqlite3.register_adapter(datetime.datetime, adapt_datetime_iso)
sqlite3.register_adapter(datetime.datetime, adapt_datetime_epoch)


def convert_date(val):
"""Convert ISO 8601 date to datetime.date object."""
return datetime.date.fromisoformat(val.decode())


def convert_datetime(val):
"""Convert ISO 8601 datetime to datetime.datetime object."""
return datetime.datetime.fromisoformat(val.decode())


def convert_timestamp(val):
"""Convert Unix epoch timestamp to datetime.datetime object."""
return datetime.datetime.fromtimestamp(int(val), tz=datetime.timezone.utc)


sqlite3.register_converter("date", convert_date)
sqlite3.register_converter("datetime", convert_datetime)
sqlite3.register_converter("timestamp", convert_timestamp)


class SQLiteConnector(SQLConnector):
"""The connector for SQLite.
3 changes: 1 addition & 2 deletions singer_sdk/_singerlib/catalog.py
@@ -268,8 +268,7 @@ def _breadcrumb_is_selected(self, breadcrumb: Breadcrumb) -> bool: # noqa: PLR0
return md_entry.selected_by_default

logger.debug(
"Selection metadata omitted for '%s'. "
"Using parent value of selected=%s.",
"Selection metadata omitted for '%s'. Using parent value of selected=%s.",
breadcrumb,
parent_value,
)
24 changes: 18 additions & 6 deletions singer_sdk/_singerlib/schema.py
@@ -42,6 +42,8 @@
"anyOf",
"patternProperties",
"allOf",
# JSON Schema extensions
"x-sql-datatype",
]


@@ -84,6 +86,9 @@ class Schema:
contentMediaType: str | None = None # noqa: N815
contentEncoding: str | None = None # noqa: N815

# JSON Schema extensions
x_sql_datatype: str | None = None

def to_dict(self) -> dict[str, t.Any]:
"""Return the raw JSON Schema as a (possibly nested) dict.
@@ -99,12 +104,14 @@ def to_dict(self) -> dict[str, t.Any]:
result["items"] = self.items.to_dict()

for key in STANDARD_KEYS:
if self.__dict__.get(key) is not None:
result[key] = self.__dict__[key]
attr = key.replace("-", "_")
if (val := self.__dict__.get(attr)) is not None:
result[key] = val

for key in META_KEYS:
if self.__dict__.get(key) is not None:
result[f"${key}"] = self.__dict__[key]
attr = key.replace("-", "_")
if (val := self.__dict__.get(attr)) is not None:
result[f"${key}"] = val

return result

@@ -142,6 +149,7 @@ def from_dict(
... "description": "Age in years which must be equal to or greater than zero.",
... "type": "integer",
... "minimum": 0,
... "x-sql-datatype": "smallint",
... },
... },
... "required": ["firstName", "lastName"],
@@ -153,6 +161,8 @@ def from_dict(
"The person's first name."
>>> schema.properties["age"].minimum
0
>>> schema.properties["age"].x_sql_datatype
'smallint'
>>> schema.schema
'http://json-schema.org/draft/2020-12/schema'
""" # noqa: E501
@@ -168,12 +178,14 @@ def from_dict(
kwargs["items"] = cls.from_dict(items, **schema_defaults)

for key in STANDARD_KEYS:
attr = key.replace("-", "_")
if key in data:
kwargs[key] = data[key]
kwargs[attr] = data[key]

for key in META_KEYS:
attr = key.replace("-", "_")
if f"${key}" in data:
kwargs[key] = data[f"${key}"]
kwargs[attr] = data[f"${key}"]

return cls(**kwargs)
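
The net effect of the hyphen-to-underscore attribute mapping above is that extension keys round-trip cleanly between JSON Schema dicts and `Schema` objects. A minimal sketch, using the `Schema` import path shown in `connectors/sql.py` below:

```python
from singer_sdk._singerlib import Schema

schema = Schema.from_dict({"type": "integer", "x-sql-datatype": "smallint"})
assert schema.x_sql_datatype == "smallint"  # hyphenated key -> underscore attribute
assert schema.to_dict()["x-sql-datatype"] == "smallint"  # hyphen restored on output
```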

190 changes: 136 additions & 54 deletions singer_sdk/connectors/sql.py
Original file line number Diff line number Diff line change
@@ -13,10 +13,12 @@
from functools import lru_cache

import sqlalchemy as sa
from sqlalchemy.engine import reflection

from singer_sdk import typing as th
from singer_sdk._singerlib import CatalogEntry, MetadataMapping, Schema
from singer_sdk.exceptions import ConfigValidationError
from singer_sdk.helpers._compat import SingerSDKDeprecationWarning
from singer_sdk.helpers._util import dump_json, load_json
from singer_sdk.helpers.capabilities import TargetLoadMethods

@@ -253,6 +255,10 @@ class JSONSchemaToSQL:
This class provides a mapping from JSON Schema types to SQLAlchemy types.
.. versionadded:: 0.42.0
.. versionchanged:: 0.44.0
Added the
:meth:`singer_sdk.connectors.sql.JSONSchemaToSQL.register_sql_datatype_handler`
method to map custom ``x-sql-datatype`` annotations into SQLAlchemy types.
"""

def __init__(self, *, max_varchar_length: int | None = None) -> None:
@@ -290,6 +296,8 @@ def __init__(self, *, max_varchar_length: int | None = None) -> None:
"singer.decimal": self._handle_singer_decimal,
}

self._sql_datatype_mapping: dict[str, JSONtoSQLHandler] = {}

self._fallback_type: type[sa.types.TypeEngine] = sa.types.VARCHAR

@classmethod
@@ -392,6 +400,25 @@ def register_format_handler(
""" # noqa: E501
self._format_handlers[format_name] = handler

def register_sql_datatype_handler(
self,
sql_datatype: str,
handler: JSONtoSQLHandler,
) -> None:
"""Register a custom ``x-sql-datatype`` handler.
Args:
sql_datatype: The x-sql-datatype string.
handler: Either a SQLAlchemy type class or a callable that takes a schema
dict and returns a SQLAlchemy type instance.
Example:
>>> from sqlalchemy.types import SMALLINT
>>> to_sql = JSONSchemaToSQL()
>>> to_sql.register_sql_datatype_handler("smallint", SMALLINT)
"""
self._sql_datatype_mapping[sql_datatype] = handler

def handle_multiple_types(self, types: t.Sequence[str]) -> sa.types.TypeEngine: # noqa: ARG002, PLR6301
"""Handle multiple types by returning a VARCHAR.
@@ -428,10 +455,20 @@ def _get_type_from_schema(self, schema: dict) -> sa.types.TypeEngine | None:
Returns:
SQL type if one can be determined, None otherwise.
"""
# Check if this is a string with format first
if schema.get("type") == "string" and "format" in schema:
format_type = self._handle_format(schema)
if format_type is not None:
# Check x-sql-datatype first
if x_sql_datatype := schema.get("x-sql-datatype"):
if handler := self._sql_datatype_mapping.get(x_sql_datatype):
return self._invoke_handler(handler, schema)

warnings.warn(
f"This target does not support the x-sql-datatype '{x_sql_datatype}'",
UserWarning,
stacklevel=2,
)

# Then check if this is a string with format
if schema.get("type") == "string" and "format" in schema: # noqa: SIM102
if (format_type := self._handle_format(schema)) is not None:
return format_type

# Then check regular types
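
To make the new dispatch order concrete, a minimal sketch, assuming the
``smallint`` registration from the docstring example above:

import sqlalchemy as sa
from singer_sdk.connectors.sql import JSONSchemaToSQL

to_sql = JSONSchemaToSQL()
to_sql.register_sql_datatype_handler("smallint", sa.types.SMALLINT)

# x-sql-datatype is consulted before "format" and the regular type handlers.
to_sql.to_sql_type({"type": ["string"], "x-sql-datatype": "smallint"})  # SMALLINT
# An unregistered annotation emits a UserWarning and falls back to VARCHAR.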
@@ -867,6 +904,10 @@ def create_engine(self) -> Engine:
pool_pre_ping=True,
)

@deprecated(
"This method is deprecated. Use or override `FullyQualifiedName` instead.",
category=SingerSDKDeprecationWarning,
)
def quote(self, name: str) -> str:
"""Quote a name if it needs quoting, using '.' as a name-part delimiter.
@@ -880,7 +921,7 @@ def quote(self, name: str) -> str:
Returns:
str: The quoted name.
"""
return ".".join(
return ".".join( # pragma: no cover
[
self._dialect.identifier_preparer.quote(name_part)
for name_part in name.split(".")
@@ -911,7 +952,12 @@ def get_schema_names( # noqa: PLR6301
"""
return inspected.get_schema_names()

def get_object_names(
@deprecated(
"This method is deprecated.",
category=SingerSDKDeprecationWarning,
stacklevel=1,
)
def get_object_names( # pragma: no cover
self,
engine: Engine, # noqa: ARG002
inspected: Inspector,
@@ -941,10 +987,14 @@ def get_object_names(
def discover_catalog_entry(
self,
engine: Engine, # noqa: ARG002
inspected: Inspector,
schema_name: str,
inspected: Inspector, # noqa: ARG002
schema_name: str | None,
table_name: str,
is_view: bool, # noqa: FBT001
*,
reflected_columns: list[reflection.ReflectedColumn] | None = None,
reflected_pk: reflection.ReflectedPrimaryKeyConstraint | None = None,
reflected_indices: list[reflection.ReflectedIndex] | None = None,
) -> CatalogEntry:
"""Create `CatalogEntry` object for the given table or a view.
@@ -954,44 +1004,48 @@ def discover_catalog_entry(
schema_name: Schema name to inspect
table_name: Name of the table or a view
is_view: Flag whether this object is a view, returned by `get_object_names`
reflect_indices: Whether to reflect indices
reflected_columns: List of reflected columns
reflected_pk: Reflected primary key
reflected_indices: List of reflected indices
Returns:
`CatalogEntry` object for the given table or a view
"""
# Initialize unique stream name
unique_stream_id = f"{schema_name}-{table_name}"
unique_stream_id = f"{schema_name}-{table_name}" if schema_name else table_name

# Backwards-compatibility
reflected_columns = reflected_columns or []
reflected_indices = reflected_indices or []

# Detect key properties
possible_primary_keys: list[list[str]] = []
pk_def = inspected.get_pk_constraint(table_name, schema=schema_name)
if pk_def and "constrained_columns" in pk_def: # type: ignore[redundant-expr]
possible_primary_keys.append(pk_def["constrained_columns"])
if reflected_pk and "constrained_columns" in reflected_pk:
possible_primary_keys.append(reflected_pk["constrained_columns"])

# An element of the columns list is ``None`` if it's an expression and is
# returned in the ``expressions`` list of the reflected index.
possible_primary_keys.extend(
index_def["column_names"] # type: ignore[misc]
for index_def in inspected.get_indexes(table_name, schema=schema_name)
for index_def in reflected_indices
if index_def.get("unique", False)
)

key_properties = next(iter(possible_primary_keys), None)
key_properties = next(iter(possible_primary_keys), [])

# Initialize columns list
table_schema = th.PropertiesList()
for column_def in inspected.get_columns(table_name, schema=schema_name):
column_name = column_def["name"]
is_nullable = column_def.get("nullable", False)
jsonschema_type: dict = self.to_jsonschema_type(column_def["type"])
table_schema.append(
th.Property(
name=column_name,
wrapped=th.CustomType(jsonschema_type),
nullable=is_nullable,
required=column_name in key_properties if key_properties else False,
),
properties = [
th.Property(
name=column["name"],
wrapped=th.CustomType(self.to_jsonschema_type(column["type"])),
nullable=column.get("nullable", False),
required=column["name"] in key_properties,
description=column.get("comment"),
)
schema = table_schema.to_dict()
for column in reflected_columns
]
schema = th.PropertiesList(*properties).to_dict()

# Initialize available replication methods
addl_replication_methods: list[str] = [""] # By default an empty list.
@@ -1024,30 +1078,59 @@ def discover_catalog_entry(
replication_key=None, # Must be defined by user
)

def discover_catalog_entries(self) -> list[dict]:
def discover_catalog_entries(
self,
*,
exclude_schemas: t.Sequence[str] = (),
reflect_indices: bool = True,
) -> list[dict]:
"""Return a list of catalog entries from discovery.
Args:
exclude_schemas: A list of schema names to exclude from discovery.
reflect_indices: Whether to reflect indices to detect potential primary
keys.
Returns:
The discovered catalog entries as a list.
"""
result: list[dict] = []
engine = self._engine
inspected = sa.inspect(engine)
object_kinds = (
(reflection.ObjectKind.TABLE, False),
(reflection.ObjectKind.ANY_VIEW, True),
)
for schema_name in self.get_schema_names(engine, inspected):
# Iterate through each table and view
for table_name, is_view in self.get_object_names(
engine,
inspected,
schema_name,
):
catalog_entry = self.discover_catalog_entry(
engine,
inspected,
schema_name,
table_name,
is_view,
if schema_name in exclude_schemas:
continue

primary_keys = inspected.get_multi_pk_constraint(schema=schema_name)

if reflect_indices:
indices = inspected.get_multi_indexes(schema=schema_name)
else:
indices = {}

for object_kind, is_view in object_kinds:
columns = inspected.get_multi_columns(
schema=schema_name,
kind=object_kind,
)

result.extend(
self.discover_catalog_entry(
engine,
inspected,
schema_name,
table,
is_view,
reflected_columns=columns[schema, table],
reflected_pk=primary_keys.get((schema, table)),
reflected_indices=indices.get((schema, table), []),
).to_dict()
for schema, table in columns
)
result.append(catalog_entry.to_dict())

return result
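
A usage sketch for the new keyword-only parameters, assuming a connector
configured as in the tests further below:

from singer_sdk.connectors import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

# Skip unwanted schemas, and skip index reflection when primary-key
# detection from unique indices is not needed.
entries = connector.discover_catalog_entries(
    exclude_schemas=["information_schema"],
    reflect_indices=False,
)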

@@ -1281,8 +1364,7 @@ def prepare_schema(self, schema_name: str) -> None:
Args:
schema_name: The target schema name.
"""
schema_exists = self.schema_exists(schema_name)
if not schema_exists:
if not self.schema_exists(schema_name):
self.create_schema(schema_name)

def prepare_table(
@@ -1491,19 +1573,19 @@ def _get_type_sort_key(
) -> tuple[int, int]:
# return rank, with higher numbers ranking first

_len = int(getattr(sql_type, "length", 0) or 0)
len_ = int(getattr(sql_type, "length", 0) or 0)

_pytype = t.cast("type", sql_type.python_type)
if issubclass(_pytype, (str, bytes)):
return 900, _len
if issubclass(_pytype, datetime):
return 600, _len
if issubclass(_pytype, float):
return 400, _len
if issubclass(_pytype, int):
return 300, _len
pytype = t.cast("type", sql_type.python_type)
if issubclass(pytype, (str, bytes)):
return 900, len_
if issubclass(pytype, datetime):
return 600, len_
if issubclass(pytype, float):
return 400, len_
if issubclass(pytype, int):
return 300, len_

return 0, _len
return 0, len_

return sorted(sql_types, key=_get_type_sort_key, reverse=True)

15 changes: 11 additions & 4 deletions singer_sdk/contrib/filesystem/tap.py
@@ -105,6 +105,8 @@ class FolderTap(Tap, t.Generic[_T]):
This should be a subclass of `FileStream`.
"""

dynamic_catalog: bool = True

config_jsonschema: t.ClassVar[dict] = {"properties": {}}

@classmethod
@@ -124,11 +126,16 @@ def append_builtin_config(cls: type[FolderTap], config_jsonschema: dict) -> None
config_jsonschema: [description]
"""

def _merge_missing(source_jsonschema: dict, target_jsonschema: dict) -> None:
def _merge_missing(src: dict, tgt: dict) -> None:
# Append any missing properties in the target with those from source.
for k, v in source_jsonschema["properties"].items():
if k not in target_jsonschema["properties"]:
target_jsonschema["properties"][k] = v
for k, v in src["properties"].items():
if k not in tgt["properties"]:
tgt["properties"][k] = v

# Merge the required fields
source_required = src.get("required", [])
target_required = tgt.get("required", [])
tgt["required"] = list(set(source_required + target_required))

_merge_missing(BASE_CONFIG_SCHEMA, config_jsonschema)
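
Because the required lists are merged through a set, the result is
deduplicated but unordered. A standalone restatement of the nested helper,
for illustration only:

def merge_missing(src: dict, tgt: dict) -> None:
    # Same behavior as the nested _merge_missing above.
    for k, v in src["properties"].items():
        tgt["properties"].setdefault(k, v)
    tgt["required"] = list(set(src.get("required", [])) | set(tgt.get("required", [])))

src = {"properties": {"a": {}}, "required": ["a"]}
tgt = {"properties": {}, "required": ["b"]}
merge_missing(src, tgt)
assert set(tgt["required"]) == {"a", "b"}  # order is not guaranteed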

9 changes: 9 additions & 0 deletions singer_sdk/helpers/capabilities.py
@@ -144,6 +144,15 @@
description="The default target database schema name to use for all streams.",
),
).to_dict()
ACTIVATE_VERSION_CONFIG = PropertiesList(
Property(
"process_activate_version_messages",
BooleanType,
default=True,
title="Process `ACTIVATE_VERSION` messages",
description="Whether to process `ACTIVATE_VERSION` messages.",
),
).to_dict()
ADD_RECORD_METADATA_CONFIG = PropertiesList(
Property(
"add_record_metadata",
13 changes: 13 additions & 0 deletions singer_sdk/mapper.py
@@ -63,6 +63,18 @@ def md5(string: str) -> str:
return hashlib.md5(string.encode("utf-8")).hexdigest() # noqa: S324


def sha256(string: str) -> str:
"""Digest a string using SHA256. This is a function for inline calculations.
Args:
string: String to digest.
Returns:
A string digested into SHA256.
"""
return hashlib.sha256(string.encode("utf-8")).hexdigest()


StreamMapsDict: TypeAlias = dict[str, t.Union[str, dict, None]]


@@ -307,6 +319,7 @@ def functions(self) -> dict[str, t.Callable]:
"""
funcs: dict[str, t.Any] = simpleeval.DEFAULT_FUNCTIONS.copy()
funcs["md5"] = md5
funcs["sha256"] = sha256
funcs["datetime"] = datetime
funcs["bool"] = bool
funcs["json"] = json
2 changes: 1 addition & 1 deletion singer_sdk/mapper_base.py
@@ -88,7 +88,7 @@ def map_activate_version_message(
"""
...

def map_batch_message( # noqa: PLR6301
def map_batch_message(
self,
message_dict: dict,
) -> t.Iterable[singer.Message]:
13 changes: 6 additions & 7 deletions singer_sdk/plugin_base.py
@@ -160,14 +160,13 @@ def __init__(
validate_config: True to require validation of config settings.
Raises:
ValueError: If config is not a dict or path string.
TypeError: If config is not a dict or path string.
"""
if not config:
config_dict = {}
elif isinstance(config, (str, PurePath)):
config = config or {}
if isinstance(config, (str, PurePath)):
config_dict = read_json_file(config)
warnings.warn(
"Passsing a config file path is deprecated. Please pass the config "
"Passing a config file path is deprecated. Please pass the config "
"as a dictionary instead.",
SingerSDKDeprecationWarning,
stacklevel=2,
@@ -179,7 +178,7 @@ def __init__(
# list will override those of earlier ones.
config_dict.update(read_json_file(config_path))
warnings.warn(
"Passsing a list of config file paths is deprecated. Please pass the "
"Passing a list of config file paths is deprecated. Please pass the "
"config as a dictionary instead.",
SingerSDKDeprecationWarning,
stacklevel=2,
@@ -188,7 +187,7 @@ def __init__(
config_dict = config
else:
msg = f"Error parsing config of type '{type(config).__name__}'." # type: ignore[unreachable]
raise ValueError(msg)
raise TypeError(msg)
if parse_env_config:
self.logger.info("Parsing env var for settings config...")
config_dict.update(self._env_var_config)
11 changes: 10 additions & 1 deletion singer_sdk/sinks/core.py
@@ -399,6 +399,15 @@ def include_sdc_metadata_properties(self) -> bool:
"""
return self.config.get("add_record_metadata", False)

@property
def process_activate_version_messages(self) -> bool:
"""Check if activate version messages should be processed.
Returns:
True if activate version messages should be processed.
"""
return self.config.get("process_activate_version_messages", True)
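
# A target user can disable this handling via config; a hedged sketch:
#
#     {"process_activate_version_messages": false}
#
# as exercised by the SQLite target tests further below.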

@property
def datetime_error_treatment(self) -> DatetimeErrorTreatmentEnum:
"""Return a treatment to use for datetime parse errors: ERROR. MAX, or NULL.
@@ -447,7 +456,7 @@ def _add_sdc_metadata_to_record(
or datetime.datetime.now(tz=datetime.timezone.utc)
).isoformat()
record["_sdc_deleted_at"] = record.get("_sdc_deleted_at")
record["_sdc_sequence"] = int(round(time.time() * 1000))
record["_sdc_sequence"] = round(time.time() * 1000)
record["_sdc_table_version"] = message.get("version")
record["_sdc_sync_started_at"] = self.sync_started_at

43 changes: 28 additions & 15 deletions singer_sdk/sinks/sql.py
@@ -4,12 +4,12 @@

import re
import typing as t
import warnings
from collections import defaultdict
from copy import copy
from textwrap import dedent

import sqlalchemy as sa
from sqlalchemy.sql import quoted_name
from sqlalchemy.sql import insert
from sqlalchemy.sql.expression import bindparam

from singer_sdk.connectors import SQLConnector
@@ -282,19 +282,26 @@ def generate_insert_statement(
Returns:
An insert statement.
"""
property_names = list(self.conform_schema(schema)["properties"].keys())
column_identifiers = [
self.connector.quote(quoted_name(name, quote=True))
for name in property_names
]
statement = dedent(
f"""\
INSERT INTO {full_table_name}
({", ".join(column_identifiers)})
VALUES ({", ".join([f":{name}" for name in property_names])})
""",
conformed_schema = self.conform_schema(schema)
property_names = list(conformed_schema["properties"])

_, schema_name, table_name = self.connector.parse_full_table_name(
full_table_name
)
return statement.rstrip()

table = sa.Table(
table_name,
sa.MetaData(),
*[
sa.Column(
name, sa.String
) # Assuming all columns are of type String for simplicity # noqa: E501
for name in property_names
],
schema=schema_name,
)

return insert(table)

def bulk_insert_records(
self,
@@ -321,7 +328,13 @@ def bulk_insert_records(
full_table_name,
schema,
)
if isinstance(insert_sql, str):
if isinstance(insert_sql, str): # pragma: no cover
warnings.warn(
"Generating a SQL insert statement as a string is deprecated. "
"Please return an SQLAlchemy Executable object instead.",
DeprecationWarning,
stacklevel=2,
)
insert_sql = sa.text(insert_sql)

conformed_records = [self.conform_record(record) for record in records]
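
With the statement now built as a SQLAlchemy ``Insert``, quoting is
delegated to the dialect. A sketch of inspecting the result, assuming a
sink and schema as in the DuckDB sink test further below:

stmt = sink.generate_insert_statement("foo", schema=schema)
print(str(stmt))
# INSERT INTO foo (id, col_ts, "table") VALUES (:id, :col_ts, :table)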
6 changes: 4 additions & 2 deletions singer_sdk/streams/core.py
@@ -817,8 +817,10 @@ def _increment_stream_state(

def _write_state_message(self) -> None:
"""Write out a STATE message with the latest state."""
if (not self._is_state_flushed) and (
self.tap_state != self._last_emitted_state
if (
(not self._is_state_flushed)
and self.tap_state
and (self.tap_state != self._last_emitted_state)
):
self._tap.write_message(singer.StateMessage(value=self.tap_state))
self._last_emitted_state = copy.deepcopy(self.tap_state)
38 changes: 35 additions & 3 deletions singer_sdk/streams/rest.py
@@ -91,6 +91,8 @@ def __init__(
name: str | None = None,
schema: dict[str, t.Any] | Schema | None = None,
path: str | None = None,
*,
http_method: str | None = None,
) -> None:
"""Initialize the HTTP stream.
@@ -99,11 +101,13 @@ def __init__(
schema: JSON schema for records in this stream.
name: Name of this stream.
path: URL path for this entity stream.
http_method: HTTP method to use for requests.
"""
super().__init__(name=name, schema=schema, tap=tap)
if path:
self.path = path
self._http_headers: dict = {"User-Agent": self.user_agent}
self._http_method = http_method
self._requests_session = requests.Session()

@staticmethod
@@ -151,12 +155,37 @@ def rest_method(self) -> str:
.. deprecated:: 0.43.0
Override :meth:`~singer_sdk.RESTStream.http_method` instead.
"""
return "GET"
return self._http_method or "GET"

@rest_method.setter
@deprecated(
"Use `http_method` instead.",
category=SingerSDKDeprecationWarning,
)
def rest_method(self, value: str) -> None:
"""Set the HTTP method for requests.
Args:
value: The HTTP method to use for requests.
.. deprecated:: 0.43.0
Override :meth:`~singer_sdk.RESTStream.http_method` instead.
"""
self._http_method = value

@property
def http_method(self) -> str:
"""HTTP method to use for requests. Defaults to "GET"."""
return self.rest_method
return self._http_method or self.rest_method

@http_method.setter
def http_method(self, value: str) -> None:
"""Set the HTTP method for requests.
Args:
value: The HTTP method to use for requests.
"""
self._http_method = value

@property
def requests_session(self) -> requests.Session:
@@ -758,6 +787,8 @@ def __init__(
name: str | None = None,
schema: dict[str, t.Any] | Schema | None = None,
path: str | None = None,
*,
http_method: str | None = None,
) -> None:
"""Initialize the REST stream.
@@ -766,8 +797,9 @@ def __init__(
schema: JSON schema for records in this stream.
name: Name of this stream.
path: URL path for this entity stream.
http_method: HTTP method to use for requests
"""
super().__init__(tap, name, schema, path)
super().__init__(tap, name, schema, path, http_method=http_method)
self._compiled_jsonpath = None
self._next_page_token_compiled_jsonpath = None

31 changes: 23 additions & 8 deletions singer_sdk/tap_base.py
@@ -4,6 +4,7 @@

import abc
import contextlib
import pathlib
import typing as t
import warnings
from enum import Enum
@@ -105,7 +106,7 @@ def __init__(
elif catalog is not None:
self._input_catalog = Catalog.from_dict(read_json_file(catalog))
warnings.warn(
"Passsing a catalog file path is deprecated. Please pass the catalog "
"Passing a catalog file path is deprecated. Please pass the catalog "
"as a dictionary or Catalog object instead.",
SingerSDKDeprecationWarning,
stacklevel=2,
@@ -123,7 +124,7 @@ def __init__(
elif state:
state_dict = read_json_file(state)
warnings.warn(
"Passsing a state file path is deprecated. Please pass the state "
"Passing a state file path is deprecated. Please pass the state "
"as a dictionary instead.",
SingerSDKDeprecationWarning,
stacklevel=2,
@@ -459,7 +460,8 @@ def sync_all(self) -> None:
"""Sync all streams."""
self._reset_state_progress_markers()
self._set_compatible_replication_methods()
self.write_message(StateMessage(value=self.state))
if self.state:
self.write_message(StateMessage(value=self.state))

stream: Stream
for stream in self.streams.values():
@@ -494,8 +496,8 @@ def invoke( # type: ignore[override]
about: bool = False,
about_format: str | None = None,
config: tuple[str, ...] = (),
state: str | None = None,
catalog: str | None = None,
state: pathlib.Path | None = None,
catalog: pathlib.Path | None = None,
) -> None:
"""Invoke the tap's command line interface.
@@ -619,12 +621,20 @@ def get_singer_command(cls: type[Tap]) -> click.Command:
click.Option(
["--catalog"],
help="Use a Singer catalog file with the tap.",
type=click.Path(),
type=click.Path(
path_type=pathlib.Path,
exists=True,
dir_okay=False,
),
),
click.Option(
["--state"],
help="Use a bookmarks file for incremental replication.",
type=click.Path(),
type=click.Path(
path_type=pathlib.Path,
exists=True,
dir_okay=False,
),
),
],
)
@@ -648,6 +658,9 @@ class SQLTap(Tap):
querying a database's system tables).
"""

exclude_schemas: t.Sequence[str] = []
"""Hard-coded list of stream names to skip when discovering the catalog."""

_tap_connector: SQLConnector | None = None

def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
@@ -691,7 +704,9 @@ def catalog_dict(self) -> dict:
connector = self.tap_connector

result: dict[str, list[dict]] = {"streams": []}
result["streams"].extend(connector.discover_catalog_entries())
result["streams"].extend(
connector.discover_catalog_entries(exclude_schemas=self.exclude_schemas),
)

self._catalog_dict = result
return self._catalog_dict
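
A subclass opts in by overriding the class attribute; a hedged sketch with
an illustrative tap name and schema list:

class TapMyDB(SQLTap):
    name = "tap-mydb"
    exclude_schemas = ["information_schema"]
    # plus default_stream_class and the usual tap plumbing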
18 changes: 18 additions & 0 deletions singer_sdk/target_base.py
@@ -16,6 +16,7 @@
from singer_sdk.helpers._batch import BaseBatchFileEncoding
from singer_sdk.helpers._classproperty import classproperty
from singer_sdk.helpers.capabilities import (
ACTIVATE_VERSION_CONFIG,
ADD_RECORD_METADATA_CONFIG,
BATCH_CONFIG,
TARGET_BATCH_SIZE_ROWS_CONFIG,
@@ -454,6 +455,19 @@ def _process_activate_version_message(self, message_dict: dict) -> None:

for stream_map in self.mapper.stream_maps[stream_name]:
sink = self.get_sink(stream_map.stream_alias)
if not sink.process_activate_version_messages:
self.logger.warning(
"`ACTIVATE_VERSION` messages are not enabled for '%s'. Ignoring.",
stream_map.stream_alias,
)
continue
if not sink.include_sdc_metadata_properties:
self.logger.warning(
"The `ACTIVATE_VERSION` feature uses the `_sdc_deleted_at` and "
"`_sdc_deleted_at` metadata properties so they will be added to "
"the schema for '%s' even though `add_record_metadata` is "
"disabled.",
)
sink.activate_version(message_dict["version"])

def _process_batch_message(self, message_dict: dict) -> None:
@@ -621,6 +635,9 @@ def _merge_missing(source_jsonschema: dict, target_jsonschema: dict) -> None:

capabilities = cls.capabilities

if PluginCapabilities.ACTIVATE_VERSION in capabilities:
_merge_missing(ACTIVATE_VERSION_CONFIG, config_jsonschema)

if PluginCapabilities.BATCH in capabilities:
_merge_missing(BATCH_CONFIG, config_jsonschema)

@@ -660,6 +677,7 @@ def capabilities(self) -> list[CapabilitiesEnum]:
sql_target_capabilities: list[CapabilitiesEnum] = super().capabilities
sql_target_capabilities.extend(
[
PluginCapabilities.ACTIVATE_VERSION,
TargetCapabilities.TARGET_SCHEMA,
TargetCapabilities.HARD_DELETE,
]
12 changes: 6 additions & 6 deletions singer_sdk/testing/tap_tests.py
@@ -199,9 +199,9 @@ def test(self) -> None:
f"Length of set of records IDs ({count_unique_records})"
f" is not equal to number of records ({count_records})."
)
assert all(
all(k is not None for k in pk) for pk in record_ids
), "Primary keys contain some key values that are null."
assert all(all(k is not None for k in pk) for pk in record_ids), (
"Primary keys contain some key values that are null."
)


class AttributeIsDateTimeTest(AttributeTestTemplate):
@@ -382,9 +382,9 @@ class AttributeNotNullTest(AttributeTestTemplate):
def test(self) -> None:
"""Run test."""
for r in self.stream_records:
assert (
r.get(self.attribute_name) is not None
), f"Detected null values for attribute ('{self.attribute_name}')."
assert r.get(self.attribute_name) is not None, (
f"Detected null values for attribute ('{self.attribute_name}')."
)

@classmethod
def evaluate(
12 changes: 6 additions & 6 deletions singer_sdk/testing/templates.py
@@ -47,7 +47,7 @@ def id(self) -> str:
msg = "ID not implemented."
raise NotImplementedError(msg)

def setup(self) -> None: # noqa: PLR6301
def setup(self) -> None:
"""Test setup, called before `.test()`.
This method is useful for preparing external resources (databases, folders etc.)
@@ -63,7 +63,7 @@ def test(self) -> None:
"""Main Test body, called after `.setup()` and before `.validate()`."""
self.runner.sync_all()

def validate(self) -> None: # noqa: PLR6301
def validate(self) -> None:
"""Test validation, called after `.test()`.
This method is particularly useful in Target tests, to validate that records
@@ -75,7 +75,7 @@ def validate(self) -> None: # noqa: PLR6301
msg = "Method not implemented."
raise NotImplementedError(msg)

def teardown(self) -> None: # noqa: PLR6301
def teardown(self) -> None:
"""Test Teardown.
This method is useful for cleaning up external resources
@@ -322,9 +322,9 @@ def run( # type: ignore[override]
"""
# get input from file
if getattr(self, "singer_filepath", None):
assert (
self.singer_filepath.is_file()
), f"Singer file {self.singer_filepath} does not exist."
assert self.singer_filepath.is_file(), (
f"Singer file {self.singer_filepath} does not exist."
)
runner.input_filepath = self.singer_filepath
super().run(config, resource, runner)

12 changes: 12 additions & 0 deletions singer_sdk/typing.py
@@ -655,6 +655,8 @@ def __init__( # noqa: PLR0913
*,
nullable: bool | None = None,
title: str | None = None,
deprecated: bool | None = None,
**kwargs: t.Any,
) -> None:
"""Initialize Property object.
@@ -677,6 +679,8 @@ def __init__( # noqa: PLR0913
displayed to the user as hints of the expected format of inputs.
nullable: If True, the property may be null.
title: Optional. A short, human-readable title for the property.
deprecated: If True, mark this property as deprecated.
**kwargs: Additional keyword arguments to pass to the parent class.
"""
self.name = name
self.wrapped = wrapped
@@ -688,6 +692,8 @@ def __init__( # noqa: PLR0913
self.examples = examples or None
self.nullable = nullable
self.title = title
self.deprecated = deprecated
self.kwargs = kwargs

@property
def type_dict(self) -> dict: # type: ignore[override]
@@ -745,6 +751,12 @@ def to_dict(self) -> dict:
type_dict.update({"enum": self.allowed_values})
if self.examples:
type_dict.update({"examples": self.examples})

if self.deprecated is not None:
type_dict["deprecated"] = self.deprecated

type_dict.update(self.kwargs)

return {self.name: type_dict}
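
A quick sketch of the new flag end to end, mirroring the jsonschema helper
test further below:

from singer_sdk.typing import IntegerType, Property

Property("legacy_id", IntegerType, deprecated=True).to_dict()
# {'legacy_id': {'type': ['integer', 'null'], 'deprecated': True}}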


4 changes: 2 additions & 2 deletions tests/core/rest/test_pagination.py
@@ -32,7 +32,7 @@ def test_paginator_base_missing_implementation():

with pytest.raises(
TypeError,
match="Can't instantiate abstract class .* '?get_next'?",
match=r"Can't instantiate abstract class .* '?get_next'?",
):
BaseAPIPaginator(0)

@@ -57,7 +57,7 @@ def test_paginator_hateoas_missing_implementation():

with pytest.raises(
TypeError,
match="Can't instantiate abstract class .* '?get_next_url'?",
match=r"Can't instantiate abstract class .* '?get_next_url'?",
):
BaseHATEOASPaginator()

16 changes: 9 additions & 7 deletions tests/core/sinks/test_sql_sink.py
@@ -1,9 +1,9 @@
from __future__ import annotations

import typing as t
from textwrap import dedent

import pytest
from sqlalchemy.sql import Insert

from samples.sample_duckdb import DuckDBConnector
from singer_sdk.sinks.sql import SQLSink
@@ -55,10 +55,12 @@ def sink(self, target: DuckDBTarget, schema: dict) -> DuckDBSink:

def test_generate_insert_statement(self, sink: DuckDBSink, schema: dict):
"""Test that the insert statement is generated correctly."""
expected = dedent(
"""\
INSERT INTO foo
(id, col_ts, "table")
VALUES (:id, :col_ts, :table)"""
stmt = sink.generate_insert_statement("foo", schema=schema)
assert isinstance(stmt, Insert)
assert stmt.table.name == "foo"
assert stmt.table.columns.keys() == ["id", "col_ts", "table"]

# Rendered SQL should look like:
assert str(stmt) == (
'INSERT INTO foo (id, col_ts, "table") VALUES (:id, :col_ts, :table)'
)
assert sink.generate_insert_statement("foo", schema=schema) == expected
6 changes: 3 additions & 3 deletions tests/core/test_catalog_selection.py
@@ -245,6 +245,6 @@ def test_record_property_pop(
breadcrumb=(),
)

assert (
record_pop == record_selected
), f"Expected record={record_selected}, got {record_pop}"
assert record_pop == record_selected, (
f"Expected record={record_selected}, got {record_pop}"
)
83 changes: 83 additions & 0 deletions tests/core/test_connector_sql.py
@@ -20,6 +20,8 @@
from singer_sdk.exceptions import ConfigValidationError

if t.TYPE_CHECKING:
from pathlib import Path

from sqlalchemy.engine import Engine


@@ -349,6 +351,32 @@ def test_adapt_column_type(self, connector: DuckDBConnector):
assert result.keys() == ["id", "name"]
assert result.cursor.description[1][1] == "STRING"

@pytest.mark.parametrize(
"exclude_schemas,expected_streams",
[
([], 1),
(["memory.my_schema"], 0),
],
)
def test_discover_catalog_entries_exclude_schemas(
self,
connector: DuckDBConnector,
exclude_schemas: list[str],
expected_streams: int,
):
with connector._engine.connect() as conn, conn.begin():
conn.execute(sa.text("CREATE SCHEMA my_schema"))
conn.execute(
sa.text(
"CREATE TABLE my_schema.test_table (id INTEGER PRIMARY KEY, name STRING)", # noqa: E501
)
)
entries = connector.discover_catalog_entries(
exclude_schemas=exclude_schemas,
reflect_indices=False,
)
assert len(entries) == expected_streams


def test_adapter_without_json_serde():
registry.register(
@@ -702,3 +730,58 @@ def test_singer_decimal(self):
jsonschema_type = {"type": ["string"], "format": "singer.decimal"}
result = json_schema_to_sql.to_sql_type(jsonschema_type)
assert isinstance(result, sa.types.DECIMAL)

def test_annotation_sql_datatype(self):
json_schema_to_sql = JSONSchemaToSQL()
json_schema_to_sql.register_sql_datatype_handler("json", sa.types.JSON)
jsonschema_type = {"type": ["string"], "x-sql-datatype": "json"}
result = json_schema_to_sql.to_sql_type(jsonschema_type)
assert isinstance(result, sa.types.JSON)

unknown_type = {"type": ["string"], "x-sql-datatype": "unknown"}
with pytest.warns(
UserWarning,
match="This target does not support the x-sql-datatype",
):
result = json_schema_to_sql.to_sql_type(unknown_type)

assert isinstance(result, sa.types.VARCHAR)


def test_bench_discovery(benchmark, tmp_path: Path):
def _discover_catalog(connector):
connector.discover_catalog_entries()

number_of_tables = 250
number_of_views = 250
number_of_columns = 10
db_path = tmp_path / "foo.db"
engine = sa.create_engine(f"sqlite:///{db_path}")

columns_fragment = ",".join(f"col_{i} VARCHAR" for i in range(number_of_columns))

# Seed a large number of tables
table_ddl = f"""
CREATE TABLE table_{{n}} (
id INTEGER NOT NULL,
{columns_fragment},
PRIMARY KEY (id)
);
"""

# Seed a large number of views
view_ddl = """
CREATE VIEW view_{n} AS
SELECT * FROM table_{n};
"""

with engine.connect() as conn:
for i in range(number_of_tables):
conn.execute(sa.text(table_ddl.format(n=i)))

for i in range(number_of_views):
conn.execute(sa.text(view_ddl.format(n=i)))

connector = SQLConnector(config={"sqlalchemy_url": f"sqlite:///{db_path}"})

benchmark(_discover_catalog, connector)
14 changes: 8 additions & 6 deletions tests/core/test_jsonschema_helpers.py
@@ -495,12 +495,14 @@ def test_inbuilt_type(json_type: JSONTypeHelper, expected_json_schema: dict):
IntegerType,
allowed_values=[1, 2, 3, 4, 5, 6, 7, 8, 9],
examples=[1, 2, 3],
deprecated=True,
),
{
"my_prop9": {
"type": ["integer", "null"],
"enum": [1, 2, 3, 4, 5, 6, 7, 8, 9],
"examples": [1, 2, 3],
"deprecated": True,
},
},
{is_integer_type},
@@ -606,13 +608,13 @@ def test_property_creation(
property_name = next(iter(property_dict.keys()))
property_node = property_dict[property_name]
if check_fn in type_fn_checks_true:
assert (
check_fn(property_node) is True
), f"{check_fn.__name__} was not True for {property_dict!r}"
assert check_fn(property_node) is True, (
f"{check_fn.__name__} was not True for {property_dict!r}"
)
else:
assert (
check_fn(property_node) is False
), f"{check_fn.__name__} was not False for {property_dict!r}"
assert check_fn(property_node) is False, (
f"{check_fn.__name__} was not False for {property_dict!r}"
)


def test_wrapped_type_dict():
1 change: 1 addition & 0 deletions tests/core/test_mapper.py
@@ -704,6 +704,7 @@ def discover_streams(self):
{
"mystream": {
"email_hash": "md5(email)",
"email_hash_sha256": "sha256(email)",
"fixed_count": "int(count-1)",
"__else__": None,
},
46 changes: 23 additions & 23 deletions tests/core/test_parent_child.py
@@ -104,19 +104,19 @@ def test_parent_context_fields_in_child(tap: MyTap):
messages = _get_messages(tap)

# Parent schema is emitted
assert messages[1]
assert messages[1]["type"] == SingerMessageType.SCHEMA
assert messages[1]["stream"] == parent_stream.name
assert messages[1]["schema"] == parent_stream.schema
assert messages[0]
assert messages[0]["type"] == SingerMessageType.SCHEMA
assert messages[0]["stream"] == parent_stream.name
assert messages[0]["schema"] == parent_stream.schema

# Child schema is emitted
assert messages[2]
assert messages[2]["type"] == SingerMessageType.SCHEMA
assert messages[2]["stream"] == child_stream.name
assert messages[2]["schema"] == child_stream.schema
assert messages[1]
assert messages[1]["type"] == SingerMessageType.SCHEMA
assert messages[1]["stream"] == child_stream.name
assert messages[1]["schema"] == child_stream.schema

# Child records are emitted
child_record_messages = messages[3:6]
child_record_messages = messages[2:5]
assert child_record_messages
assert all(msg["type"] == SingerMessageType.RECORD for msg in child_record_messages)
assert all(msg["stream"] == child_stream.name for msg in child_record_messages)
@@ -156,13 +156,13 @@ def test_child_deselected_parent(tap_with_deselected_parent: MyTap):
messages = _get_messages(tap_with_deselected_parent)

# First message is a schema for the child stream, not the parent
assert messages[1]
assert messages[1]["type"] == SingerMessageType.SCHEMA
assert messages[1]["stream"] == child_stream.name
assert messages[1]["schema"] == child_stream.schema
assert messages[0]
assert messages[0]["type"] == SingerMessageType.SCHEMA
assert messages[0]["stream"] == child_stream.name
assert messages[0]["schema"] == child_stream.schema

# Child records are emitted
child_record_messages = messages[2:5]
child_record_messages = messages[1:4]
assert child_record_messages
assert all(msg["type"] == SingerMessageType.RECORD for msg in child_record_messages)
assert all(msg["stream"] == child_stream.name for msg in child_record_messages)
@@ -237,30 +237,30 @@ def discover_streams(self):
messages = _get_messages(tap)

# Parent schema is emitted
assert messages[1]
assert messages[1]["type"] == SingerMessageType.SCHEMA
assert messages[1]["stream"] == parent_stream.name
assert messages[1]["schema"] == parent_stream.schema
assert messages[0]
assert messages[0]["type"] == SingerMessageType.SCHEMA
assert messages[0]["stream"] == parent_stream.name
assert messages[0]["schema"] == parent_stream.schema

# Child schemas are emitted
schema_messages = messages[2:9:3]
schema_messages = messages[1:8:3]
assert schema_messages
assert all(msg["type"] == SingerMessageType.SCHEMA for msg in schema_messages)
assert all(msg["stream"] == child_stream.name for msg in schema_messages)
assert all(msg["schema"] == child_stream.schema for msg in schema_messages)

# Child records are emitted
child_record_messages = messages[3:10:3]
child_record_messages = messages[2:9:3]
assert child_record_messages
assert all(msg["type"] == SingerMessageType.RECORD for msg in child_record_messages)
assert all(msg["stream"] == child_stream.name for msg in child_record_messages)
assert all("pid" in msg["record"] for msg in child_record_messages)

# State messages are emitted
state_messages = messages[4:11:3]
state_messages = messages[3:10:3]
assert state_messages
assert all(msg["type"] == SingerMessageType.STATE for msg in state_messages)

# Parent record is emitted
assert messages[11]
assert messages[11]["type"] == SingerMessageType.RECORD
assert messages[10]
assert messages[10]["type"] == SingerMessageType.RECORD
23 changes: 23 additions & 0 deletions tests/core/test_plugin_base.py
@@ -1,10 +1,15 @@
from __future__ import annotations

import typing as t

import pytest

from singer_sdk.plugin_base import SDK_PACKAGE_NAME, MapperNotInitialized, PluginBase
from singer_sdk.typing import IntegerType, PropertiesList, Property, StringType

if t.TYPE_CHECKING:
from pathlib import Path


class PluginTest(PluginBase):
"""Example Plugin for tests."""
@@ -16,6 +21,24 @@ class PluginTest(PluginBase):
).to_dict()


def test_config_path(tmp_path: Path):
"""Test that the config path is correctly set."""
config_json = '{"prop1": "hello", "prop2": 123}'
config_path = tmp_path / "config.json"
config_path.write_text(config_json)

with pytest.deprecated_call():
plugin = PluginTest(config=config_path)

assert plugin.config == {"prop1": "hello", "prop2": 123}


def test_invalid_config_type():
"""Test that invalid config types raise an error."""
with pytest.raises(TypeError, match="Error parsing config of type 'tuple'"):
PluginTest(config=(("prop1", "hello"), ("prop2", 123)))


def test_get_env_var_config(monkeypatch: pytest.MonkeyPatch):
"""Test settings parsing from environment variables."""
monkeypatch.delenv("PLUGIN_TEST_PROP1", raising=False)
4 changes: 2 additions & 2 deletions tests/core/test_sql_typing.py
@@ -49,7 +49,7 @@ def test_convert_jsonschema_type_to_sql_type(
jsonschema_type: dict,
sql_type: sa.types.TypeEngine,
):
with pytest.warns(DeprecationWarning):
with pytest.warns(DeprecationWarning, match="Use `JSONSchemaToSQL` instead"):
result = th.to_sql_type(jsonschema_type)
assert isinstance(result, sql_type.__class__)
assert str(result) == str(sql_type)
@@ -71,7 +71,7 @@ def test_convert_sql_type_to_jsonschema_type(
sql_type: sa.types.TypeEngine,
is_of_jsonschema_type: dict,
):
with pytest.warns(DeprecationWarning):
with pytest.warns(DeprecationWarning, match="Use `SQLToJSONSchema` instead"):
result = th.to_jsonschema_type(sql_type)

assert result == is_of_jsonschema_type
6 changes: 3 additions & 3 deletions tests/core/test_state_handling.py
@@ -151,9 +151,9 @@ def test_null_replication_value(caplog):
check_sorted=check_sorted,
)

assert (
stream_state["replication_key_value"] == "2021-05-17T20:41:16Z"
), "State should not be updated."
assert stream_state["replication_key_value"] == "2021-05-17T20:41:16Z", (
"State should not be updated."
)
assert caplog.records[0].levelname == "WARNING"
assert "is null" in caplog.records[0].message

49 changes: 49 additions & 0 deletions tests/core/test_streams.py
@@ -10,12 +10,15 @@

import pytest
import requests
import requests_mock.adapter as requests_mock_adapter

from singer_sdk._singerlib import Catalog, MetadataMapping
from singer_sdk.exceptions import (
FatalAPIError,
InvalidReplicationKeyException,
)
from singer_sdk.helpers._classproperty import classproperty
from singer_sdk.helpers._compat import SingerSDKDeprecationWarning
from singer_sdk.helpers._compat import datetime_fromisoformat as parse
from singer_sdk.helpers.jsonpath import _compile_jsonpath
from singer_sdk.streams.core import REPLICATION_FULL_TABLE, REPLICATION_INCREMENTAL
@@ -569,6 +572,52 @@ def prepare_request_payload(self, context, next_page_token): # noqa: ARG002
]


def test_mutate_http_method(tap: Tap, requests_mock: requests_mock.Mocker):
"""Test HTTP method can be overridden."""

def callback(request: requests.PreparedRequest, context: requests_mock.Context):
if request.method == "POST":
return {
"data": [
{"id": 1, "value": "abc"},
{"id": 2, "value": "def"},
]
}

# Method not allowed
context.status_code = 405
context.reason = "Method Not Allowed"
return {"error": "Check your method"}

class PostStream(RestTestStream):
records_jsonpath = "$.data[*]"
path = "/endpoint"

stream = PostStream(tap, http_method="PUT")
requests_mock.request(
requests_mock_adapter.ANY,
url="https://example.com/endpoint",
json=callback,
)

with pytest.raises(FatalAPIError, match="Method Not Allowed"):
list(stream.request_records(None))

with pytest.warns(SingerSDKDeprecationWarning):
stream.rest_method = "GET"

with pytest.raises(FatalAPIError, match="Method Not Allowed"):
list(stream.request_records(None))

stream.http_method = "POST"

records = list(stream.request_records(None))
assert records == [
{"id": 1, "value": "abc"},
{"id": 2, "value": "def"},
]


def test_parse_response(tap: Tap):
content = """[
{"id": 1, "value": 3.14159},
2 changes: 1 addition & 1 deletion tests/core/test_testing.py
@@ -18,7 +18,7 @@ def test_module_deprecations():

with pytest.raises(
AttributeError,
match="module singer_sdk.testing has no attribute",
match="module singer_sdk\\.testing has no attribute",
):
testing.foo # noqa: B018

12 changes: 6 additions & 6 deletions tests/external/test_tap_gitlab.py
@@ -49,13 +49,13 @@ def test_gitlab_replication_keys(gitlab_config: dict | None):
f"Incorrect 'replication_key' in catalog: ({key_props_2})\n\n"
f"Catalog entry was: {catalog_entry}"
)
assert tap.streams[
stream_name
].is_timestamp_replication_key, "Failed to detect `is_timestamp_replication_key`"
assert tap.streams[stream_name].is_timestamp_replication_key, (
"Failed to detect `is_timestamp_replication_key`"
)

assert tap.streams[
"commits"
].is_timestamp_replication_key, "Failed to detect `is_timestamp_replication_key`"
assert tap.streams["commits"].is_timestamp_replication_key, (
"Failed to detect `is_timestamp_replication_key`"
)


def test_gitlab_sync_epic_issues(gitlab_config: dict | None):
8 changes: 4 additions & 4 deletions tests/samples/conftest.py
@@ -15,11 +15,11 @@
@pytest.fixture
def csv_config(outdir: str) -> dict:
"""Get configuration dictionary for target-csv."""
return {"target_folder": outdir}
return {"target_folder": outdir, "add_record_metadata": False}


@pytest.fixture
def _sqlite_sample_db(sqlite_connector):
def sqlite_sample_db(sqlite_connector: SQLiteConnector):
"""Return a path to a newly constructed sample DB."""
with sqlite_connector._connect() as conn, conn.begin():
for t in range(3):
@@ -72,12 +72,12 @@ def sqlite_sample_db_catalog(sqlite_sample_db_config) -> Catalog:

@pytest.fixture
def sqlite_sample_tap(
_sqlite_sample_db,
sqlite_sample_db,
sqlite_sample_db_config,
sqlite_sample_db_state,
sqlite_sample_db_catalog,
) -> SQLiteTap:
_ = _sqlite_sample_db
_ = sqlite_sample_db
return SQLiteTap(
config=sqlite_sample_db_config,
catalog=sqlite_sample_db_catalog.to_dict(),
2 changes: 1 addition & 1 deletion tests/samples/test_tap_countries.py
@@ -140,7 +140,7 @@ def tally_messages(messages: list) -> t.Counter:
assert counter["SCHEMA", "countries"] == 1
assert counter["BATCH", "countries"] == 1

assert counter["STATE",] == 3
assert counter["STATE",] == 2


@pytest.mark.snapshot
10 changes: 8 additions & 2 deletions tests/samples/test_tap_csv.py
@@ -87,7 +87,10 @@ def test_tap_stream_transformed_catalog_schema_matches_record(
runner: TapTestRunner,
stream: CSVStream,
):
with pytest.warns(UserWarning):
with pytest.warns(
UserWarning,
match="Fields in transformed catalog but not in records",
):
super().test_tap_stream_transformed_catalog_schema_matches_record(
config,
resource,
@@ -102,5 +105,8 @@ def test_tap_stream_returns_record(
runner: TapTestRunner,
stream: CSVStream,
):
with pytest.warns(UserWarning):
with pytest.warns(
UserWarning,
match="No records returned in stream",
):
super().test_tap_stream_returns_record(config, resource, runner, stream)
165 changes: 154 additions & 11 deletions tests/samples/test_target_sqlite.py
@@ -30,6 +30,23 @@
from singer_sdk.target_base import SQLTarget


def get_table(config: dict, table_name: str) -> sa.Table:
"""Get SQLAlchemy metadata and table for inspection.

Args:
config: Target configuration dictionary containing database path
table_name: Name of the table to inspect

Returns:
Tuple of (metadata, table)
"""
db_path = config["path_to_db"]
engine = sa.create_engine(f"sqlite:///{db_path}")
meta = sa.MetaData()
meta.reflect(bind=engine)
return meta.tables[table_name]


@pytest.fixture
def path_to_target_db(tmp_path: Path) -> Path:
return Path(f"{tmp_path}/target_test.db")
@@ -50,7 +67,32 @@ def sqlite_sample_target(sqlite_target_test_config):
@pytest.fixture
def sqlite_sample_target_hard_delete(sqlite_target_test_config):
"""Get a sample target object with hard_delete disabled."""
return SQLiteTarget(config={**sqlite_target_test_config, "hard_delete": True})
return SQLiteTarget(
config={
**sqlite_target_test_config,
"hard_delete": True,
"add_record_metadata": False,
}
)


@pytest.fixture
def sqlite_sample_target_no_activate_version(sqlite_target_test_config):
"""Get a sample target object with hard_delete disabled."""
return SQLiteTarget(
config={
**sqlite_target_test_config,
"process_activate_version_messages": False,
}
)


@pytest.fixture
def sqlite_target_add_record_metadata(sqlite_target_test_config):
"""Get a sample target object with add_record_metadata enabled."""
return SQLiteTarget(
config={**sqlite_target_test_config, "add_record_metadata": True}
)


@pytest.fixture
@@ -79,6 +121,7 @@ def test_sync_sqlite_to_sqlite(
- Confirm the STDOUT from the original sample DB matches with the
STDOUT from the re-tapped target DB.
"""
initial_state = deepcopy(sqlite_sample_tap.state)
orig_stdout, _, _, _ = tap_to_target_sync_test(
sqlite_sample_tap,
sqlite_sample_target,
@@ -87,6 +130,7 @@ def test_sync_sqlite_to_sqlite(
tapped_config = dict(sqlite_sample_target.config)
tapped_target = SQLiteTap(
config=tapped_config,
state=initial_state,
catalog=sqlite_sample_db_catalog.to_dict(),
)
new_stdout, _ = tap_sync_test(tapped_target)
@@ -96,6 +140,8 @@ def test_sync_sqlite_to_sqlite(
new_lines = new_stdout.readlines()
assert len(orig_lines) > 0, "Orig tap output should not be empty."
assert len(new_lines) > 0, "(Re-)tapped target output should not be empty."
assert orig_lines[0] == new_lines[0]
assert "STATE" in new_lines[0]
assert len(orig_lines) == len(new_lines)

line_num = 0
@@ -116,9 +162,9 @@ def test_sync_sqlite_to_sqlite(
msg = f"Could not parse JSON in new line {line_num}: {new_out}"
raise RuntimeError(msg) from e

assert (
tapped_json["type"] == orig_json["type"]
), f"Mismatched message type on line {line_num}."
assert tapped_json["type"] == orig_json["type"], (
f"Mismatched message type on line {line_num}."
)
if tapped_json["type"] == "SCHEMA":
assert (
tapped_json["schema"]["properties"].keys()
@@ -268,6 +314,108 @@ def test_sqlite_activate_version(
finalize=True,
)

# Check that the record metadata was added
table = get_table(sqlite_sample_target_hard_delete.config, test_tbl)

assert "_sdc_table_version" in table.columns
assert type(table.columns["_sdc_table_version"].type) is sa.INTEGER

assert "_sdc_deleted_at" in table.columns
assert type(table.columns["_sdc_deleted_at"].type) is sa.DATETIME


def test_sqlite_no_activate_version(
sqlite_sample_target_no_activate_version: SQLTarget,
):
"""Test handling the activate_version message for the SQLite target.

Test performs the following actions:

- Sends an activate_version message, which should be ignored because
`process_activate_version_messages` is disabled
"""
test_tbl = f"zzz_tmp_{str(uuid4()).split('-')[-1]}"
schema_msg = {
"type": "SCHEMA",
"stream": test_tbl,
"schema": th.PropertiesList(th.Property("col_a", th.StringType())).to_dict(),
}

tap_output = "\n".join(
json.dumps(msg)
for msg in [
schema_msg,
{"type": "ACTIVATE_VERSION", "stream": test_tbl, "version": 12345},
{
"type": "RECORD",
"stream": test_tbl,
"record": {"col_a": "samplerow1"},
"version": 12345,
},
]
)

target_sync_test(
sqlite_sample_target_no_activate_version,
input=StringIO(tap_output),
finalize=True,
)

# Check that the record metadata was not added
table = get_table(sqlite_sample_target_no_activate_version.config, test_tbl)

assert "col_a" in table.columns
assert "_sdc_table_version" not in table.columns
assert "_sdc_deleted_at" not in table.columns


def test_sqlite_add_record_metadata(sqlite_target_add_record_metadata: SQLTarget):
"""Test handling the activate_version message for the SQLite target.

Test performs the following actions:

- Sends an activate_version message for a table that doesn't exist (which should
have no effect)
"""
test_tbl = f"zzz_tmp_{str(uuid4()).split('-')[-1]}"
schema_msg = {
"type": "SCHEMA",
"stream": test_tbl,
"schema": th.PropertiesList(th.Property("col_a", th.StringType())).to_dict(),
}

tap_output = "\n".join(
json.dumps(msg)
for msg in [
schema_msg,
{"type": "ACTIVATE_VERSION", "stream": test_tbl, "version": 12345},
{
"type": "RECORD",
"stream": test_tbl,
"record": {"col_a": "samplerow1"},
"version": 12345,
},
]
)

target_sync_test(
sqlite_target_add_record_metadata,
input=StringIO(tap_output),
finalize=True,
)

# Check that the record metadata was added
table = get_table(sqlite_target_add_record_metadata.config, test_tbl)

assert "_sdc_received_at" in table.columns
assert type(table.columns["_sdc_received_at"].type) is sa.DATETIME

assert "_sdc_sync_started_at" in table.columns
assert type(table.columns["_sdc_sync_started_at"].type) is sa.INTEGER

assert "_sdc_table_version" in table.columns
assert type(table.columns["_sdc_table_version"].type) is sa.INTEGER


def test_sqlite_column_morph(sqlite_sample_target: SQLTarget):
"""End-to-end-to-end test for SQLite tap and target.
@@ -502,12 +650,7 @@ def test_record_with_missing_properties(
},
},
[],
dedent(
"""\
INSERT INTO test_stream
(id, name, "table")
VALUES (:id, :name, :table)""",
),
'INSERT INTO test_stream (id, name, "table") VALUES (:id, :name, :table)',
),
],
ids=[
@@ -532,7 +675,7 @@ def test_sqlite_generate_insert_statement(
sink.full_table_name,
sink.schema,
)
assert dml == expected_dml
assert str(dml) == expected_dml


def test_hostile_to_sqlite(
1 change: 0 additions & 1 deletion tests/snapshots/mapped_stream/aliased_stream.jsonl
@@ -1,4 +1,3 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"aliased_stream","schema":{"properties":{"email":{"type":["string"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]},"joined_at":{"format":"date-time","type":["string","null"]}},"type":"object","required":["email"],"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"alice@example.com","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]},"joined_at":"2022-01-01T00:00:00Z"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"bob@example.com","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]},"joined_at":"2022-01-01T00:00:00Z"},"time_extracted":"2022-01-01T00:00:00+00:00"}