Skip to content

Commit

Permalink
Merge branch 'main' into 484-feat-support-integer-types-other-than-bi…
Browse files Browse the repository at this point in the history
…gint
  • Loading branch information
edgarrmondragon committed Jan 15, 2025
2 parents dd89406 + c02fdb9 commit f7ebe6b
Show file tree
Hide file tree
Showing 9 changed files with 308 additions and 206 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/constraints.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
meltano==3.6.0b2
meltano==3.6.0
tox==4.23.2
2 changes: 1 addition & 1 deletion .github/workflows/release_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ jobs:
tag: ${{ github.ref }}
overwrite: true
file_glob: true
- uses: pypa/gh-action-pypi-publish@v1.11.0
- uses: pypa/gh-action-pypi-publish@v1.12.3
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.2
rev: v0.8.6
hooks:
- id: ruff
args: [--fix]
Expand Down
55 changes: 28 additions & 27 deletions README.md

Large diffs are not rendered by default.

394 changes: 222 additions & 172 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.9"
faker = {version = "~=30.0", optional = true}
faker = {version = "~=33.1", optional = true}
psycopg = {extras = ["binary"], version = "3.2.3"}
psycopg2-binary = "2.9.10"
sqlalchemy = "~=2.0"
sshtunnel = "0.4.0"

[tool.poetry.dependencies.singer-sdk]
version = "~=0.42.0b1"
version = "~=0.43.0"

[tool.poetry.extras]
faker = ["faker"]
Expand Down
9 changes: 9 additions & 0 deletions target_postgres/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ def interpret_content_encoding(self) -> bool:
"""
return self.config.get("interpret_content_encoding", False)

@cached_property
def sanitize_null_text_characters(self) -> bool:
"""Whether to sanitize null text characters.
Returns:
True if the feature is enabled, False otherwise.
"""
return self.config.get("sanitize_null_text_characters", False)

def prepare_table( # type: ignore[override] # noqa: PLR0913
self,
full_table_name: str | FullyQualifiedName,
Expand Down
35 changes: 33 additions & 2 deletions target_postgres/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,27 @@ def generate_temp_table_name(self):
# in postgres, used a guid just in case we are using the same session
return f"{str(uuid.uuid4()).replace('-', '_')}"

def sanitize_null_text_characters(self, data):
"""Sanitizes null characters by replacing \u0000 with \ufffd."""

def replace_null_character(d):
return d.replace("\u0000", "\ufffd")

if isinstance(data, str):
data = replace_null_character(data)

elif isinstance(data, dict):
for k in data:
if isinstance(data[k], str):
data[k] = replace_null_character(data[k])

elif isinstance(data, list):
for i in range(0, len(data)):
if isinstance(data[i], str):
data[i] = replace_null_character(data[i])

return data

def generate_copy_statement(
self,
full_table_name: str | FullyQualifiedName,
Expand Down Expand Up @@ -204,7 +225,12 @@ def bulk_insert_records( # type: ignore[override]
unique_records: dict[tuple, dict] = {} # pk tuple: values
for record in records:
insert_record = {
column.name: record.get(column.name) for column in columns
column.name: (
self.sanitize_null_text_characters(record.get(column.name))
if self.connector.sanitize_null_text_characters
else record.get(column.name)
)
for column in columns
}
# No need to check for a KeyError here because the SDK already
# guarantees that all key properties exist in the record.
Expand All @@ -214,7 +240,12 @@ def bulk_insert_records( # type: ignore[override]
else:
for record in records:
insert_record = {
column.name: record.get(column.name) for column in columns
column.name: (
self.sanitize_null_text_characters(record.get(column.name))
if self.connector.sanitize_null_text_characters
else record.get(column.name)
)
for column in columns
}
data.append(insert_record)

Expand Down
11 changes: 11 additions & 0 deletions target_postgres/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,17 @@ def __init__(
"in an error if the data is not encoded as expected."
),
),
th.Property(
"sanitize_null_text_characters",
th.BooleanType,
default=False,
description=(
"If set to true, the target will sanitize null characters in "
"char/text/varchar fields, as they are not supported by Postgres. "
"See [postgres documentation](https://www.postgresql.org/docs/current/functions-string.html) " # noqa: E501
"for more information about chr(0) not being supported."
),
),
th.Property(
"ssl_enable",
th.BooleanType,
Expand Down

0 comments on commit f7ebe6b

Please sign in to comment.