From 3bd50f25b8553fb7fbdf6b8d44d7f9be9a8722d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Fri, 22 Nov 2024 12:33:42 -0600 Subject: [PATCH] Use setting for opt-in --- .pre-commit-config.yaml | 11 ----------- README.md | 1 + pyproject.toml | 3 +++ target_postgres/connector.py | 2 +- target_postgres/sinks.py | 19 +++++++++++++++---- target_postgres/target.py | 10 ++++++++++ 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d0da2f..17ee9f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,14 +23,3 @@ repos: - id: ruff args: [--fix] - id: ruff-format - -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.13.0' - hooks: - - id: mypy - exclude: tests - additional_dependencies: - - types-paramiko - - types-simplejson - - types-sqlalchemy - - types-jsonschema diff --git a/README.md b/README.md index 4880900..d995330 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ This target is tested with all actively supported [Python](https://devguide.pyth | user | False | None | User name used to authenticate. | | password | False | None | Password used to authenticate. | | database | False | None | Database name. | +| use_copy | False | 0 | Use the COPY command to insert data. This is usually faster than INSERT statements. | | default_target_schema | False | melty | Postgres schema to send data to, example: tap-clickup | | activate_version | False | 1 | If set to false, the tap will ignore activate version messages. If set to true, add_record_metadata must be set to true as well. | | hard_delete | False | 0 | When activate version is sent from a tap this specefies if we should delete the records that don't match, or mark them with a date in the `_sdc_deleted_at` column. This config option is ignored if `activate_version` is set to false. 
| diff --git a/pyproject.toml b/pyproject.toml index fb439da..d2144ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,9 @@ types-jsonschema = ">=4.19.0.3" [tool.mypy] exclude = "tests" +warn_redundant_casts = true +warn_unused_configs = true +warn_unused_ignores = true [[tool.mypy.overrides]] module = ["sshtunnel"] diff --git a/target_postgres/connector.py b/target_postgres/connector.py index 28713e8..f627876 100644 --- a/target_postgres/connector.py +++ b/target_postgres/connector.py @@ -756,7 +756,7 @@ def guess_key_type(self, key_data: str) -> paramiko.PKey: paramiko.Ed25519Key, ): try: - key = key_class.from_private_key(io.StringIO(key_data)) # type: ignore[attr-defined] + key = key_class.from_private_key(io.StringIO(key_data)) except paramiko.SSHException: # noqa: PERF203 continue else: diff --git a/target_postgres/sinks.py b/target_postgres/sinks.py index 50ac888..d6959cd 100644 --- a/target_postgres/sinks.py +++ b/target_postgres/sinks.py @@ -122,7 +122,7 @@ def generate_temp_table_name(self): def generate_copy_statement( self, full_table_name: str | FullyQualifiedName, - columns: list[sa.Column], # type: ignore[override] + columns: list[sa.Column], ) -> str: """Generate a copy statement for bulk copy. @@ -196,8 +196,6 @@ def bulk_insert_records( # type: ignore[override] True if table exists, False if not, None if unsure or undetectable. 
""" columns = self.column_representation(schema) - copy_statement: str = self.generate_copy_statement(table.name, columns) - self.logger.info("Inserting with SQL: %s", copy_statement) data: list[dict[str, t.Any]] = [] @@ -220,7 +218,20 @@ def bulk_insert_records( # type: ignore[override] } data.append(insert_record) - self._do_copy(connection, copy_statement, columns, data) + if self.config["use_copy"]: + copy_statement: str = self.generate_copy_statement(table.name, columns) + self.logger.info("Inserting with SQL: %s", copy_statement) + self._do_copy(connection, copy_statement, columns, data) + else: + insert: str = t.cast( + str, + self.generate_insert_statement( + table.name, + columns, + ), + ) + self.logger.info("Inserting with SQL: %s", insert) + connection.execute(insert, data) return True diff --git a/target_postgres/target.py b/target_postgres/target.py index d2830da..4c7462c 100644 --- a/target_postgres/target.py +++ b/target_postgres/target.py @@ -138,6 +138,16 @@ def __init__( th.StringType, description="Database name.", ), + th.Property( + "use_copy", + th.BooleanType, + default=False, + description=( + "Use the COPY command to insert data. This is usually faster than " + "INSERT statements." + ), + title="Use COPY", + ), th.Property( "sqlalchemy_url", th.StringType,