From 7da229e343f2fa698c323b5155097f88b158391f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Mon, 27 Jan 2025 17:21:29 -0600 Subject: [PATCH] feat: Support `x-sql-datatype` for integer types --- README.md | 25 +++++++++++++++++++++++++ target_postgres/connector.py | 3 +++ target_postgres/tests/test_types.py | 8 ++++++++ 3 files changed, 36 insertions(+) diff --git a/README.md b/README.md index 5cbb6e2..9f1b413 100644 --- a/README.md +++ b/README.md @@ -302,6 +302,18 @@ If a column has multiple jsonschema types, the following order is using to order - BOOLEAN - NOTYPE +### `x-sql-datatype` extension + +This target supports the [`x-sql-datatype` extension](https://sdk.meltano.com/en/latest/guides/sql-target.html#use-the-x-sql-datatype-json-schema-extension) to the JSON schema. This extension allows you to specify the Postgres data type that should be used for a given field. This can be useful when the default mapping is not what you want. + + + +| `x-sql-datatype` | Postgres | Description | +| :--------------- | :------- | :----------------------------------------------------------------- | +| smallint | smallint | small-range integer (-32768 to +32767) | +| integer | integer | typical choice for integer (-2147483648 to +2147483647) | +| bigint | bigint | large-range integer (-9223372036854775808 to +9223372036854775807) | + ### Using the Singer catalog to narrow down the Postgres data types You can use [Singer catalog's schema](https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#schemas) to override the data types coming from the tap. 
The easiest way to do this is to use Meltano and its [`schema` setting](https://docs.meltano.com/concepts/plugins/#schema-extra) for the tap: @@ -320,6 +332,19 @@ plugins: maximum: 1000 ``` +Alternatively, use the `x-sql-datatype` extension to request a specific Postgres type: + +```yaml +# meltano.yml +plugins: + extractors: + - name: tap-my-tap + schema: + some_stream_id: + my_column: + x-sql-datatype: smallint +``` + ## Content Encoding Support Json Schema supports the [`contentEncoding` keyword](https://datatracker.ietf.org/doc/html/rfc4648#section-8), which can be used to specify the encoding of input string types. diff --git a/target_postgres/connector.py b/target_postgres/connector.py index 228a3f0..75bfa6f 100644 --- a/target_postgres/connector.py +++ b/target_postgres/connector.py @@ -310,6 +310,9 @@ def jsonschema_to_sql(self) -> JSONSchemaToSQL: to_sql.register_format_handler("hostname", TEXT) to_sql.register_format_handler("ipv4", TEXT) to_sql.register_format_handler("ipv6", TEXT) + to_sql.register_sql_datatype_handler("smallint", SMALLINT) + to_sql.register_sql_datatype_handler("integer", INTEGER) + to_sql.register_sql_datatype_handler("bigint", BIGINT) return to_sql def to_sql_type(self, jsonschema_type: dict) -> sa.types.TypeEngine: diff --git a/target_postgres/tests/test_types.py b/target_postgres/tests/test_types.py index 065ca32..436aa8d 100644 --- a/target_postgres/tests/test_types.py +++ b/target_postgres/tests/test_types.py @@ -93,6 +93,14 @@ def test_datetime_string(self, to_postgres: JSONSchemaToPostgres): BIGINT, id="bigint", ), + pytest.param( + { + "type": "integer", + "x-sql-datatype": "smallint", + }, + SMALLINT, + id="x-sql-datatype-smallint", + ), ], ) def test_integers(