From a7d2a6a576ec8cb7c405702deaf4b40ea9d4d848 Mon Sep 17 00:00:00 2001 From: crwr45 Date: Wed, 5 Aug 2020 12:32:14 +0100 Subject: [PATCH] Feature: Use Postgres env vars in absence of explicit config This allows the tap to be added into existing Postgres environments more easily, and reduces configuration duplication. The pre-existing config still takes precedence, and the defaults in the absence of config and env vars are what they were before. Signed-off-by: crwr45 --- README.md | 57 ++++++++++++++++++++----------------- target_postgres/__init__.py | 29 +++++++++++++++++-- 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index e30c2ab2..1b08586d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ pip install singer-target-postgres 1. Create a [config file](#configjson) at `~/singer.io/target_postgres_config.json` with postgres connection information and target postgres schema. + If a value is not set in the config file, the target will use the normal + [PostgreSQL env vars](https://www.postgresql.org/docs/current/libpq-envars.html) + for Postgres configuration before taking the default. ```json { @@ -64,32 +67,34 @@ pip install singer-target-postgres The fields available to be specified in the config file are specified here. 
- -| Field | Type | Default | Details | -| --------------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `postgres_host` | `["string", "null"]` | `"localhost"` | | -| `postgres_port` | `["integer", "null"]` | `5432` | | -| `postgres_database` | `["string"]` | `N/A` | | -| `postgres_username` | `["string", "null"]` | `N/A` | | -| `postgres_password` | `["string", "null"]` | `null` | | -| `postgres_schema` | `["string", "null"]` | `"public"` | | -| `postgres_sslmode` | `["string", "null"]` | `"prefer"` | Refer to the [libpq](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) docs for more information about SSL | -| `postgres_sslcert` | `["string", "null"]` | `"~/.postgresql/postgresql.crt"` | Only used if a SSL request w/ a client certificate is being made | -| `postgres_sslkey` | `["string", "null"]` | `"~/.postgresql/postgresql.key"` | Only used if a SSL request w/ a client certificate is being made | -| `postgres_sslrootcert` | `["string", "null"]` | `"~/.postgresql/root.crt"` | Used for authentication of a server SSL certificate | -| `postgres_sslcrl` | `["string", "null"]` | `"~/.postgresql/root.crl"` | Used for authentication of a server SSL certificate | -| `invalid_records_detect` | `["boolean", "null"]` | `true` | Include `false` in your config to disable `target-postgres` from crashing on invalid records | -| `invalid_records_threshold` | `["integer", "null"]` | `0` | Include a positive value `n` in your config to allow for `target-postgres` to encounter at most `n` invalid records per stream before giving up. 
| -| `disable_collection` | `["string", "null"]` | `false` | Include `true` in your config to disable [Singer Usage Logging](#usage-logging). | -| `logging_level` | `["string", "null"]` | `"INFO"` | The level for logging. Set to `DEBUG` to get things like queries executed, timing of those queries, etc. See [Python's Logger Levels](https://docs.python.org/3/library/logging.html#levels) for information about valid values. | -| `persist_empty_tables` | `["boolean", "null"]` | `False` | Whether the Target should create tables which have no records present in Remote. | -| `max_batch_rows` | `["integer", "null"]` | `200000` | The maximum number of rows to buffer in memory before writing to the destination table in Postgres | -| `max_buffer_size` | `["integer", "null"]` | `104857600` (100MB in bytes) | The maximum number of bytes to buffer in memory before writing to the destination table in Postgres | -| `batch_detection_threshold` | `["integer", "null"]` | `5000`, or 1/40th `max_batch_rows` | How often, in rows received, to count the buffered rows and bytes to check if a flush is necessary. There's a slight performance penalty to checking the buffered records count or bytesize, so this controls how often this is polled in order to mitigate the penalty. This value is usually not necessary to set as the default is dynamically adjusted to check reasonably often. | -| `state_support` | `["boolean", "null"]` | `True` | Whether the Target should emit `STATE` messages to stdout for further consumption. In this mode, which is on by default, STATE messages are buffered in memory until all the records that occurred before them are flushed according to the batch flushing schedule the target is configured with. | -| `add_upsert_indexes` | `["boolean", "null"]` | `True` | Whether the Target should create column indexes on the important columns used during data loading. These indexes will make data loading slightly slower but the deduplication phase much faster. 
Defaults to on for better baseline performance. | -| `before_run_sql` | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | -| `after_run_sql` | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | +If a field is not set, the value from the standard[PostgreSQL env vars](https://www.postgresql.org/docs/current/libpq-envars.html) +will be used if available and set. Finally, the Default value will be used. + +| Field | Fallback Env Var | Type | Default | Details | +| --------------------------- | ---------------- | --------------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `postgres_host` | `PGHOST` | `["string", "null"]` | `"localhost"` | | +| `postgres_port` | `PGPORT` | `["integer", "null"]` | `5432` | | +| `postgres_database` | `PGDATABASE` | `["string"]` | `N/A` | | +| `postgres_username` | `PGUSER` | `["string", "null"]` | `N/A` | | +| `postgres_password` | `PGPASSWORD` | `["string", "null"]` | `null` | | +| `postgres_schema` | | `["string", "null"]` | `"public"` | | +| `postgres_sslmode` | `PGSSLMODE` | `["string", "null"]` | `"prefer"` | Refer to the [libpq](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) docs for more information about SSL | +| `postgres_sslcert` | `PGSSLCERT` | `["string", "null"]` | `"~/.postgresql/postgresql.crt"` | Only used if a 
SSL request w/ a client certificate is being made | +| `postgres_sslkey` | `PGSSLKEY` | `["string", "null"]` | `"~/.postgresql/postgresql.key"` | Only used if a SSL request w/ a client certificate is being made | +| `postgres_sslrootcert` | `PGSSLROOTCERT` | `["string", "null"]` | `"~/.postgresql/root.crt"` | Used for authentication of a server SSL certificate | +| `postgres_sslcrl` | `PGSSLCRL` | `["string", "null"]` | `"~/.postgresql/root.crl"` | Used for authentication of a server SSL certificate | +| `invalid_records_detect` | | `["boolean", "null"]` | `true` | Include `false` in your config to disable `target-postgres` from crashing on invalid records | +| `invalid_records_threshold` | | `["integer", "null"]` | `0` | Include a positive value `n` in your config to allow for `target-postgres` to encounter at most `n` invalid records per stream before giving up. | +| `disable_collection` | | `["string", "null"]` | `false` | Include `true` in your config to disable [Singer Usage Logging](#usage-logging). | +| `logging_level` | | `["string", "null"]` | `"INFO"` | The level for logging. Set to `DEBUG` to get things like queries executed, timing of those queries, etc. See [Python's Logger Levels](https://docs.python.org/3/library/logging.html#levels) for information about valid values. | +| `persist_empty_tables` | | `["boolean", "null"]` | `False` | Whether the Target should create tables which have no records present in Remote. | +| `max_batch_rows` | | `["integer", "null"]` | `200000` | The maximum number of rows to buffer in memory before writing to the destination table in Postgres | +| `max_buffer_size` | | `["integer", "null"]` | `104857600` (100MB in bytes) | The maximum number of bytes to buffer in memory before writing to the destination table in Postgres | +| `batch_detection_threshold` | | `["integer", "null"]` | `5000`, or 1/40th `max_batch_rows` | How often, in rows received, to count the buffered rows and bytes to check if a flush is necessary. 
There's a slight performance penalty to checking the buffered records count or bytesize, so this controls how often this is polled in order to mitigate the penalty. This value is usually not necessary to set as the default is dynamically adjusted to check reasonably often. | +| `state_support` | | `["boolean", "null"]` | `True` | Whether the Target should emit `STATE` messages to stdout for further consumption. In this mode, which is on by default, STATE messages are buffered in memory until all the records that occurred before them are flushed according to the batch flushing schedule the target is configured with. | +| `add_upsert_indexes` | | `["boolean", "null"]` | `True` | Whether the Target should create column indexes on the important columns used during data loading. These indexes will make data loading slightly slower but the deduplication phase much faster. Defaults to on for better baseline performance. | +| `before_run_sql` | | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | +| `after_run_sql` | | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. 
|
 
 ### Supported Versions
 
diff --git a/target_postgres/__init__.py b/target_postgres/__init__.py
index 6531bb23..6e22a673 100644
--- a/target_postgres/__init__.py
+++ b/target_postgres/__init__.py
@@ -1,3 +1,5 @@
+import os
+
 from singer import utils
 import psycopg2
 
@@ -9,6 +11,21 @@
 ]
 
 
+# Config keys and the libpq environment variables used as their fallback.
+CONFIG_TO_ENV_MAPPING = {
+    'postgres_host': 'PGHOST',
+    'postgres_port': 'PGPORT',
+    'postgres_database': 'PGDATABASE',
+    'postgres_username': 'PGUSER',
+    'postgres_password': 'PGPASSWORD',
+    'postgres_sslmode': 'PGSSLMODE',
+    'postgres_sslcert': 'PGSSLCERT',
+    'postgres_sslkey': 'PGSSLKEY',
+    'postgres_sslrootcert': 'PGSSLROOTCERT',
+    'postgres_sslcrl': 'PGSSLCRL',
+}
+
+
 def main(config, input_stream=None):
     with psycopg2.connect(
             connection_factory=MillisLoggingConnection,
@@ -39,7 +56,30 @@ def main(config, input_stream=None):
             target_tools.main(postgres_target)
 
 
+def fallback_to_env_vars(config):
+    """Fill unset Postgres settings in config from libpq environment variables.
+
+    For every key in CONFIG_TO_ENV_MAPPING that is missing from config (or
+    set to None), the value of the mapped environment variable is used, but
+    only when that variable is actually set. Unset variables never add a
+    None entry: a None entry would make the key present, which hides a
+    missing required key from a presence-based check and overrides the
+    default of any config.get(key, default) lookup.
+
+    The config dict is updated in place and also returned.
+    """
+    for conf_key, env_var in CONFIG_TO_ENV_MAPPING.items():
+        env_value = os.environ.get(env_var)
+        if config.get(conf_key) is None and env_value is not None:
+            config[conf_key] = env_value
+    return config
+
+
 def cli():
-    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
+    # Required keys are validated only after the env var fallback, so parsing
+    # itself requires none; parse_args takes the key list as a positional arg.
+    args = utils.parse_args([])
+    config = fallback_to_env_vars(args.config)
+    utils.check_config(config, REQUIRED_CONFIG_KEYS)
 
-    main(args.config)
+    main(config)