diff --git a/README.md b/README.md
index 855bfc9..05ad658 100644
--- a/README.md
+++ b/README.md
@@ -72,14 +72,14 @@ We recommend that you add this package as a dependency to your own `dbt` package
 ### Package customizations
 
 We advise you to customize your package if you
-- Want to specify your own paid sources with `paid_sources`.
+- Want to specify your own paid sources with `ga4_data_paid_sources`.
 For instance, if Google marks Reddit ads as organic, you can define them as paid.
-- Want to unpack more event parameters with `extra_event_params`.
+- Want to unpack more event parameters with `ga4_data_extra_event_params`.
 
-The columns name for `extra_event_params` and `paid_sources` may be configured in `dbt_project.yml` or by passing the variables in command line:
+The column names for `ga4_data_extra_event_params` and `ga4_data_paid_sources` may be configured in `dbt_project.yml` or by passing the variables in command line:
 
 ```shell
-dbt run --profiles-dir . --vars '{schema_name: , paid_sources: ["reddit.com", "youtube.com"], extra_event_params: ["page_referrer"]}' --fail-fast
+dbt run --profiles-dir . --vars '{ga4_data_schema_name: , ga4_data_paid_sources: ["reddit.com", "youtube.com"], ga4_data_extra_event_params: ["page_referrer"]}' --fail-fast
 ```
 
 
@@ -209,7 +209,7 @@ Options:
 ```shell
 python bigquery_pipeline.py --table events --month 11 --year 2023 \
 --destination bigquery --dataset test_dataset --pipeline_name my_bigquery_pipeline \
- --dbt_run_params "--fail-fast --full-refresh" --dbt_additional_vars "paid_sources=['reddit.com']"
+ --dbt_run_params "--fail-fast --full-refresh" --dbt_additional_vars "ga4_data_paid_sources=['reddit.com']"
 ```
 
 Read more about a running pipeline: [Run a pipeline.](https://dlthub.com/docs/walkthroughs/run-a-pipeline)
diff --git a/bigquery_pipeline.py b/bigquery_pipeline.py
index 52675ab..4af8662 100644
--- a/bigquery_pipeline.py
+++ b/bigquery_pipeline.py
@@ -54,7 +54,7 @@ def transform_data(pipeline, dbt_run_params, dbt_additional_vars: Optional[dict]
     # and the `venv` argument to dlt.dbt.package()
     venv = dlt.dbt.get_venv(pipeline)
     dbt = dlt.dbt.package(pipeline, "dbt_transform", venv=venv)
-    additional_vars = {"schema_name": pipeline.dataset_name}
+    additional_vars = {"ga4_data_schema_name": pipeline.dataset_name}
     if dbt_additional_vars:
         additional_vars.update(dbt_additional_vars)
     # run the models and collect any info
diff --git a/dbt_project.yml b/dbt_project.yml
index 8ad7299..66575b3 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -11,9 +11,9 @@ require-dbt-version: [">=1.7.0", "<2.0.0"]
 profile: 'ga4_schema_redshift'
 
 vars:
-  schema_name: GA4_DATA
-  paid_sources: ['reddit.com']
-  extra_event_params:
+  ga4_data_schema_name: GA4_DATA
+  ga4_data_paid_sources: ['reddit.com']
+  ga4_data_extra_event_params:
     - 'page_referrer'
 
 # These configurations specify where dbt should look for different types of files.
diff --git a/dbt_transform/models/dimensions/dim_traffic_source.sql b/dbt_transform/models/dimensions/dim_traffic_source.sql
index 2ee0166..15954e9 100644
--- a/dbt_transform/models/dimensions/dim_traffic_source.sql
+++ b/dbt_transform/models/dimensions/dim_traffic_source.sql
@@ -5,7 +5,7 @@ SELECT
     traffic_source__medium,
     traffic_source__source,
     CASE
-        WHEN traffic_source__source IN ({{ "'" + var('paid_sources')|join("', '") + "'" }}) THEN 'paid'
+        WHEN traffic_source__source IN ({{ "'" + var('ga4_data_paid_sources')|join("', '") + "'" }}) THEN 'paid'
         ELSE 'organic'
     END as custom_source
 FROM
diff --git a/dbt_transform/models/sources.yml b/dbt_transform/models/sources.yml
index 82e53a8..c8d2644 100644
--- a/dbt_transform/models/sources.yml
+++ b/dbt_transform/models/sources.yml
@@ -2,7 +2,7 @@ version: 2
 
 sources:
   - name: dlt_metadata
-    schema: "{{ var('schema_name') }}"
+    schema: "{{ var('ga4_data_schema_name') }}"
     tables:
       - name: _dlt_loads
      - name: _dlt_version
@@ -12,7 +12,7 @@ sources:
           - dbt_expectations.expect_column_to_exist
 
   - name: ga4_select_star
-    schema: "{{ var('schema_name') }}"
+    schema: "{{ var('ga4_data_schema_name') }}"
     tables:
       - name: events
       - name: events__event_params
diff --git a/dbt_transform/models/staging/stg_event_params_unpacked.sql b/dbt_transform/models/staging/stg_event_params_unpacked.sql
index 38e356c..0b5cd41 100644
--- a/dbt_transform/models/staging/stg_event_params_unpacked.sql
+++ b/dbt_transform/models/staging/stg_event_params_unpacked.sql
@@ -43,7 +43,7 @@ SELECT
         END
     ) as {{ param }}
     {% endfor %}
-    {% for param in var('extra_event_params') %}
+    {% for param in var('ga4_data_extra_event_params') %}
     , MAX(
         CASE
             WHEN event_name = '{{ param }}' THEN param_value
diff --git a/profiles.yml b/profiles.yml
index 40199bf..a308e16 100644
--- a/profiles.yml
+++ b/profiles.yml
@@ -8,7 +8,7 @@ ga4_schema_redshift:
       password: "{{ env_var('PG_PASSWORD') }}"
       port: "{{ env_var('PG_PORT', 5439) | as_number }}"
       dbname: "{{ env_var('PG_DATABASE_NAME') }}"
-      schema: "{{ var('schema_name') }}"
+      schema: "{{ var('ga4_data_schema_name') }}"
       threads: 4
       keepalives_idle: 0 # default 0, indicating the system default
       connect_timeout: 10 # default 10 seconds
@@ -30,7 +30,7 @@ ga4_schema_snowflake:
       role: "{{ env_var('SF_ROLE') }}"
       database: "{{ env_var('SF_DATABASE_NAME') }}"
       warehouse: "{{ env_var('SF_WAREHOUSE') }}"
-      schema: "{{ var('schema_name') }}"
+      schema: "{{ var('ga4_data_schema_name') }}"
       threads: 4
 
       # optional
@@ -59,7 +59,7 @@ ga4_schema_snowflake_key_pair:
       role: "{{ env_var('SF_ROLE') }}"
       database: "{{ env_var('SF_DATABASE_NAME') }}"
       warehouse: "{{ env_var('SF_WAREHOUSE') }}"
-      schema: "{{ var('schema_name') }}"
+      schema: "{{ var('ga4_data_schema_name') }}"
       threads: 4
 
       # optional
@@ -76,7 +76,7 @@ ga4_schema_bigquery:
       type: bigquery
       method: service-account-json
       project: "{{ env_var('BQ_PROJECT_ID') }}"
-      schema: "{{ var('schema_name') }}"
+      schema: "{{ var('ga4_data_schema_name') }}"
       keyfile_json:
         type: service_account
         project_id: "{{ env_var('BQ_PROJECT_ID') }}"
@@ -94,7 +94,7 @@ ga4_schema_bigquery_service_file:
       type: bigquery
       method: service-account
       project: "{{ env_var('BQ_PROJECT_ID') }}"
-      schema: "{{ var('schema_name') }}"
+      schema: "{{ var('ga4_data_schema_name') }}"
       keyfile: "{{ env_var('BQ_KEY_FILE_PATH') }}"
       threads: 4
       keepalives_idle: 0 # default 0, indicating the system default