From b7e3fe3e748ddff9ea4e04a7fc956903f26098a0 Mon Sep 17 00:00:00 2001 From: Nobert Mumo Date: Fri, 20 Dec 2024 20:30:45 +0300 Subject: [PATCH] adding logic for the immunization data --- dbt_project.yml | 14 +++++---- models/base_layer/staging_base_layer.yml | 8 ----- models/dimensional_layer/dim_client.sql | 30 +++++++++++++++++++ models/dimensional_layer/dim_date.sql | 9 +++--- models/dimensional_layer/dim_facility.sql | 23 ++++++++++++++ .../dim_vaccination_status.sql | 14 +++++++++ models/dimensional_layer/dim_vaccine.sql | 20 +++++++++++++ .../dimensional_layer_schema.yml | 2 ++ models/dimensional_layer/fct_immunization.sql | 24 +++++++++++++++ .../intermediate_layer_schema.yml | 2 ++ models/sources.yml | 26 ++++++++++++---- models/staging_layer/staging_layer_schema.yml | 25 ++++++++++++++++ .../staging_layer/stg_client_demographics.sql | 3 ++ models/staging_layer/stg_facility.sql | 3 ++ models/staging_layer/stg_immunization.sql | 3 ++ models/staging_layer/stg_vaccine.sql | 3 ++ .../aggregate_immunization.sql | 21 +++++++++++++ .../universal_semantic_layer.yml | 1 + 18 files changed, 209 insertions(+), 22 deletions(-) delete mode 100644 models/base_layer/staging_base_layer.yml create mode 100644 models/dimensional_layer/dim_client.sql create mode 100644 models/dimensional_layer/dim_facility.sql create mode 100644 models/dimensional_layer/dim_vaccination_status.sql create mode 100644 models/dimensional_layer/dim_vaccine.sql create mode 100644 models/dimensional_layer/dimensional_layer_schema.yml create mode 100644 models/dimensional_layer/fct_immunization.sql create mode 100644 models/intermediate_layer/intermediate_layer_schema.yml create mode 100644 models/staging_layer/staging_layer_schema.yml create mode 100644 models/staging_layer/stg_client_demographics.sql create mode 100644 models/staging_layer/stg_facility.sql create mode 100644 models/staging_layer/stg_immunization.sql create mode 100644 models/staging_layer/stg_vaccine.sql create mode 100644 
models/universal_semantic_layer/aggregate_immunization.sql create mode 100644 models/universal_semantic_layer/universal_semantic_layer.yml diff --git a/dbt_project.yml b/dbt_project.yml index 7d42ddf..25c751f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -10,16 +10,18 @@ profile: 'palladium_kenya_analytics' # Configuring models models: palladium_kenya_analytics: - base_layer: + staging_layer: +materialized: table - +schema: staging_base_tables + +schema: staging_layer dimensional_layer: +materialized: table +schema: dimensional_layer - reporting_layer: + intermediate_layer: +materialized: table - +schema: reporting - + +schema: intermediate_layer + universal_semantic_layer: + +materialized: table + +schema: universal_semantic_layer # Configuring where test failures are saved tests: +store_failures: true @@ -29,6 +31,8 @@ tests: # Configuring varibales to be used in the transformations vars: years_back: 3 + unknown_key: "md5(cast('-1' as varchar))" + date_unknown_key: "md5(cast('1900-01-01' as varchar))" # These configurations specify where dbt should look for different types of files. 
model-paths: ["models"]
diff --git a/models/base_layer/staging_base_layer.yml b/models/base_layer/staging_base_layer.yml
deleted file mode 100644
index d607e38..0000000
--- a/models/base_layer/staging_base_layer.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-version: 2
-
-models:
-  - name:
-    description:
-    columns:
-      - name:
-        description:
diff --git a/models/dimensional_layer/dim_client.sql b/models/dimensional_layer/dim_client.sql
new file mode 100644
index 0000000..0b1c67f
--- /dev/null
+++ b/models/dimensional_layer/dim_client.sql
@@ -0,0 +1,33 @@
+-- Client dimension built from stg_client_demographics, plus one placeholder
+-- "unknown" member that facts can point at when no client matches.
+with data as (
+    select
+        distinct clientid,
+        gender,
+        birthdate,
+        maritalstatus,
+        program,
+        facilityid
+    from {{ ref('stg_client_demographics') }}
+
+    union
+
+    -- Unknown member; facilityid -1 is the unknown member's id in dim_facility.
+    select
+        '-1' as clientid,
+        'Unknown' as gender,
+        cast('1900-01-01' as date) as birthdate,
+        'Unknown' as maritalstatus,
+        'Unknown' as program,
+        -1 as facilityid
+
+)
+select
+    {{ dbt_utils.surrogate_key(['clientid']) }} as client_key,
+    clientid as client_id,
+    gender,
+    birthdate,
+    maritalstatus,
+    program,
+    facilityid
+from data
\ No newline at end of file
diff --git a/models/dimensional_layer/dim_date.sql b/models/dimensional_layer/dim_date.sql
index 0cabd41..8708584 100644
--- a/models/dimensional_layer/dim_date.sql
+++ b/models/dimensional_layer/dim_date.sql
@@ -8,7 +8,6 @@ with date_spine as (
 ),
 final_data as (
     select
-        {{ dbt_utils.surrogate_key( ['date_day']) }} as date_key,
         cast(date_day as date) as date,
         date_part('year', date_day) as Year,
         date_part('month', date_day) as Month,
@@ -17,13 +16,15 @@ final_data as (
     union
 
     select
-        'Unknown' as date_key,
         '1900-01-01'::date as date,
         -999 as Year,
         -999 as Month,
         -999 CalendarQuarter
 )
 select
-    final_data.*,
-    cast(current_date as date) as load_date
+    {{ dbt_utils.surrogate_key( ['date']) }} as date_key,
+    date,
+    Year,
+    Month,
+    CalendarQuarter
 from final_data
\ No newline at end of file
diff --git a/models/dimensional_layer/dim_facility.sql b/models/dimensional_layer/dim_facility.sql
new file mode 100644
index 0000000..235e49b
--- /dev/null
+++ b/models/dimensional_layer/dim_facility.sql
@@ -0,0 +1,25 @@
+-- Facility dimension built from stg_facility, plus one placeholder "unknown"
+-- member (facilityid -1) for facts whose facility has no match.
+with data as (
+    select
+        distinct facilityid,
+        facilityname,
+        facilitycountry,
+        facilityregion
+    from {{ ref('stg_facility') }}
+
+    union
+
+    select
+        -1 as facilityid,
+        'Unknown' as facilityname,
+        'Unknown' as facilitycountry,
+        'Unknown' as facilityregion
+)
+select
+    {{ dbt_utils.surrogate_key(['facilityid']) }} as facility_key,
+    facilityid as facility_id,
+    facilityname as facility_name,
+    facilitycountry as country,
+    facilityregion as facility_region
+from data
\ No newline at end of file
diff --git a/models/dimensional_layer/dim_vaccination_status.sql b/models/dimensional_layer/dim_vaccination_status.sql
new file mode 100644
index 0000000..40b3a9e
--- /dev/null
+++ b/models/dimensional_layer/dim_vaccination_status.sql
@@ -0,0 +1,16 @@
+-- Immunization-status dimension: the distinct statuses present in
+-- stg_immunization, plus a placeholder '-1' member for unmatched facts.
+with data as (
+    select
+        distinct immunizationstatus
+    from {{ ref('stg_immunization') }}
+
+    union
+
+    select
+        '-1' as immunizationstatus
+)
+select
+    {{ dbt_utils.surrogate_key(['immunizationstatus']) }} as immunization_status_key,
+    immunizationstatus as immunization_status
+from data
\ No newline at end of file
diff --git a/models/dimensional_layer/dim_vaccine.sql b/models/dimensional_layer/dim_vaccine.sql
new file mode 100644
index 0000000..bf07363
--- /dev/null
+++ b/models/dimensional_layer/dim_vaccine.sql
@@ -0,0 +1,22 @@
+-- Vaccine dimension built from stg_vaccine, plus one placeholder "unknown"
+-- member (vaccinecode '-1') for facts whose vaccine has no match.
+with data as (
+select
+    vaccinecode,
+    displaylong,
+    displayshort
+from {{ ref('stg_vaccine') }}
+
+union
+
+select
+    '-1' as vaccinecode,
+    'Unknown' as displaylong,
+    'Unknown' as displayshort
+)
+select
+    {{ dbt_utils.surrogate_key(['vaccinecode']) }} as vaccine_key,
+    vaccinecode as vaccine_code,
+    displaylong,
+    displayshort
+from data
diff --git a/models/dimensional_layer/dimensional_layer_schema.yml b/models/dimensional_layer/dimensional_layer_schema.yml
new file mode 100644
index 0000000..f2802b0
--- /dev/null
+++ b/models/dimensional_layer/dimensional_layer_schema.yml
@@ -0,0 +1,2 @@
+version: 2
+
diff --git a/models/dimensional_layer/fct_immunization.sql b/models/dimensional_layer/fct_immunization.sql
new file mode 100644
index 0000000..3759e42
--- /dev/null
+++ b/models/dimensional_layer/fct_immunization.sql
@@ -0,0 +1,45 @@
+-- Immunization fact: one row per stg_immunization record (assuming every
+-- dimension lookup matches at most one row), keyed to the client, facility,
+-- vaccine, date and status dimensions.
+with immunization_joined as (
+    select
+        facility.facility_key,
+        client.client_key,
+        vaccine.vaccine_key,
+        occurence.date_key,
+        status.immunization_status_key,
+        immunization.dosequantity,
+        immunization.dosenumber,
+        -- Completed months of age on the immunization date; NULL when the
+        -- client lookup fails or either date is missing.
+        extract(year from age(immunization.occurrencedate, client.birthdate)) * 12
+            + extract(month from age(immunization.occurrencedate, client.birthdate))
+            as age_in_months_at_immunization
+    from {{ ref('stg_immunization') }} as immunization
+    left join {{ ref('dim_client') }} as client
+        on client.client_id = immunization.clientid
+    left join {{ ref('dim_facility') }} as facility
+        on facility.facility_id = immunization.facilityid
+    left join {{ ref('dim_vaccine') }} as vaccine
+        on vaccine.vaccine_code = immunization.vaccinecode
+    left join {{ ref('dim_date') }} as occurence
+        on occurence.date = immunization.occurrencedate
+    left join {{ ref('dim_vaccination_status') }} as status
+        on status.immunization_status = immunization.immunizationstatus
+)
+select
+    -- Lookups that found no dimension row fall back to the unknown-member keys
+    -- configured as vars in dbt_project.yml.
+    coalesce(facility_key, {{ var('unknown_key') }}) as facility_key,
+    coalesce(client_key, {{ var('unknown_key') }}) as client_key,
+    coalesce(vaccine_key, {{ var('unknown_key') }}) as vaccine_key,
+    coalesce(date_key, {{ var('date_unknown_key') }}) as occurence_date_key,
+    coalesce(immunization_status_key, {{ var('unknown_key') }}) as immunization_status_key,
+    dosequantity as dose_quantity,
+    dosenumber as dose_number,
+    -- An exact interval match (age = '12 months') is true only on the birthday
+    -- itself, so the flags compare completed months instead.
+    -- NOTE(review): assumes "12 months old" means 12 completed months; confirm.
+    coalesce(age_in_months_at_immunization = 12, false) as is_12_months_old_at_immunization,
+    coalesce(age_in_months_at_immunization = 24, false) as is_24_months_old_at_immunization
+from immunization_joined
diff --git a/models/intermediate_layer/intermediate_layer_schema.yml b/models/intermediate_layer/intermediate_layer_schema.yml
new file mode 100644
index 0000000..f2802b0
--- /dev/null
+++ b/models/intermediate_layer/intermediate_layer_schema.yml
@@ -0,0 +1,2 @@
+version: 2
+
diff --git a/models/sources.yml b/models/sources.yml
index 75a4120..9b300eb 100644
--- a/models/sources.yml
+++ b/models/sources.yml
@@ -1,8 +1,24 @@
version: 2
 sources:
-  - name:
-    database:
-    schema:
+  - name: base_layer
+    database: analytics
+    schema: base_layer
     tables:
-      - name:
-        description:
+      - name: clientdemographics
+        description: Client demographics
+      - name: clientrelationship
+        description: client relationships
+      - name: encounter
+        description: encounter visits
+      - name: facility
+        description: facility list
+      - name: immunization
+        description: immunization encounters
+      - name: lab
+        description: lab data
+      - name: medication
+        description: medication data
+      - name: organization
+        description: organization data
+      - name: vaccine
+        description: types of vaccines
diff --git a/models/staging_layer/staging_layer_schema.yml b/models/staging_layer/staging_layer_schema.yml
new file mode 100644
index 0000000..1d8ed0b
--- /dev/null
+++ b/models/staging_layer/staging_layer_schema.yml
@@ -0,0 +1,25 @@
+version: 2
+
+models:
+  - name: stg_client_demographics
+    description: Data on client demographics where each entry is a single patient
+    columns:
+      - name: gender
+        description: Gender of client
+        data_tests:
+          - not_null
+          - accepted_values:
+              values: ['FEMALE', 'MALE']
+  - name: stg_facility
+    description: Data on facility details
+    columns:
+      - name: facilityname
+        description: facilityname
+        data_tests:
+          - unique
+          - not_null
+  - name: stg_immunization
+    description: Data on immunization encounters
+    columns:
+      - name: vaccinecode
+        description: Universal vaccine code associated with a vaccine
diff --git a/models/staging_layer/stg_client_demographics.sql b/models/staging_layer/stg_client_demographics.sql
new file mode 100644
index 0000000..390ce37
--- /dev/null
+++ b/models/staging_layer/stg_client_demographics.sql
@@ -0,0 +1,4 @@
+-- Staging pass-through of the base_layer.clientdemographics source.
+select
+    *
+from {{ source('base_layer', 'clientdemographics') }}
\ No newline at end of file
diff --git a/models/staging_layer/stg_facility.sql b/models/staging_layer/stg_facility.sql
new file mode 100644
index 0000000..bee4e94
--- /dev/null
+++ b/models/staging_layer/stg_facility.sql
@@ -0,0 +1,4 @@
+-- Staging pass-through of the base_layer.facility source.
+select
+    *
+from {{ source('base_layer', 'facility') }}
\ No newline at end of file
diff --git a/models/staging_layer/stg_immunization.sql b/models/staging_layer/stg_immunization.sql
new file mode 100644
index 0000000..ec9a2d0
--- /dev/null
+++ b/models/staging_layer/stg_immunization.sql
@@ -0,0 +1,4 @@
+-- Staging pass-through of the base_layer.immunization source.
+select
+    *
+from {{ source('base_layer', 'immunization') }}
\ No newline at end of file
diff --git a/models/staging_layer/stg_vaccine.sql b/models/staging_layer/stg_vaccine.sql
new file mode 100644
index 0000000..70651df
--- /dev/null
+++ b/models/staging_layer/stg_vaccine.sql
@@ -0,0 +1,4 @@
+-- Staging pass-through of the base_layer.vaccine source.
+select
+    *
+from {{ source('base_layer', 'vaccine') }}
\ No newline at end of file
diff --git a/models/universal_semantic_layer/aggregate_immunization.sql b/models/universal_semantic_layer/aggregate_immunization.sql
new file mode 100644
index 0000000..043e7b4
--- /dev/null
+++ b/models/universal_semantic_layer/aggregate_immunization.sql
@@ -0,0 +1,35 @@
+-- Counts of distinct clients with status 'Fully Immunized', per facility,
+-- gender, vaccine and immunization date, split by whether the client was
+-- flagged as 12 or 24 months old at immunization.
+select
+    facility.facility_name,
+    client.gender,
+    vaccine.displaylong as vaccine_name,
+    status.immunization_status,
+    dim_date."date" as date_of_immunization,
+    count(distinct client.client_key)
+        filter (where immunization_data.is_12_months_old_at_immunization)
+        as no_children_immunized_at_12_months,
+    count(distinct client.client_key)
+        filter (where immunization_data.is_24_months_old_at_immunization)
+        as no_children_immunized_at_24_months
+from {{ ref('fct_immunization') }} as immunization_data
+left join {{ ref('dim_facility') }} as facility
+    on facility.facility_key = immunization_data.facility_key
+left join {{ ref('dim_client') }} as client
+    on client.client_key = immunization_data.client_key
+left join {{ ref('dim_vaccine') }} as vaccine
+    on vaccine.vaccine_key = immunization_data.vaccine_key
+-- The status filter in the where clause discards rows without a status match,
+-- so this join is written as the inner join it effectively is.
+inner join {{ ref('dim_vaccination_status') }} as status
+    on status.immunization_status_key = immunization_data.immunization_status_key
+left join {{ ref('dim_date') }} as dim_date
+    on dim_date.date_key = immunization_data.occurence_date_key
+where status.immunization_status = 'Fully Immunized'
+group by
+    facility.facility_name,
+    client.gender,
+    vaccine.displaylong,
+    status.immunization_status,
+    dim_date."date"
\ No newline at end of file
diff --git a/models/universal_semantic_layer/universal_semantic_layer.yml b/models/universal_semantic_layer/universal_semantic_layer.yml
new file mode 100644
index 0000000..22817d2
--- /dev/null
+++ b/models/universal_semantic_layer/universal_semantic_layer.yml
@@ -0,0 +1 @@
+version: 2