diff --git a/.sonarcloud.properties b/.sonarcloud.properties new file mode 100644 index 00000000..ef6ec89c --- /dev/null +++ b/.sonarcloud.properties @@ -0,0 +1 @@ +sonar.exclusions=macros/max_existing_multiro_macros.sql, models/contacts/contact.sql, models/root/stable_couchdb.sql \ No newline at end of file diff --git a/macros/max_existing_multiro_macros.sql b/macros/max_existing_multiro_macros.sql new file mode 100644 index 00000000..b949d8a2 --- /dev/null +++ b/macros/max_existing_multiro_macros.sql @@ -0,0 +1,237 @@ +{% macro max_existing(max_field, target_ref=this) -%} +{# + Attribution: https://gist.github.com/davehowell/1d6564875f35e58d8da14c9adbcb92da +#} +{# + Gets the max existing value for some field in the target, or some other ref or source. + Use in incremental models, inside `if is_incremental()` or `if execute` blocks, + otherwise the dbt model will not compile. + Useful where you have a primary key or other watermark field and want to construct SQL with + that value determined at compile time. + Why? + When using the max value multiple times in a query it will be better than inlining multiple + subqueries to fetch the same value, and in many cases, hardcoding a value in a where clause + generates a significantly better execution plan. + + Params: max_field - string, the name of the field + target_ref - string, pass in a call to ref or source to ensure dag dependency + defaults to this (current context model) + Return: A literal string of the max value - will not have any quoting + Usage: To return dates use `max_existing_date` + To return timestamps use `max_existing_timestamp` + + For integers: + + SELECT pk_column [,col2 ...] + FROM source_table_ref + WHERE True + {%- if is_incremental() %} + AND some_int_column > {{ max_existing('some_int_column') }} + {%- endif %} + + For strings - n.b. probably not useful to use strings as watermarks + + SELECT pk_column [,col2 ...] + FROM source_table_ref + WHERE True + {%- if is_incremental() %} + AND some_string_column > '{{ max_existing("some_string_column") }}' + {%- endif %} +#} + {% call statement('get_max_existing', fetch_result=True)-%} + + SELECT max({{ max_field }}) as max_existing + FROM {{ '"' ~ this.schema ~ '"' ~ '.' ~ '"' ~ this.name~ '"' if target_ref in [this] else target_ref }} + + {%- endcall %} + {% set max_existing_field = load_result('get_max_existing').table.columns['max_existing'].values()[0] %} + {% if max_existing_field == 'NULL' %} + {{ return('1990-01-01 23:00.000') }} + {% else %} + {{ return(max_existing_field) }} + {% endif %} +{%- endmacro %} + + +{% macro _max_existing_multirow(max_field, group_by_field) -%} +{# + Internal usage. Gets the max existing values for multiple groups in a target table. + + Use in incremental model, + inside `if is_incremental()` or `if execute` otherwise + the dbt model will not compile. + Params: + max_field - column or valid SQL expression to be maxed + group_by_field - column or valid SQL expression to use in group by clause + Return: agate.Table.rows https://agate.readthedocs.io/en/1.6.1/api/table.html + + Usage: + See the public version of this macro. +#} + {% call statement('get_max_existing_multirow', fetch_result=True)-%} + + SELECT + {{ group_by_field }} as group_by_field, + max({{ max_field }}) AS max_existing + FROM {{ this.schema }}.{{ this.name }} + GROUP BY {{ group_by_field }} + + {%- endcall %} + {% set result = load_result('get_max_existing_multirow') %} + + {{ return(result.table.rows) }} +{%- endmacro %} + +{% macro _two_column_matrix_to_list_dicts(agate_rows, col1_name, col2_name) %} +{# + Converts an agate.Table.rows to a list of dicts + Params: an agate.Table.rows and names of the two columns + Return: list of dicts with keys col1_name and col2_name + #} + {%- set list_dicts = [] -%} + {%- for row in agate_rows -%} + {{- list_dicts.append( {col1_name: row[0], col2_name : row[1] } ) -}} + {%- endfor -%} + {% for dict_val in list_dicts %} + {{ log(dict_val) }} + {%- endfor -%} + {{ return(list_dicts)}} +{% endmacro %} + + +{% macro max_existing_multirow(max_field, group_by_field) %} +{# + Gets the max existing values for multiple groups in a target table. + Use in incremental models, inside `if is_incremental()` or `if execute` + otherwise the dbt model will not compile. + + Params: + max_field - column or valid SQL expression to be maxed + group_by_field - column or valid SQL expression to use in group by clause + Note: expressions should not have column aliases + Return: list of dicts where dict attributes have the keys 'type' and 'max_val' + + Usage: + Iterate over the rows e.g. + + {%- for row in max_existing_multirow('pk_column', 'type_column') %} + {{ row.pk_column }} + {%- endfor %} + + Useful in an incremental model that selects from a `UNION ALL` across + many source models where the unique_key watermark field might not be unique + across those models. A surrogate key, hash or concatenated field in the target is not + adequate to check watermarks across multiple sources. + If you maintain some lineage indicator then it is possible to fetch all the max values + e.g. + Note for this example the fields 'type' and 'pk' in the target model store which parent + table the row comes from and the respective watermark of that table, + in this case a primary key. The watermark could also be some timestamp like "updated_at" + + + {%- set types = ['ada', 'grace', 'rosie'] %} + + {%- if is_incremental() %} + {%- set max_values_list = mathspace.max_existing_multirow('pk', 'type') %} + {%- endif -%} + + {%- for t in types %} + SELECT pk, col2, col3, coln FROM {{ ref(t) }} + WHERE True + + {%- if is_incremental() %} + AND pk > {{ lookup_max_value(max_values, type) }} + {%- endif -%} + + {%- if not loop.last %} + UNION ALL + {%- endif %} + + {%- endfor %} + +#} + {% set agate_rows = _max_existing_multirow(max_field, group_by_field) %} + {% set list_dicts = _two_column_matrix_to_list_dicts(agate_rows, 'type', 'max_val') %} + {{ return(list_dicts) }} +{% endmacro %} + +{% macro lookup_max_value(max_values_list, lookup_value, lookup_key='type', return_key='max_val') %} +{# + Use with the result of the max_existing_multirow macro. + Does a lookup into that list of dicts for a given lookup_key. + + Params: max_values - the output of the max_existing_multirow macro, a list of dicts + must have a 'type' and 'max_val' key + Return: literal string of the max value if found or None + None will print blank and probably cause an error in the SQL of your model. +#} + {{ return (max_values_list | selectattr(lookup_key, 'eq', lookup_value) | map(attribute=return_key) | list | first) }} +{% endmacro %} + +{% macro _cast_as_type(stringvalue, as_type) %} + {{ return("CAST('" ~ stringvalue ~ "' AS " ~ as_type ~ ")") }} +{% endmacro %} + + +{% macro max_existing_timestamp(timestampfield, target_ref=this) -%} +{# + Params: + timestampfield: date or timestamp + Returns a SQL expression casting the max value string to a timestamp. + e.g. "CAST('2019-01-01 23:00.000' AS TIMESTAMP)" +#} + {{ return(_cast_as_type(max_existing(timestampfield, target_ref), 'TIMESTAMP')) }} +{%- endmacro %} + + +{% macro max_existing_date(timestampfield, target_ref=this) -%} +{# + Params: + timestampfield: date or timestamp + target_ref: string, optional, defaults to the current context model + pass in a call to ref or source to target a different model + Returns a SQL expression casting the max value string to a timestamp. + e.g. "CAST('2019-01-01' AS DATE)" + Usage: + current model - latest value of a field called "updated_at": + {{ max_existing_date('updated_at') }} + + some other model ref - latest value of a field called "deactivated_at": + {{ max_existing_date('deactivated_at', ref('other_model')) }} + + some other model source - latest value of a field called "deactivated_at": + {{ max_existing_date('deactivated_at', source('source_name', 'table_or_view_name')) }} + +#} + {{ return(_cast_as_type(max_existing(timestampfield, target_ref), 'DATE')) }} +{%- endmacro %} + + +{% macro _max_existing_prior_date(datefield, target_ref=this) -%} + {% call statement('max_existing_prior_date', fetch_result=True)-%} + + SELECT {{ dbt_utils.dateadd('day', -1, "max(" ~ datefield ~ ")" ) }} as max_existing_prior_date + FROM {{ '"' ~ this.schema ~ '"' ~ '.' ~ '"' ~ this.name~ '"' if target_ref in [this] else target_ref }} + + {%- endcall %} + {% set max_existing_prior = load_result('max_existing_prior_date').table.columns['max_existing_prior_date'].values()[0] %} + {{ return(max_existing_prior) }} +{%- endmacro %} + + +{% macro max_existing_prior_date(datefield, target_ref=this) -%} +{# + A day prior to the max of some date field. + Params, Return and Usage as per the max_existing_date() macro +#} + {{ return(_cast_as_type(_max_existing_prior_date(datefield, target_ref), 'DATE')) }} +{%- endmacro %} + + +{% macro max_existing_prior_timestamp(datefield, target_ref=this) -%} +{# + A day prior to the max of some timestamp field + Params, Return and Usage as per the max_existing_date() macro +#} + {{ return(_cast_as_type(_max_existing_prior_date(datefield, target_ref), 'TIMESTAMP')) }} +{%- endmacro %} \ No newline at end of file diff --git a/models/contact_views/chp_with_branches.sql b/models/contact_views/chp_with_branches.sql new file mode 100644 index 00000000..5d850c05 --- /dev/null +++ b/models/contact_views/chp_with_branches.sql @@ -0,0 +1,33 @@ +{{ + config( + materialized = 'incremental', + ) +}} + +SELECT + chp.name as CHP_Name, + chp.uuid as CHP_ID, + branch.name as Branch_Name, + chp.branch_uuid as Branch_ID, + chp.phone as Phone, + chp.phone2 as Phone2, + chp.date_of_birth as DOB, + chp.parent_type as Parent_Type, + chp.area_uuid as Area_ID, + metadata.name as supervisor_name, + metadata.reported as reported +FROM + {{ ref("contactview_chp") }} AS chp, + {{ ref("contactview_branch") }} AS branch, + {{ ref("contact") }} AS metadata, + {{ ref("contact") }} cm +WHERE + chp.branch_uuid = branch.uuid AND + chp.supervisor_uuid = metadata.uuid AND + chp.uuid = cm.contact_uuid AND + branch.name != 'HQ' AND branch.name != 'HQ OVC' + {% if is_incremental() %} + AND metadata.reported >= (SELECT MAX(reported) FROM {{ this }} WHERE reported IS NOT NULL) + {% endif %} +ORDER BY + CHP_Name diff --git a/models/contact_views/contact_views.yml b/models/contact_views/contact_views.yml deleted file mode 100644 index 65546438..00000000 --- a/models/contact_views/contact_views.yml +++ /dev/null @@ -1,26 +0,0 @@ -version: 2 - -models: - - name: raw_contacts - columns: - - name: doc - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') - - name: contactview_metadata - columns: - - name: uuid - tests: - - not_null - - unique - - name: type - tests: - - not_null - - accepted_values: - values: - [contact, clinic, district_hospital, health_center, person] - tests: - - dbt_utils.equal_rowcount: - compare_model: ref('raw_contacts') diff --git a/models/contact_views/contactview_branch.sql b/models/contact_views/contactview_branch.sql new file mode 100644 index 00000000..b15e31d2 --- /dev/null +++ b/models/contact_views/contactview_branch.sql @@ -0,0 +1,15 @@ +{{ + config( + materialized = 'view', + ) +}} + +SELECT + ch.uuid, + ch.name, + cm.area, + cm.region +FROM + {{ ref("contactview_hospital") }} AS ch +INNER JOIN {{ ref("contact") }} AS cm +ON (cm.uuid = ch.uuid AND cm.type = 'district_hospital') diff --git a/models/contact_views/contactview_chp.sql b/models/contact_views/contactview_chp.sql new file mode 100644 index 00000000..f6445c13 --- /dev/null +++ b/models/contact_views/contactview_chp.sql @@ -0,0 +1,62 @@ +{{ + config( + materialized = 'view', + ) +}} + +SELECT + chw.name, + chw.uuid, + user_settings.username, + chw.phone, + chw.phone2, + chw.date_of_birth, + chw.parent_type, + chw.area_uuid, + chw.branch_uuid, + branch.name AS branch_name, + branch.region, + coalesce(nullif(chp.doc ->> 'supervisor'::TEXT, ''::TEXT), '563649afa0e2a13740a1982abc0a2d0d'::TEXT) AS supervisor_uuid, + chp.doc #>> '{chp_profile, g_individual_info,nin}'::TEXT[] AS nin, + chp.doc #>> '{chp_profile, g_individual_info,district_of_residence}'::TEXT[] AS district_of_residence, + chp.doc #>> '{chp_profile, g_individual_info, county}'::TEXT[] AS county, + chp.doc #>> '{chp_profile, g_individual_info, sub_county}'::TEXT[] AS sub_county, + chp.doc #>> '{chp_profile, g_individual_info, parish}'::TEXT[] AS parish, + chp.doc #>> '{chp_profile, g_individual_info, village}'::TEXT[] AS village, + chp.doc #>> '{chp_profile, g_individual_info, dob}'::TEXT[] AS dob, + chp.doc #>> '{chp_profile, g_individual_info, sex}'::TEXT[] AS sex, + chp.doc #>> '{chp_profile, g_individual_info, marital_status}'::TEXT[] AS marital_status, + chp.doc #>> '{chp_profile, g_contact_info, phone_number}'::TEXT[] AS phone_number, + chp.doc #>> '{chp_profile, g_contact_info, alternate_number}'::TEXT[] AS alternate_number, + chp.doc #>> '{chp_profile, g_contact_info, brac_bank}'::TEXT[] AS brac_bank_ac, + chp.doc #>> '{chp_profile, g_position_info, chw_type}'::TEXT[] AS chw_type, + chp.doc #>> '{chp_profile, g_position_info, start_date}'::TEXT[] AS enrolment_date, + lower(chp.doc #>> '{chp_profile, g_position_info, facility_name}'::TEXT[]) AS facility_name, + chp.doc #>> '{chp_profile, g_position_info, facility_level}'::TEXT[] AS facility_level, + chp.doc #>> '{chp_profile, g_position_info, villages_served}'::TEXT[] AS villages_served, + chp.doc #>> '{chp_profile, g_education, education_level}'::TEXT[] AS education_level, + chp.doc #>> '{chp_profile, g_education, institution}'::TEXT[] AS institution, + chp.doc #>> '{chp_profile, g_education, completion_year}'::TEXT[] AS completion_year, + chp.doc #>> '{chp_profile, g_language, speak_english}'::TEXT[] AS speak_english, + chp.doc #>> '{chp_profile, g_language, read_english}'::TEXT[] AS read_english, + chp.doc #>> '{chp_profile, g_language, write_english}'::TEXT[] AS write_english, + chp.doc #>> '{chp_profile, g_language, mother_tongue}'::TEXT[] AS mother_tongue, + chp.doc #>> '{chp_profile, g_language, other_languages}'::TEXT[] AS other_languages, + chp.doc #>> '{chp_profile, g_other_details, incentives}'::TEXT[] AS incentives, + chp.doc #>> '{chp_profile, g_other_details, chp_services}'::TEXT[] AS chp_services + FROM + {{ ref("contactview_chw") }} chw + INNER JOIN {{ ref("contact") }} AS cm ON chw.area_uuid = cm.uuid + INNER JOIN {{ ref("contact") }} AS meta ON meta.uuid = chw.uuid + INNER JOIN {{ ref("contactview_branch") }} AS branch ON chw.branch_uuid = branch.uuid + LEFT JOIN {{ ref("contact") }} AS chp ON chp.uuid = chw.uuid + LEFT JOIN + ( + SELECT + contact_id, + string_agg(doc ->>'name', ', ') AS username + FROM {{ ref("contact") }} AS c + WHERE type = 'user-settings' AND contact_id IS NOT NULL + GROUP BY contact_id + ) AS user_settings ON user_settings.contact_id = chw.uuid + \ No newline at end of file diff --git a/models/contact_views/contactview_chw.sql b/models/contact_views/contactview_chw.sql new file mode 100644 index 00000000..acc68da0 --- /dev/null +++ b/models/contact_views/contactview_chw.sql @@ -0,0 +1,19 @@ +{{ + config( + materialized = 'view', + ) +}} + +SELECT + chw.name, + pplfields.uuid, + pplfields.phone, + pplfields.phone2, + pplfields.date_of_birth, + pplfields.parent_type, + chwarea.uuid AS area_uuid, + chwarea.parent_uuid AS branch_uuid +FROM {{ ref("contactview_person_fields") }} AS pplfields +INNER JOIN {{ ref("contact") }} AS chw ON chw.uuid = pplfields.uuid +INNER JOIN {{ ref("contact") }} AS chwarea ON chw.parent_uuid = chwarea.uuid +WHERE pplfields.parent_type = 'health_center' diff --git a/models/contact_views/contactview_hospital.sql b/models/contact_views/contactview_hospital.sql new file mode 100644 index 00000000..72ed7857 --- /dev/null +++ b/models/contact_views/contactview_hospital.sql @@ -0,0 +1,11 @@ +{{ + config( + materialized = 'view', + ) +}} + +SELECT + cmd.uuid, + cmd.name +FROM {{ ref("contact") }} AS cmd +WHERE cmd.type = 'district_hospital' diff --git a/models/contact_views/contactview_metadata.sql b/models/contact_views/contactview_metadata.sql deleted file mode 100644 index c2a0fe92..00000000 --- a/models/contact_views/contactview_metadata.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ - config( - materialized = 'view', - indexes=[ - {'columns': ['contact_uuid']}, - {'columns': ['parent_uuid']}, - {'columns': ['type']}, - {'columns': ['uuid']}, - ] - ) -}} - -SELECT - raw_contacts.doc ->> '_id'::text AS uuid, - raw_contacts.doc ->> 'name'::text AS name, - raw_contacts.doc ->> 'type'::text AS type, - raw_contacts.doc ->> 'contact_type'::text AS contact_type, - raw_contacts.doc ->> 'phone'::text AS phone, - raw_contacts.doc ->> 'alternative_phone'::text AS phone2, - raw_contacts.doc ->> 'date_of_birth'::text AS date_of_birth, - raw_contacts.doc #>> '{contact,_id}'::text[] AS contact_uuid, - raw_contacts.doc #>> '{parent,_id}'::text[] AS parent_uuid, - raw_contacts.doc ->> 'is_active'::text AS active, - raw_contacts.doc ->> 'notes'::text AS notes, - '1970-01-01 03:00:00+03'::timestamp with time zone + - (((raw_contacts.doc ->> 'reported_date'::text)::numeric) / 1000::numeric)::double precision * - '00:00:01'::interval AS reported -FROM {{ ref("raw_contacts") }} diff --git a/models/contact_views/contactview_person_fields.sql b/models/contact_views/contactview_person_fields.sql new file mode 100644 index 00000000..7bb99a34 --- /dev/null +++ b/models/contact_views/contactview_person_fields.sql @@ -0,0 +1,26 @@ +{{ + config( + materialized = 'view' + ) +}} + +WITH filtered_person AS ( + SELECT + uuid, + phone, + phone2, + date_of_birth, + parent_uuid + FROM {{ ref("contact") }} + WHERE type = 'person' +) + +SELECT + person.uuid, + person.phone, + person.phone2, + person.date_of_birth, + parent.type AS parent_type +FROM filtered_person AS person +LEFT JOIN {{ ref("contact") }} AS parent +ON person.parent_uuid = parent.uuid diff --git a/models/contact_views/raw_contacts.sql b/models/contact_views/raw_contacts.sql deleted file mode 100644 index 1e8330c8..00000000 --- a/models/contact_views/raw_contacts.sql +++ /dev/null @@ -1,4 +0,0 @@ -SELECT couchdb.doc -FROM {{ ref("couchdb") }} -WHERE (couchdb.doc ->> 'type'::text) = ANY - (ARRAY ['contact'::text, 'clinic'::text, 'district_hospital'::text, 'health_center'::text, 'person'::text]) diff --git a/models/contacts/contact.sql b/models/contacts/contact.sql new file mode 100644 index 00000000..45421992 --- /dev/null +++ b/models/contacts/contact.sql @@ -0,0 +1,39 @@ +{{ + config( + materialized = 'incremental', + indexes=[ + {'columns': ['"@timestamp"'], 'type': 'btree'}, + {'columns': ['reported'], 'type': 'brin'}, + {'columns': ['contact_uuid']}, + {'columns': ['parent_uuid']}, + {'columns': ['type']}, + {'columns': ['uuid']}, + ] + ) +}} + +SELECT + doc ->> '_id'::text AS uuid, + doc ->> 'type'::text AS type, + doc ->> 'name'::text AS name, + doc ->> 'contact_type'::text AS contact_type, + doc ->> 'phone'::text AS phone, + doc ->> 'alternative_phone'::text AS phone2, + doc ->> 'date_of_birth'::text AS date_of_birth, + doc #>> '{contact,_id}'::text[] AS contact_uuid, + doc #>> '{parent,_id}'::text[] AS parent_uuid, + doc ->> 'is_active'::text AS active, + doc ->> 'notes'::text AS notes, + doc ->> 'reported_date'::text AS reported, + doc ->> 'area'::text AS area, + doc ->> 'region'::text AS region, + doc ->> 'contact_id'::text AS contact_id, + doc, + "@timestamp" + +FROM {{ ref('couchdb') }} +WHERE type = ANY + (ARRAY ['contact'::text, 'clinic'::text, 'district_hospital'::text, 'health_center'::text, 'person'::text]) +{% if is_incremental() %} + AND "@timestamp" >= (select coalesce(max("@timestamp"), '1900-01-01') from {{ this }}) +{% endif %} diff --git a/models/contacts/contacts.yml b/models/contacts/contacts.yml new file mode 100644 index 00000000..0c6478cf --- /dev/null +++ b/models/contacts/contacts.yml @@ -0,0 +1,12 @@ +version: 1 + +models: + - name: contact + columns: + - name: uuid + tests: + - not_null + - name: auto_id + constraints: + - type: not_null + - type: primary_key diff --git a/models/households/household_visits.sql b/models/households/household_visits.sql deleted file mode 100644 index 20ba9dcc..00000000 --- a/models/households/household_visits.sql +++ /dev/null @@ -1,8 +0,0 @@ -SELECT couchdb.doc -FROM {{ ref("couchdb") }} -WHERE (couchdb.doc ->> 'type'::text) = ANY - (ARRAY ['task'::text, 'form'::text]) - -{% if is_incremental() %} - AND COALESCE("@timestamp" > (SELECT MAX("@timestamp") FROM {{ this }}), True) -{% endif %} diff --git a/models/households/households.yml b/models/households/households.yml deleted file mode 100644 index 5753a8dc..00000000 --- a/models/households/households.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: 2 - -models: - - name: household_visits - columns: - - name: doc - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') diff --git a/models/reports_tables/reports.sql b/models/reports_tables/reports.sql deleted file mode 100644 index 9f53f1c7..00000000 --- a/models/reports_tables/reports.sql +++ /dev/null @@ -1,14 +0,0 @@ -{{ - config( - materialized = 'table', - ) -}} - -SELECT - couchdb.doc ->> '_id'::text AS uuid, - couchdb.doc ->> 'form'::text AS form, - couchdb.doc -> 'geolocation' AS location, - couchdb.doc ->> 'reported_date'::text AS reported_date, - couchdb.doc #>> '{contact,_id}'::text[] AS contact_uuid -FROM {{ ref("couchdb") }} -WHERE (couchdb.doc ->> 'type'::text) = 'data_record' diff --git a/models/reports_tables/reports_by_location.sql b/models/reports_tables/reports_by_location.sql deleted file mode 100644 index 85bc5449..00000000 --- a/models/reports_tables/reports_by_location.sql +++ /dev/null @@ -1,15 +0,0 @@ -{{ - config( - materialized = 'table', - ) -}} - -SELECT - couchdb.doc ->> '_id'::text AS uuid, - couchdb.doc ->> 'form'::text AS form, - CAST (coalesce(couchdb.doc #>> '{geolocation,longitude}', '36.826275') AS NUMERIC) AS longitude, - CAST (coalesce(couchdb.doc #>> '{geolocation,latitude}', '-1.1792353') AS NUMERIC) AS latitude, - couchdb.doc ->> 'reported_date'::text AS reported_date, - couchdb.doc #>> '{contact,_id}'::text[] AS contact_uuid -FROM {{ ref("couchdb") }} -WHERE (couchdb.doc ->> 'type'::text) = 'data_record' diff --git a/models/reports_tables/reports_tables.yml b/models/reports_tables/reports_tables.yml deleted file mode 100644 index cef65011..00000000 --- a/models/reports_tables/reports_tables.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: 2 - -models: - - name: reports_by_location - columns: - - name: uuid - tests: - - not_null - - name: longitude - tests: - - not_null - - name: latitude - tests: - - not_null - tests: - - dbt_utils.equal_rowcount: - compare_model: ref('reports') - - name: reports - columns: - - name: uuid - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') diff --git a/models/root/couchdb.sql b/models/root/couchdb.sql index 0be64057..b163076b 100644 --- a/models/root/couchdb.sql +++ b/models/root/couchdb.sql @@ -1,13 +1,13 @@ {{ - config( - materialized = 'view', - indexes=[ - {'columns': ['"@timestamp"'], 'type': 'brin'}, - ] - ) + config( + materialized = 'view' + ) }} -SELECT - doc->>'type' AS type, - * -FROM v1.{{ env_var('POSTGRES_TABLE') }} +WITH combined_tables AS ( + SELECT * FROM {{ ref('new_couchdb') }} + UNION ALL + SELECT * FROM {{ ref('stable_couchdb') }} +) + +SELECT * FROM combined_tables diff --git a/models/root/new_couchdb.sql b/models/root/new_couchdb.sql new file mode 100644 index 00000000..70b5fb40 --- /dev/null +++ b/models/root/new_couchdb.sql @@ -0,0 +1,29 @@ +{{ + config( + materialized = 'view' + ) +}} + +with latest_timestamp as ( + SELECT coalesce(max("@timestamp"), '1900-01-01') AS max_timestamp FROM {{ ref('stable_couchdb') }} +) + +SELECT + doc ->> '_id'::text AS uuid, + doc ->> 'type'::text AS type, + doc ->> 'name'::text AS name, + doc ->> 'contact_type'::text AS contact_type, + doc ->> 'phone'::text AS phone, + doc ->> 'alternative_phone'::text AS phone2, + doc ->> 'date_of_birth'::text AS date_of_birth, + doc #>> '{contact,_id}'::text[] AS contact_uuid, + doc #>> '{parent,_id}'::text[] AS parent_uuid, + doc ->> 'is_active'::text AS active, + doc ->> 'notes'::text AS notes, + doc ->> 'reported_date'::text AS reported_date, + doc ->> 'area'::text AS area, + doc ->> 'region'::text AS region, + doc ->> 'contact_id'::text AS contact_id, + * +FROM v1.{{ env_var('POSTGRES_TABLE') }} +WHERE "@timestamp" >= (SELECT max_timestamp FROM latest_timestamp) diff --git a/models/root/root.yml b/models/root/root.yml index 9c85149c..628d07fe 100644 --- a/models/root/root.yml +++ b/models/root/root.yml @@ -15,8 +15,7 @@ models: - name: _rev tests: - not_null - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - _id - - _rev + - name: auto_id + constraints: + - type: not_null + - type: primary_key diff --git a/models/root/stable_couchdb.sql b/models/root/stable_couchdb.sql new file mode 100644 index 00000000..c0541b3c --- /dev/null +++ b/models/root/stable_couchdb.sql @@ -0,0 +1,36 @@ +{{ + config( + materialized = 'incremental', + unique_key='uuid', + indexes=[ + {'columns': ['"@timestamp"'], 'type': 'btree'}, + {'columns': ['reported_date'], 'type': 'brin'}, + {'columns': ['type']}, + {'columns': ['contact_uuid']}, + {'columns': ['parent_uuid']}, + {'columns': ['uuid']}, + ] + ) +}} + +SELECT + doc ->> '_id'::text AS uuid, + doc ->> 'type'::text AS type, + doc ->> 'name'::text AS name, + doc ->> 'contact_type'::text AS contact_type, + doc ->> 'phone'::text AS phone, + doc ->> 'alternative_phone'::text AS phone2, + doc ->> 'date_of_birth'::text AS date_of_birth, + doc #>> '{contact,_id}'::text[] AS contact_uuid, + doc #>> '{parent,_id}'::text[] AS parent_uuid, + doc ->> 'is_active'::text AS active, + doc ->> 'notes'::text AS notes, + doc ->> 'reported_date'::text AS reported_date, + doc ->> 'area'::text AS area, + doc ->> 'region'::text AS region, + doc ->> 'contact_id'::text AS contact_id, + * +FROM v1.{{ env_var('POSTGRES_TABLE') }} +{% if is_incremental() %} + WHERE "@timestamp" >= {{ max_existing_timestamp('"@timestamp"') }} +{% endif %} diff --git a/models/types/data_record.sql b/models/types/data_record.sql deleted file mode 100644 index bc937ff2..00000000 --- a/models/types/data_record.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ - config( - materialized = 'incremental', - indexes=[ - {'columns': ['"@timestamp"'], 'type': 'brin'}, - {'columns': ['"form"'], 'type': 'hash'}, - ] - ) -}} - -SELECT - doc->>'form' AS form, - doc->>'reported_date' AS reported_date, - doc->>'patient_id' AS patient_id, - * -FROM {{ ref('couchdb') }} -WHERE - doc->>'type' = 'data_record' - -{% if is_incremental() %} - AND COALESCE("@timestamp" > (SELECT MAX("@timestamp") FROM {{ this }}), True) -{% endif %} diff --git a/models/types/person.sql b/models/types/person.sql deleted file mode 100644 index 7c39dfd6..00000000 --- a/models/types/person.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ - config( - materialized = 'incremental', - indexes=[ - {'columns': ['"@timestamp"'], 'type': 'brin'}, - {'columns': ['"patient_id"'], 'type': 'hash'}, - ] - ) -}} - -SELECT - doc->>'name' AS name, - doc->>'date_of_birth' AS date_of_birth, - doc->>'phone' AS phone, - doc->>'sex' AS sex, - doc->>'reported_date' AS reported_date, - doc->>'patient_id' AS patient_id, - * -FROM {{ ref('couchdb') }} -WHERE - doc->>'type' = 'person' - -{% if is_incremental() %} - AND COALESCE("@timestamp" > (SELECT MAX("@timestamp") FROM {{ this }}), True) -{% endif %} diff --git a/models/types/types.yml b/models/types/types.yml deleted file mode 100644 index e646a59d..00000000 --- a/models/types/types.yml +++ /dev/null @@ -1,25 +0,0 @@ -version: 2 - -models: - - name: data_record - columns: - - name: _id - tests: - - not_null - - name: _rev - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') - - name: person - columns: - - name: _id - tests: - - not_null - - name: _rev - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') diff --git a/models/user_tables/chws.sql b/models/user_tables/chws.sql deleted file mode 100644 index 3f8658e7..00000000 --- a/models/user_tables/chws.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ - config( - materialized = 'table', - indexes=[ - {'columns': ['"@timestamp"'], 'type': 'brin'}, - {'columns': ['"patient_id"'], 'type': 'hash'}, - ] - ) -}} - -SELECT - doc->>'name' AS name, - doc->>'date_of_birth' AS date_of_birth, - doc->>'phone' AS phone, - doc->>'sex' AS sex, - doc->>'reported_date' AS reported_date, - doc->>'patient_id' AS patient_id, - * -FROM {{ ref('couchdb') }} -WHERE - doc->>'type' = 'person' diff --git a/models/user_tables/user_tables.yml b/models/user_tables/user_tables.yml deleted file mode 100644 index 9c5de1fe..00000000 --- a/models/user_tables/user_tables.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 - -models: - - name: chws - columns: - - name: _id - tests: - - not_null - - name: _rev - tests: - - not_null - tests: - - dbt_utils.fewer_rows_than: - compare_model: ref('couchdb') - - dbt_utils.equal_rowcount: - compare_model: ref('person') diff --git a/package-lock.yml b/package-lock.yml new file mode 100644 index 00000000..eddc3bea --- /dev/null +++ b/package-lock.yml @@ -0,0 +1,4 @@ +packages: +- package: dbt-labs/dbt_utils + version: 1.1.1 +sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353