diff --git a/models/fct_dbt__critical_path.sql b/models/fct_dbt__critical_path.sql index 800e2239..edf7fa21 100644 --- a/models/fct_dbt__critical_path.sql +++ b/models/fct_dbt__critical_path.sql @@ -57,12 +57,12 @@ node_dependencies_deduped as ( ), -model_dependencies_with_execution_time as ( +model_dependencies_with_total_node_runtime as ( -- Model dependencies enriched with execution time select distinct node_dependencies_deduped.node_id, - latest_executions.execution_time, + latest_executions.total_node_runtime, depends_on_node_id from node_dependencies_deduped left join latest_executions on node_dependencies_deduped.node_id = latest_executions.node_id @@ -80,13 +80,13 @@ models_with_at_least_one_model_dependency as ( ), -models_with_no_model_dependencies_with_execution_time as ( +models_with_no_model_dependencies_with_total_node_runtime as ( -- Models which have no dependencies enriched with execution time -- These are models at the base of the tree select latest_models.node_id, - latest_executions.execution_time + latest_executions.total_node_runtime from latest_models left join models_with_at_least_one_model_dependency on latest_models.node_id = models_with_at_least_one_model_dependency.node_id @@ -124,7 +124,7 @@ anchor as ( select models_with_no_dependent_models.node_id, coalesce(node_dependencies_deduped.depends_on_node_id, '') as depends_on_node_id, - coalesce(latest_executions.execution_time, 0) as execution_time + coalesce(latest_executions.total_node_runtime, 0) as total_node_runtime from models_with_no_dependent_models left join node_dependencies_deduped on models_with_no_dependent_models.node_id = node_dependencies_deduped.node_id left join latest_executions on models_with_no_dependent_models.node_id = latest_executions.node_id @@ -138,15 +138,15 @@ all_needed_dependencies as ( select node_id, - execution_time, + total_node_runtime, '' as depends_on_node_id - from models_with_no_model_dependencies_with_execution_time + from models_with_no_model_dependencies_with_total_node_runtime union select node_id, - execution_time, + total_node_runtime, depends_on_node_id as depends_on_node_id - from model_dependencies_with_execution_time + from model_dependencies_with_total_node_runtime ), @@ -157,12 +157,12 @@ search_path (node_ids, total_time) as ( select array_construct(depends_on_node_id, node_id), - execution_time + total_node_runtime from anchor union all select array_cat(to_array(all_needed_dependencies.depends_on_node_id), search_path.node_ids) as node_ids, - all_needed_dependencies.execution_time + search_path.total_time + all_needed_dependencies.total_node_runtime + search_path.total_time from search_path left join all_needed_dependencies where get(search_path.node_ids, 0) = all_needed_dependencies.node_id @@ -204,7 +204,7 @@ longest_path_with_times as ( select flattened.node_id::string as node_id, flattened.index, - latest_executions.execution_time/60 as execution_minutes, + latest_executions.total_node_runtime/60 as execution_minutes, latest_models.model_materialization from flattened left join latest_executions on flattened.node_id = latest_executions.node_id @@ -212,4 +212,4 @@ longest_path_with_times as ( ) -select * from longest_path_with_times \ No newline at end of file +select * from longest_path_with_times diff --git a/models/fct_dbt__latest_full_model_executions.sql b/models/fct_dbt__latest_full_model_executions.sql index e9296816..178aab5c 100644 --- a/models/fct_dbt__latest_full_model_executions.sql +++ b/models/fct_dbt__latest_full_model_executions.sql @@ -36,12 +36,15 @@ fields as ( select artifact_generated_at, command_invocation_id, - execution_time, + compile_started_at, + query_completed_at, + total_node_runtime, model_execution_id, model_materialization, model_schema, name, node_id, + thread_id, rows_affected, status, was_full_refresh @@ -49,4 +52,4 @@ fields as ( ) -select * from fields \ No newline at end of file +select * from fields diff --git a/models/incremental/fct_dbt__model_executions.sql b/models/incremental/fct_dbt__model_executions.sql index dafbece1..1efaf0f8 100644 --- a/models/incremental/fct_dbt__model_executions.sql +++ b/models/incremental/fct_dbt__model_executions.sql @@ -49,8 +49,11 @@ fields as ( artifact_generated_at, was_full_refresh, node_id, + thread_id, status, - execution_time, + compile_started_at, + query_completed_at, + total_node_runtime, rows_affected, model_materialization, model_schema, diff --git a/models/schemas.yml b/models/schemas.yml index c6a89fcc..50e3dfab 100644 --- a/models/schemas.yml +++ b/models/schemas.yml @@ -16,7 +16,7 @@ models: - name: model_materialization - name: fct_dbt__latest_full_model_executions - description: A list of all models and executions times that make up the critical path of a full, incremental run. + description: A list of all models and executions times from the most recent, incremental run. columns: - name: node_id description: Primary key. @@ -27,8 +27,12 @@ models: description: Timestamp of when the source artifact was generated. - name: command_invocation_id description: Foreign key to fct_dbt_run_results. The id of the command which resulted in the source artifact's generation. - - name: execution_time - description: The duration of time in seconds for the model to run. + - name: compile_started_at + description: Timestamp of when a model starts to be compiled. + - name: query_completed_ats + description: Timestamp of when a model's SQL is completed + - name: total_node_runtime + description: The duration of time in seconds for the model to run. Note that this is _not_ equal to the delta between `compile_started_at` and `query_completed_at` since it includes extra tasks performed by dbt. - name: model_execution_id description: A surrogate key of command_invocation_id and node_id. - name: model_materialization @@ -58,8 +62,12 @@ models: description: Boolean value for whether this model was executed with a --full-refresh flag. - name: model_materialization description: The configured materialization of the model. - - name: execution_time - description: The duration of time in seconds for the model to run. + - name: compile_started_at + description: Timestamp of when a model starts to be compiled. + - name: query_completed_at + description: Timestamp of when a model's SQL is completed + - name: total_node_runtime + description: The duration of time in seconds for the model to run. Note that this is _not_ equal to the delta between `compile_started_at` and `query_completed_at` since it includes extra tasks performed by dbt. - name: status description: Success/fail status of the model's execution. - name: command_invocation_id diff --git a/models/staging/stg_dbt__model_executions.sql b/models/staging/stg_dbt__model_executions.sql index 00c5cb70..a1fcd796 100644 --- a/models/staging/stg_dbt__model_executions.sql +++ b/models/staging/stg_dbt__model_executions.sql @@ -28,9 +28,19 @@ fields as ( generated_at as artifact_generated_at, coalesce(data:args:full_refresh, 'false')::boolean as was_full_refresh, result.value:unique_id::string as node_id, + split(result.value:thread_id::string, '-')[1]::integer as thread_id, result.value:status::string as status, - -- Incremental models have a null execution_time, so coalesce to 0 for calculations - coalesce(result.value:execution_time::float, 0) as execution_time, + + -- The first item in the timing array is the model-level `compile` + result.value:timing[0]:started_at::timestamp_ntz as compile_started_at, + + -- The second item in the timing array is `execute`. + result.value:timing[1]:completed_at::timestamp_ntz as query_completed_at, + + -- Confusingly, this does not match the delta of the above two timestamps. + -- should we calculate it instead? + coalesce(result.value:execution_time::float, 0) as total_node_runtime, + result.value:adapter_response:rows_affected::int as rows_affected from dbt_run, lateral flatten(input => data:results) as result @@ -45,8 +55,11 @@ surrogate_key as ( artifact_generated_at, was_full_refresh, node_id, + thread_id, status, - execution_time, + compile_started_at, + query_completed_at, + total_node_runtime, rows_affected from fields