Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overwriting rendered column descriptions with the unrendered yaml #129

Merged
merged 18 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@ jobs:
poetry --version

- name: Install required packages
run: | # install duckdb extras to be able to parse manifest
poetry install -E duckdb

- name: Parse manifest
run: |
poetry install
poetry run dbt parse --project-dir demo_duckdb --profiles-dir demo_duckdb -t test

- name: Run pytest
run: |
Expand Down
1 change: 0 additions & 1 deletion demo_duckdb/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ models:
description: Date (UTC) that the order was placed

- name: status
description: '{{ doc("orders_status") }}'
tests:
- accepted_values:
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']
Expand Down
6 changes: 6 additions & 0 deletions demo_duckdb/seeds/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 2
seeds:
- name: raw_orders
columns:
- name: status
description: '{{ doc("orders_status") }}' # putting this in to test if unrendered propagation works
6 changes: 5 additions & 1 deletion src/dbt_osmosis/core/column_level_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ def get_prior_knowledge(
)
)
sorted_prior_knowledge_candidates_sources = sorted(
[k for k in prior_knowledge_candidates if k["progenitor"].startswith("source")],
[
k
for k in prior_knowledge_candidates
if (k["progenitor"].startswith("source") or k["progenitor"].startswith("seed"))
],
key=lambda k: k["generation"],
reverse=True,
)
Expand Down
54 changes: 53 additions & 1 deletion src/dbt_osmosis/core/column_level_knowledge_propagator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

import yaml
from dbt.contracts.graph.nodes import ModelNode, SeedNode, SourceDefinition

from dbt_osmosis.core.column_level_knowledge import (
ColumnLevelKnowledge,
Knowledge,
Expand Down Expand Up @@ -36,10 +40,37 @@ def _build_node_ancestor_tree(
return family_tree


def _get_member_yaml(member: ManifestNode, project_dir: Path) -> Optional[dict]:
    """Return the raw (unrendered) yaml entry that documents ``member``.

    The yaml file is re-read from disk so that values such as
    ``'{{ doc("...") }}'`` are returned exactly as written instead of the
    rendered text stored on the manifest node.

    Args:
        member: A source, model or seed node taken from the manifest.
        project_dir: Directory that the node's yaml path is joined onto.

    Returns:
        The dict whose ``name`` equals ``member.name`` inside the relevant
        section (``tables`` of the matching source, ``models`` or ``seeds``),
        or ``None`` when the node type is unsupported, the node has no yaml
        file recorded, the file is empty, or no matching entry exists.
    """
    if isinstance(member, SourceDefinition):
        key = "tables"
    elif isinstance(member, ModelNode):
        key = "models"
    elif isinstance(member, SeedNode):
        key = "seeds"
    else:
        return None

    base_dir = Path(project_dir)
    data = None
    if key == "tables":
        source_file = getattr(member, "original_file_path", None)
        if source_file:
            with (base_dir / Path(source_file)).open("r", encoding="utf-8") as f:
                # safe_load yields None for an empty document; normalise to a dict
                loaded = yaml.safe_load(f) or {}
            # Narrow to the source block that owns this table; the table itself
            # is looked up under "tables" below.
            data = next(
                (
                    item
                    for item in loaded.get("sources") or []
                    if item["name"] == member.source_name
                ),
                None,
            )
    else:
        patch_path = getattr(member, "patch_path", None)
        if patch_path:
            # patch_path has the form "<project>://<relative path>"; keep only the path
            pfp: str = patch_path.split("://")[-1]
            with (base_dir / Path(pfp)).open("r", encoding="utf-8") as f:
                data = yaml.safe_load(f)

    if not data:
        return None
    # A source without "tables" (or a file without the section) simply has no match
    return next((item for item in data.get(key) or [] if item["name"] == member.name), None)


def _inherit_column_level_knowledge(
manifest: ManifestNode,
family_tree: Dict[str, Any],
placeholders: List[str],
project_dir: Path = Path.cwd(),
use_unrendered_descriptions: bool = False,
) -> Knowledge:
"""Inherit knowledge from ancestors in reverse insertion order to ensure that the most
recent ancestor is always the one to inherit from
Expand All @@ -50,10 +81,27 @@ def _inherit_column_level_knowledge(
member: ManifestNode = manifest.nodes.get(ancestor, manifest.sources.get(ancestor))
if not member:
continue
if use_unrendered_descriptions:
# overwrite member as the yaml
model_yaml = _get_member_yaml(member, project_dir)
for name, info in member.columns.items():
knowledge_default = {"progenitor": ancestor, "generation": generation}
knowledge.setdefault(name, knowledge_default)
deserialized_info = info.to_dict()
if (
use_unrendered_descriptions and model_yaml
): # overwrite the deserialized info with unrendered column info
col_yaml = next(
(
col
for col in model_yaml["columns"]
if col["name"] == deserialized_info["name"]
),
None,
)
if col_yaml is not None and "description" in col_yaml:
deserialized_info["description"] = col_yaml["description"]

# Handle Info:
# 1. tags are additive
# 2. descriptions are overridden
Expand Down Expand Up @@ -83,10 +131,14 @@ def get_node_columns_with_inherited_knowledge(
manifest: ManifestNode,
node: ManifestNode,
placeholders: List[str],
project_dir: Path = Path.cwd(),
use_unrendered_descriptions: bool = False,
) -> Knowledge:
"""Build a knowledgebase for the model based on iterating through ancestors"""
family_tree = _build_node_ancestor_tree(manifest, node)
knowledge = _inherit_column_level_knowledge(manifest, family_tree, placeholders)
knowledge = _inherit_column_level_knowledge(
manifest, family_tree, placeholders, project_dir, use_unrendered_descriptions
)
return knowledge

@staticmethod
Expand Down
8 changes: 7 additions & 1 deletion src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
profile: Optional[str] = None,
):
"""Initializes the DbtYamlManager class."""
Expand All @@ -117,6 +118,7 @@ def __init__(
self.skip_add_tags = skip_add_tags
self.skip_merge_meta = skip_merge_meta
self.add_progenitor_to_meta = add_progenitor_to_meta
self.use_unrendered_descriptions = use_unrendered_descriptions

if len(list(self.filtered_models())) == 0:
logger().warning(
Expand Down Expand Up @@ -1051,7 +1053,11 @@ def update_schema_file_and_node(
)

knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge(
self.manifest, node, self.placeholders
self.manifest,
node,
self.placeholders,
self.base_config.project_dir,
self.use_unrendered_descriptions,
)
n_cols_doc_inherited = (
ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge(
Expand Down
20 changes: 20 additions & 0 deletions src/dbt_osmosis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ def wrapper(*args, **kwargs):
" my_value}'"
),
)
@click.option(
"--use-unrendered-descriptions",
is_flag=True,
help=(
"If specified, will use unrendered column descriptions in the documentation."
"This is useful for propogating docs blocks"
),
)
@click.argument("models", nargs=-1)
def refactor(
target: Optional[str] = None,
Expand All @@ -181,6 +189,7 @@ def refactor(
models: Optional[List[str]] = None,
profile: Optional[str] = None,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
):
"""Executes organize which syncs yaml files with database schema and organizes the dbt models
directory, reparses the project, then executes document passing down inheritable documentation
Expand Down Expand Up @@ -210,6 +219,7 @@ def refactor(
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
vars=vars,
use_unrendered_descriptions=use_unrendered_descriptions,
)

# Conform project structure & bootstrap undocumented models injecting columns
Expand Down Expand Up @@ -417,6 +427,14 @@ def organize(
" my_value}'"
),
)
@click.option(
"--use-unrendered-descriptions",
is_flag=True,
help=(
"If specified, will use unrendered column descriptions in the documentation."
"This is useful for propogating docs blocks"
),
)
@click.argument("models", nargs=-1)
def document(
target: Optional[str] = None,
Expand All @@ -434,6 +452,7 @@ def document(
add_progenitor_to_meta: bool = False,
profile: Optional[str] = None,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
):
"""Column level documentation inheritance for existing models

Expand Down Expand Up @@ -462,6 +481,7 @@ def document(
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
vars=vars,
use_unrendered_descriptions=use_unrendered_descriptions,
)

# Propagate documentation & inject/remove schema file columns to align with model in database
Expand Down
Loading
Loading