Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overwriting rendered column descriptions with the unrendered yaml #129

Merged
merged 18 commits into from
Mar 28, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions src/dbt_osmosis/core/column_level_knowledge_propagator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, Dict, Iterable, List, Optional

from pathlib import Path
import yaml
from dbt_osmosis.core.column_level_knowledge import (
ColumnLevelKnowledge,
Knowledge,
Expand All @@ -8,7 +9,7 @@
)
from dbt_osmosis.core.log_controller import logger
from dbt_osmosis.vendored.dbt_core_interface.project import ColumnInfo, ManifestNode

from dbt.contracts.graph.nodes import SourceDefinition, SeedNode, ModelNode

def _build_node_ancestor_tree(
manifest: ManifestNode,
Expand All @@ -35,6 +36,32 @@ def _build_node_ancestor_tree(
)
return family_tree

def _get_member_yaml(member):
    """Return the raw (unrendered) YAML entry that documents ``member``.

    The entry is read straight from the schema file on disk so that values
    such as column descriptions are returned exactly as written in the YAML
    rather than as they appear on the manifest node.

    Args:
        member: A manifest node. ``SourceDefinition``, ``ModelNode`` and
            ``SeedNode`` are supported; anything else is logged and ignored.

    Returns:
        The mapping for this member taken from the ``tables`` (sources),
        ``models`` or ``seeds`` list of its YAML file, or ``None`` when the
        member type is unsupported, it has no YAML file recorded, or no entry
        with a matching name exists in that file.

    Raises:
        OSError: If the recorded YAML file cannot be opened.
        yaml.YAMLError: If the YAML file cannot be parsed.
    """
    if isinstance(member, SourceDefinition):
        key = "tables"
    elif isinstance(member, ModelNode):
        key = "models"
    elif isinstance(member, SeedNode):
        key = "seeds"
    else:
        # Bail out early: without a key there is nothing to look up.
        logger.warning("Unrecognized member type: %s", type(member))
        return None

    if key == "tables":
        yaml_path = getattr(member, "original_file_path", None)
        if not yaml_path:
            return None
        with Path(yaml_path).open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # Tables are nested under their source, so narrow to that source first.
        sources = data.get("sources") if isinstance(data, dict) else None
        section = next(
            (
                source
                for source in sources or []
                if source.get("name") == member.source_name
            ),
            None,
        )
    else:
        patch_path = getattr(member, "patch_path", None)
        if not patch_path:
            return None
        # patch_path arrives as '<project>://models/staging/stg_orders.yml';
        # strip the '<project>://' prefix when present, otherwise use it as is.
        # NOTE(review): the resulting path is relative and therefore resolved
        # against the current working directory - confirm this is always the
        # dbt project root.
        _, separator, relative_path = patch_path.partition("://")
        real_path = relative_path if separator else patch_path
        with Path(real_path).open("r", encoding="utf-8") as f:
            section = yaml.safe_load(f)

    # An empty file, a non-mapping document or a missing source all mean
    # there is no entry to return.
    if not isinstance(section, dict):
        return None
    return next(
        (
            item
            for item in section.get(key) or []
            if item.get("name") == member.name
        ),
        None,
    )

def _inherit_column_level_knowledge(
manifest: ManifestNode,
Expand All @@ -44,16 +71,25 @@ def _inherit_column_level_knowledge(
"""Inherit knowledge from ancestors in reverse insertion order to ensure that the most
recent ancestor is always the one to inherit from
"""
use_direct_yaml_descriptions = True
knowledge: Knowledge = {}
for generation in reversed(family_tree):
for ancestor in family_tree[generation]:
member: ManifestNode = manifest.nodes.get(ancestor, manifest.sources.get(ancestor))
if not member:
continue
if use_direct_yaml_descriptions:
# overwrite member as the yaml
model_yaml = _get_member_yaml(member)
for name, info in member.columns.items():
knowledge_default = {"progenitor": ancestor, "generation": generation}
knowledge.setdefault(name, knowledge_default)
deserialized_info = info.to_dict()
if use_direct_yaml_descriptions and model_yaml: # overwrite the deserialized info with unrendered column info
col_yaml = next((col for col in model_yaml['columns'] if col['name'] == deserialized_info['name']), None)
if col_yaml is not None and "description" in col_yaml:
deserialized_info["description"] = col_yaml["description"]

# Handle Info:
# 1. tags are additive
# 2. descriptions are overridden
Expand Down
Loading