diff --git a/README.md b/README.md index 6ed344e6..67412db4 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,8 @@ dbt-metabase models \ --metabase-url https://metabase.example.com \ --metabase-username user@example.com \ --metabase-password Password123 \ - --metabase-database business + --metabase-database business \ + --include-schemas public ``` Open Metabase and go to Settings > Admin Settings > Table Metadata, you will notice that `id` column in `stg_users` is now marked as "Entity Key" and `group_id` is a "Foreign Key" pointing to `id` in `stg_groups`. @@ -209,7 +210,8 @@ dbt-metabase exposures \ --metabase-url https://metabase.example.com \ --metabase-username user@example.com \ --metabase-password Password123 \ - --output-path models/ + --output-path models/ \ + --exclude-collections temporary ``` Once the execution completes, check your output path for exposures files containing descriptions, creator details and links for Metabase questions and dashboards: @@ -274,7 +276,7 @@ Note that common configurations are in the outer block and command-specific ones Alternatively, you can invoke dbt-metabase programmatically. Below is the equivalent of CLI examples: ```python -from dbtmetabase import DbtMetabase +from dbtmetabase import DbtMetabase, Filter # Initializing instance c = DbtMetabase( @@ -285,10 +287,16 @@ c = DbtMetabase( ) # Exporting models -c.export_models(metabase_database="business") +c.export_models( + metabase_database="business", + schema_filter=Filter(include=["public"]), +) # Extracting exposures -c.extract_exposures(output_path=".") +c.extract_exposures( + output_path=".", + collection_filter=Filter(exclude=["temporary"]), +) ``` See function header comments for information about other parameters. diff --git a/dbtmetabase/_exposures.py b/dbtmetabase/_exposures.py index e9fed5e6..c42c05ae 100644 --- a/dbtmetabase/_exposures.py +++ b/dbtmetabase/_exposures.py @@ -81,10 +81,10 @@ def extract_exposures( collection_slug = collection.get("slug", safe_name(collection["name"])) if not collection_filter.match(collection_name): - _logger.debug("Skipping collection %s", collection["name"]) + _logger.debug("Skipping collection '%s'", collection["name"]) continue - _logger.info("Exploring collection: %s", collection["name"]) + _logger.info("Exploring collection '%s'", collection["name"]) for item in self.metabase.get_collection_items( uid=collection["id"], models=("card", "dashboard"), @@ -123,11 +123,11 @@ def extract_exposures( card=self.metabase.get_card(uid=card["id"]), )["depends"] else: - _logger.warning("Unexpected exposure type: %s", item["model"]) + _logger.warning("Unexpected collection item '%s'", item["model"]) continue name = entity.get("name", "Exposure [Unresolved Name]") - _logger.info("Inspecting exposure: %s", name) + _logger.info("Processing %s '%s'", item["model"], name) creator_name = None creator_email = None @@ -179,14 +179,7 @@ def __extract_card_exposures( ctx: __Context, card: Mapping, ) -> Mapping: - """Extracts exposures from Metabase questions. - - Args: - card (Mapping): Metabase card payload. - - Returns: - Mapping: Map of depends and native_query. - """ + """Extracts exposures from Metabase questions.""" depends = [] native_query = "" @@ -207,10 +200,7 @@ def __extract_card_exposures( elif query_source in ctx.table_names: # Normal question source_table = ctx.table_names.get(query_source) - _logger.info( - "Model extracted from Metabase question: %s", - source_table, - ) + _logger.info("Extracted model '%s' from card", source_table) depends.append(source_table) # Find models exposed through joins @@ -228,10 +218,7 @@ def __extract_card_exposures( # Joined model parsed joined_table = ctx.table_names.get(join_source) if joined_table: - _logger.info( - "Model extracted from Metabase question join: %s", - joined_table, - ) + _logger.info("Extracted model '%s' from join", joined_table) depends.append(joined_table) elif query.get("type") == "native": @@ -257,10 +244,7 @@ def __extract_card_exposures( continue if parsed_model: - _logger.info( - "Model extracted from native query: %s", - parsed_model, - ) + _logger.info("Extracted model '%s' from native query", parsed_model) depends.append(parsed_model) return { @@ -282,24 +266,7 @@ def __format_exposure( native_query: Optional[str], depends_on: Iterable[str], ) -> Mapping: - """Builds an exposure representation (see https://docs.getdbt.com/reference/exposure-properties). - - Args: - model (str): Metabase item model (card or dashboard). - uid (str): Metabase item unique ID. - name (str): Exposure name. - label (str): Exposure label. - header (str): Exposure header. - depends_on (Iterable[str]): List of model dependencies. - created_at (str): Timestamp of exposure creation derived from Metabase. - creator_name (str): Creator name derived from Metabase. - creator_email (str): Creator email derived from Metabase. - description (str): documented in Metabase. - native_query (Optional[str]): SQL query to be included in the exposure documentation (only for native questions). - - Returns: - Mapping: Compiled exposure in dbt format. - """ + """Builds dbt exposure representation (see https://docs.getdbt.com/reference/exposure-properties).""" dbt_type: str url: str diff --git a/dbtmetabase/_models.py b/dbtmetabase/_models.py index 8a13e12c..113eebad 100644 --- a/dbtmetabase/_models.py +++ b/dbtmetabase/_models.py @@ -11,7 +11,7 @@ from .manifest import DEFAULT_SCHEMA, Column, Group, Manifest, Model from .metabase import Metabase -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) class ModelsMixin(metaclass=ABCMeta): @@ -87,8 +87,8 @@ def export_models( table = tables.get(table_key) if not table: - logger.warning( - "Model %s not found in %s schema", table_key, schema_name + _logger.warning( + "Table '%s' not in schema '%s'", table_key, schema_name ) synced = False continue @@ -98,8 +98,8 @@ def export_models( field = table.get("fields", {}).get(column_name) if not field: - logger.warning( - "Column %s not found in %s model", column_name, table_key + _logger.warning( + "Field '%s' not in table '%s'", column_name, table_key ) synced = False continue @@ -127,11 +127,11 @@ def export_models( body=update["body"], ) - logger.info( - "Updated %s/%s successfully: %s", - update["kind"], - update["id"], - ", ".join(update.get("body", {}).keys()), + _logger.info( + "%s '%s' updated successfully: %s", + update["kind"].capitalize(), + update["name"], + ", ".join(update.get("body", {})), ) if not success: @@ -154,7 +154,7 @@ def __export_model( api_table = ctx.tables.get(table_key) if not api_table: - logger.error("Table %s does not exist in Metabase", table_key) + _logger.error("Table '%s' does not exist", table_key) return False # Empty strings not accepted by Metabase @@ -184,10 +184,12 @@ def __export_model( body_table["visibility_type"] = model_visibility if body_table: - ctx.update(entity=api_table, change=body_table) - logger.info("Table %s will be updated", table_key) + ctx.update(entity=api_table, change=body_table, name=table_key) + _logger.info( + "Table '%s' will be updated: %s", table_key, ", ".join(body_table) + ) else: - logger.info("Table %s is up-to-date", table_key) + _logger.info("Table '%s' is up to date", table_key) for column in model.columns: success &= self.__export_column(ctx, schema_name, model_name, column) @@ -201,16 +203,7 @@ def __export_column( model_name: str, column: Column, ) -> bool: - """Exports one dbt column to Metabase database schema. - - Arguments: - schema_name {str} -- Target schema name.s - model_name {str} -- One dbt model name read from project. - column {dict} -- One dbt column read from project. - - Returns: - bool -- True if exported successfully, false if there were errors. - """ + """Exports one dbt column to Metabase database schema.""" success = True @@ -219,11 +212,7 @@ def __export_column( api_field = ctx.tables.get(table_key, {}).get("fields", {}).get(column_name) if not api_field: - logger.error( - "Field %s.%s does not exist in Metabase", - table_key, - column_name, - ) + _logger.error("Field '%s.%s' does not exist", table_key, column_name) return False if "special_type" in api_field: @@ -247,27 +236,24 @@ def __export_column( ) if not target_table or not target_field: - logger.info( - "Skipping FK resolution for %s table, %s field not resolved during dbt parsing", - table_key, + _logger.info( + "Field '%s' not resolved in manifest, skipping foreign key for table '%s'", target_field, + table_key, ) else: - logger.debug( - "Looking for field %s in table %s", - target_field, - target_table, + _logger.debug( + "Looking for field '%s' in table '%s'", target_field, target_table ) - fk_target_field = ( ctx.tables.get(target_table, {}).get("fields", {}).get(target_field) ) if fk_target_field: fk_target_field_id = fk_target_field.get("id") if fk_target_field.get(semantic_type_key) != "type/PK": - logger.info( - "Setting field/%s as PK (for %s column)", + _logger.info( + "Setting field '%s' as primary key for field '%s'", fk_target_field_id, column_name, ) @@ -276,8 +262,8 @@ def __export_column( change={semantic_type_key: "type/PK"}, ) else: - logger.error( - "Unable to find PK for %s.%s column FK", + _logger.error( + "No primary key for field '%s.%s' foreign key", target_table, target_field, ) @@ -333,11 +319,14 @@ def __export_column( ): body_field[semantic_type_key] = column.semantic_type or None + update_name = f"{model_name}.{column_name}" if body_field: - ctx.update(entity=api_field, change=body_field) - logger.info("Field %s.%s will be updated", model_name, column_name) + ctx.update(entity=api_field, change=body_field, name=update_name) + _logger.info( + "Field '%s' will be updated: %s", update_name, ", ".join(body_field) + ) else: - logger.info("Field %s.%s is up-to-date", model_name, column_name) + _logger.info("Field '%s' is up to date", update_name) return success @@ -395,13 +384,19 @@ class __Context: tables: Mapping[str, MutableMapping] = dc.field(default_factory=dict) updates: MutableMapping[str, MutableMapping] = dc.field(default_factory=dict) - def update(self, entity: MutableMapping, change: Mapping): + def update( + self, + entity: MutableMapping, + change: Mapping, + name: Optional[str] = None, + ): entity.update(change) key = f"{entity['kind']}.{entity['id']}" update = self.updates.get(key, {}) update["kind"] = entity["kind"] update["id"] = entity["id"] + update["name"] = name or entity["id"] body = update.get("body", {}) body.update(change) diff --git a/dbtmetabase/core.py b/dbtmetabase/core.py index ba0842dc..7812ce8b 100644 --- a/dbtmetabase/core.py +++ b/dbtmetabase/core.py @@ -32,7 +32,8 @@ def __init__( http_headers: Optional[dict] = None, http_adapter: Optional[HTTPAdapter] = None, ): - """ + """dbt + Metabase integration. + Args: manifest_path (Union[str,Path]): Path to dbt manifest.json, usually in target/ directory after compilation. metabase_url (str): Metabase URL, e.g. "https://metabase.example.com". diff --git a/dbtmetabase/format.py b/dbtmetabase/format.py index 482eee05..762426ba 100644 --- a/dbtmetabase/format.py +++ b/dbtmetabase/format.py @@ -18,7 +18,8 @@ def __init__( include: Optional[Sequence[str]] = None, exclude: Optional[Sequence[str]] = None, ): - """ + """Inclusion/exclusion filtering. + Args: include (Optional[Sequence[str]], optional): Optional inclusions (i.e. include only these). Defaults to None. exclude (Optional[Sequence[str]], optional): Optional exclusion list (i.e. exclude these, even if in inclusion list). Defaults to None. diff --git a/dbtmetabase/manifest.py b/dbtmetabase/manifest.py index adc1e205..ff51443a 100644 --- a/dbtmetabase/manifest.py +++ b/dbtmetabase/manifest.py @@ -73,7 +73,7 @@ def read_models(self) -> Iterable[Model]: name = node["name"].upper() if node["config"]["materialized"] == "ephemeral": - _logger.debug("Skipping ephemeral model %s", name) + _logger.debug("Skipping ephemeral model '%s'", name) continue models.append(self._read_model(manifest, node, Group.nodes)) @@ -174,7 +174,7 @@ def _read_relationships( depends_on_nodes = list(child["depends_on"][group]) if len(depends_on_nodes) > 2: _logger.warning( - "Expected at most two nodes, got %d {} nodes, skipping %s {}", + "Got %d dependencies for '%s' instead of <=2, skipping relationship", len(depends_on_nodes), unique_id, ) @@ -184,7 +184,7 @@ def _read_relationships( # Otherwise, the primary key of the current model would be (incorrectly) determined to be FK. if len(depends_on_nodes) == 2 and depends_on_nodes[1] != unique_id: _logger.debug( - "Skip this incoming relationship test, concerning nodes %s", + "Circular dependency in '%s', skipping relationship", depends_on_nodes, ) continue @@ -195,7 +195,7 @@ def _read_relationships( if len(depends_on_nodes) != 1: _logger.warning( - "Expected single node after filtering, got %d instead, skipping %s", + "Got %d dependencies for '%s' instead of 1, skipping", len(depends_on_nodes), unique_id, ) @@ -209,10 +209,7 @@ def _read_relationships( ) if not fk_target_table_alias: - _logger.debug( - "Cannot resolve depends on model %s to a model in manifest", - depends_on_id, - ) + _logger.debug("Cannot resolve dependency for '%s'", depends_on_id) continue fk_target_schema = manifest[group][depends_on_id].get( @@ -252,7 +249,7 @@ def _set_column_fk( if not table or not field: if table or field: _logger.warning( - "FK requires table and field for column %s", + "Foreign key requires table and field for column '%s'", metabase_column.name, ) return @@ -267,7 +264,7 @@ def _set_column_fk( ) metabase_column.fk_target_field = field.strip('"').upper() _logger.debug( - "Relation from %s to %s.%s", + "Relation from '%s' to '%s.%s'", metabase_column.name, metabase_column.fk_target_table, metabase_column.fk_target_field, diff --git a/dbtmetabase/metabase.py b/dbtmetabase/metabase.py index 1a3463f5..2d6651df 100644 --- a/dbtmetabase/metabase.py +++ b/dbtmetabase/metabase.py @@ -165,14 +165,14 @@ def find_user(self, uid: str) -> Optional[Mapping]: return dict(self._api("get", f"/api/user/{uid}")) except requests.exceptions.HTTPError as error: if error.response and error.response.status_code == 404: - _logger.warning("User not found: %s", uid) + _logger.warning("User '%s' not found", uid) return None raise - def update_table(self, uid: str, body: Mapping): + def update_table(self, uid: str, body: Mapping) -> Mapping: """Posts update to an existing table.""" - self._api("put", f"/api/table/{uid}", json=body) + return dict(self._api("put", f"/api/table/{uid}", json=body)) - def update_field(self, uid: str, body: Mapping): + def update_field(self, uid: str, body: Mapping) -> Mapping: """Posts an update to an existing table field.""" - self._api("put", f"/api/field/{uid}", json=body) + return dict(self._api("put", f"/api/field/{uid}", json=body))