diff --git a/databuilder/README.md b/databuilder/README.md index 416148fdf7..90bfbd73ae 100644 --- a/databuilder/README.md +++ b/databuilder/README.md @@ -1700,6 +1700,28 @@ Adds the same set of tags to all tables produced by the job. #### [GenericTransformer](./databuilder/transformer/generic_transformer.py) Transforms dictionary based on callback function that user provides. +#### [ComplexTypeTransformer](./databuilder/transformer/complex_type_transformer.py) +Transforms complex types for columns in a table by using a configured parsing function. The transformer takes a `TableMetadata` object and iterates over its list of `ColumnMetadata` objects. The configured parser takes each column as input and sets the column's `type_metadata` field with the parsed results contained in a `TypeMetadata` object. + +**If you use Hive as a data store:**
+Configure this transformer with the [Hive parser](./databuilder/utils/hive_complex_type_parser.py). + +**If you do not use Hive as a data store:**
+You will need to write a custom parsing function for transforming column type strings into nested `TypeMetadata` objects. You are free to use the [Hive parser](./databuilder/utils/hive_complex_type_parser.py) as a starting point. You can also look online for an existing grammar or open-source prior art, as writing a parser from scratch can get a little involved. We strongly recommend using PyParsing rather than regular expressions or similar ad hoc string matching. + +New parsing functions should take the following arguments: +- Column type string +- Column name +- `ColumnMetadata` object itself + +Within the parsing function, each [TypeMetadata](./databuilder/models/type_metadata.py) object should be created by passing its name, parent object, and type string. + +**Things to know about [TypeMetadata](./databuilder/models/type_metadata.py)**
+- If the existing subclasses do not cover all the required complex types, the base class can be extended to create any new ones that are needed. +- Each new subclass should implement an `is_terminal_type` function, which allows the node and relation iterators to check whether to continue creating the next nested level or to stop because a terminal node has been reached. +- `ScalarTypeMetadata` is the default type class that represents a terminal state. This should be used to set any column's `type_metadata` when it is not a complex type, or for the innermost terminal state for any complex type. Setting the `type_metadata` field on every column lets the frontend know to use the correct nested column display. +- Subclasses should set a `kind` field that specifies what kind of complex type they are. This is used by the frontend for specific type handling. For example, for arrays and maps a smaller row is inserted in the display table to differentiate them from named nested columns such as structs. + ## List of loader #### [FsNeo4jCSVLoader](https://github.com/amundsen-io/amundsen/blob/main/databuilder/databuilder/loader/file_system_neo4j_csv_loader.py "FsNeo4jCSVLoader") Write node and relationship CSV file(s) that can be consumed by Neo4jCsvPublisher. It assumes that the record it consumes is instance of Neo4jCsvSerializable. diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/api/helpers.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/api/helpers.ts index cb2a6e3bb3..ecb5d3f955 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/api/helpers.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/api/helpers.ts @@ -54,7 +54,7 @@ export function processColumns( ...column, key: tableKey + '/' + column.name, children: - nestedType && isNestedColumnsEnabled() + !column.type_metadata && nestedType && isNestedColumnsEnabled() ? 
convertNestedTypeToColumns(nestedType) : undefined, }; diff --git a/frontend/amundsen_application/static/js/features/ColumnList/index.tsx b/frontend/amundsen_application/static/js/features/ColumnList/index.tsx index dde0246ac0..c9b934e20d 100644 --- a/frontend/amundsen_application/static/js/features/ColumnList/index.tsx +++ b/frontend/amundsen_application/static/js/features/ColumnList/index.tsx @@ -173,7 +173,8 @@ const ColumnList: React.FC = ({ key: item.key, name: item.name, isEditable: item.is_editable, - isExpandable: false, + isExpandable: + item.type_metadata && item.type_metadata.children.length > 0, editText: editText || null, editUrl: editUrl || null, tableParams, diff --git a/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx b/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx index 542d32833a..fe43038e95 100644 --- a/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx +++ b/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx @@ -353,12 +353,10 @@ export class TableDetail extends React.Component< }); }; - hasColumnsToExpand = () => - // TODO use this instead once the new nested columns display is turned on - // const { tableData } = this.props; - // return tableData.columns.some((col) => col.type_metadata?.children?.length); - - false; + hasColumnsToExpand = () => { + const { tableData } = this.props; + return tableData.columns.some((col) => col.type_metadata?.children?.length); + }; renderTabs(editText, editUrl) { const tabInfo: TabInfo[] = []; diff --git a/frontend/setup.py b/frontend/setup.py index 3afad6a370..d51d4cba5a 100644 --- a/frontend/setup.py +++ b/frontend/setup.py @@ -45,7 +45,7 @@ def build_js() -> None: with open(requirements_path) as requirements_file: requirements_dev = requirements_file.readlines() -__version__ = '4.1.2' +__version__ = '4.2.0' jira = ['jira==3.0.1'] asana = ['asana==0.10.3']