Skip to content

Commit

Permalink
fix: issue with neo4j query for ES data (#1823)
Browse files Browse the repository at this point in the history
* fix: issue with neo4j query for ES data

Signed-off-by: Allison Suarez Miranda <[email protected]>

* fixed multiple columns because of user match bug, and fixed uppercase fields breaking filters issue

Signed-off-by: Allison Suarez Miranda <[email protected]>
  • Loading branch information
allisonsuarez authored Apr 22, 2022
1 parent 925fce9 commit 0cd3cbc
Showing 1 changed file with 46 additions and 46 deletions.
92 changes: 46 additions & 46 deletions databuilder/databuilder/task/search/search_data_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,82 +11,76 @@
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (schema)-[:DESCRIPTION]->(schema_description:Description)
OPTIONAL MATCH (table)-[:DESCRIPTION]->(prog_descs:Programmatic_Description)
WITH db, cluster, schema, schema_description, table, table_description,
COLLECT(prog_descs.description) as programmatic_descriptions
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions,
COLLECT(DISTINCT tags.key) as tags
WITH db, cluster, schema, schema_description, table, table_description,
COLLECT(DISTINCT toLower(tags.key)) as tags
OPTIONAL MATCH (table)-[:HAS_BADGE]->(badges:Badge)
WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions, tags,
COLLECT(DISTINCT badges.key) as badges
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions, tags, badges
OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
WITH db, cluster, schema, schema_description, table, table_description, tags,
COLLECT(DISTINCT toLower(badges.key)) as badges
MATCH (table)-[:COLUMN]->(col:Column)
OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
WITH db, cluster, schema, schema_description, table, table_description, tags, badges,
programmatic_descriptions,
COLLECT(col.name) AS columns, COLLECT(col_description.description) AS column_descriptions
COLLECT(toLower(col.name)) AS columns, COLLECT(col_description.description) AS column_descriptions
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
{additional_field_match}
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
RETURN toLower(db.name) as database, cluster.name AS cluster, toLower(schema.name) AS schema,
schema_description.description AS schema_description,
table.name AS name, table.key AS key, table_description.description AS description,
toLower(table.name) AS name, table.key AS key, table_description.description AS description,
time_stamp.last_updated_timestamp AS last_updated_timestamp,
{{
{usage_fields}
}} AS usage,
columns,
column_descriptions,
tags,
badges,
{additional_field_return}
programmatic_descriptions
badges
ORDER BY table.name;
"""
)

DEFAULT_TABLE_QUERY = NEO4J_TABLE_CYPHER_QUERY.format(
publish_tag_filter='',
additional_field_match='',
additional_field_match="""
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
WITH db, cluster, schema, schema_description, table, table_description, time_stamp,
tags, badges, columns, column_descriptions, SUM(read.read_count) AS total_usage,
COUNT(DISTINCT user.email) AS unique_usage
""",
usage_fields="""
total_usage: CASE SUM(read.read_count)
total_usage: CASE total_usage
WHEN 0 THEN null
ELSE SUM(read.read_count)
ELSE total_usage
END,
unique_usage: CASE COUNT(DISTINCT user.email)
unique_usage: CASE unique_usage
WHEN 0 THEN null
ELSE COUNT(DISTINCT user.email)
ELSE unique_usage
END
""",
additional_field_return='')

NEO4J_DASHBOARD_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (dashboard:Dashboard)
MATCH (dashboard:Dashboard)-[:DASHBOARD_OF]->(dbg:Dashboardgroup)
-[:DASHBOARD_GROUP_OF]->(cluster:Cluster)
{publish_tag_filter}
MATCH (dashboard)-[:DASHBOARD_OF]->(dbg:Dashboardgroup)
MATCH (dbg)-[:DASHBOARD_GROUP_OF]->(cluster:Cluster)
OPTIONAL MATCH (dashboard)-[:DESCRIPTION]->(db_descr:Description)
OPTIONAL MATCH (dbg)-[:DESCRIPTION]->(dbg_descr:Description)
OPTIONAL MATCH (dashboard)-[:DESCRIPTION]->(db_descr:Description)
OPTIONAL MATCH (dashboard)-[:EXECUTED]->(last_exec:Execution)
WHERE split(last_exec.key, '/')[5] = '_last_successful_execution'
OPTIONAL MATCH (dashboard)-[read:READ_BY]->(user:User)
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, SUM(read.read_count) AS total_usage
OPTIONAL MATCH (dashboard)-[:HAS_QUERY]->(query:Query)-[:HAS_CHART]->(chart:Chart)
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, COLLECT(DISTINCT query.name) as query_names,
COLLECT(DISTINCT chart.name) as chart_names,
total_usage
COLLECT(DISTINCT chart.name) as chart_names
OPTIONAL MATCH (dashboard)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, query_names, chart_names, total_usage,
{additional_field_match}
COLLECT(DISTINCT tags.key) as tags
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, query_names, chart_names,
COLLECT(DISTINCT toLower(tags.key)) as tags
OPTIONAL MATCH (dashboard)-[:HAS_BADGE]->(badges:Badge)
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, query_names, chart_names, total_usage, tags,
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, query_names, chart_names, tags,
COLLECT(DISTINCT badges.key) as badges
RETURN dbg.name as group_name, dashboard.name as name, cluster.name as cluster,
{additional_field_match}
RETURN toLower(dbg.name) as group_name, toLower(dashboard.name) as name, cluster.name as cluster,
{additional_field_return}
{{
{usage_fields}
Expand All @@ -102,7 +96,11 @@

DEFAULT_DASHBOARD_QUERY = NEO4J_DASHBOARD_CYPHER_QUERY.format(
publish_tag_filter='',
additional_field_match='',
additional_field_match="""
OPTIONAL MATCH (dashboard)-[read:READ_BY]->(user:User)
WITH dashboard, dbg, db_descr, dbg_descr, cluster, last_exec, query_names, chart_names, tags,
badges, SUM(read.read_count) AS total_usage
""",
usage_fields="""
total_usage: CASE total_usage
WHEN 0 THEN null
Expand Down Expand Up @@ -162,35 +160,37 @@
OPTIONAL MATCH (feature)-[:DESCRIPTION]->(desc:Description)
OPTIONAL MATCH (feature)-[:TAGGED_BY]->(tag:Tag)
OPTIONAL MATCH (feature)-[:HAS_BADGE]->(badge:Badge)
OPTIONAL MATCH (feature)-[read:READ_BY]->(user:User)
{additional_field_match}
RETURN
fg.name as feature_group,
feature.name as name,
toLower(fg.name) as feature_group,
toLower(feature.name) as name,
feature.version as version,
feature.key as key,
{additional_field_return}
{{
{usage_fields}
}} AS usage,
feature.status as status,
feature.entity as entity,
toLower(feature.entity) as entity,
desc.description as description,
db.name as availability,
COLLECT(DISTINCT badge.key) as badges,
COLLECT(DISTINCT tag.key) as tags,
COLLECT(DISTINCT toLower(badge.key)) as badges,
COLLECT(DISTINCT toLower(tag.key)) as tags,
toInteger(feature.last_updated_timestamp) as last_updated_timestamp
order by fg.name, feature.name, feature.version
order by feature_group, name, version
"""
)

DEFAULT_FEATURE_QUERY = NEO4J_FEATURE_CYPHER_QUERY.format(
publish_tag_filter='',
additional_field_match='',
additional_field_match="""
OPTIONAL MATCH (feature)-[read:READ_BY]->(user:User)
WITH feature, fg, db, desc, tag, badge, SUM(read.read_count) as total_usage
""",
usage_fields="""
total_usage: CASE SUM(read.read_count)
total_usage: CASE total_usage
WHEN 0 THEN null
ELSE SUM(read.read_count)
ELSE total_usage
END
""",
additional_field_return='')

0 comments on commit 0cd3cbc

Please sign in to comment.