Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Age Extension Query with Formatted Age Range #6206

Draft
wants to merge 6 commits into
base: production
Choose a base branch
from

Conversation

acwhite211
Copy link
Member

Fixes #6089
Fixes #6016

Creates a new functions that builds a SQLAlchemy subquery that, in addition to the previous implementation's use case of filter CollectionObjects by all it's paths to chronostrat age, returns a formatted age range for a CollectionObject's maximum start age to its minimum end age. This allows the query QB to display this formatted range in the age column in the query results.

image image

Here is a compiled SQLAlchemy query that get generated for a simple CollectionObject Age query:

SELECT
	collectionobject.`CollectionObjectID`,
	concat_ws(' - ',
			  ifnull(regexp_replace(CAST(agg_subq.max_start_period AS CHAR), '\\.(0+)$', ''), ''),
			  ifnull(regexp_replace(CAST(agg_subq.min_end_period AS CHAR), '\\.(0+)$', ''), '')) AS age
FROM
	collectionobject
INNER JOIN (
	SELECT
		unioned.coid AS coid,
		min(unioned.endperiod) AS min_end_period,
		max(unioned.startperiod) AS max_start_period
	FROM
		(
		SELECT
			absoluteage.`CollectionObjectID` AS coid,
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) - coalesce(absoluteage.`AgeUncertainty`, 0) AS startperiod,
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) + coalesce(absoluteage.`AgeUncertainty`, 0) AS endperiod
		FROM
			absoluteage
		WHERE
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) - coalesce(absoluteage.`AgeUncertainty`, 0) <= 2000.0
			AND CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) + coalesce(absoluteage.`AgeUncertainty`, 0) >= 2.0
	UNION ALL
		SELECT
			r.`CollectionObjectID` AS coid,
			CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN greatest(CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0), CAST(aend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(aend.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0))
				ELSE CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0)
			END AS startperiod,
			CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN least((CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0), (CAST(aend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(aend.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0))
				ELSE (CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0)
			END AS endperiod
		FROM
			relativeage AS r
		INNER JOIN geologictimeperiod AS a ON
			r.`AgeNameID` = a.`GeologicTimePeriodID`
		LEFT OUTER JOIN geologictimeperiod AS aend ON
			r.`AgeNameEndID` = aend.`GeologicTimePeriodID`
		WHERE
			a.`StartPeriod` IS NOT NULL
			AND a.`EndPeriod` IS NOT NULL
			AND a.`StartPeriod` >= a.`EndPeriod`
			AND (r.`AgeNameEndID` IS NULL
				OR aend.`StartPeriod` IS NOT NULL
				AND aend.`EndPeriod` IS NOT NULL
				AND aend.`StartPeriod` >= aend.`EndPeriod`)
			AND CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN greatest(CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0), CAST(aend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(aend.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0))
				ELSE CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0)
			END <= 2000.0
			AND CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN least((CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0), (CAST(aend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(aend.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0))
				ELSE (CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0)
			END >= 2.0
	UNION ALL
		SELECT
			DISTINCT c.`CollectionObjectID` AS coid,
			CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN least(CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0), CAST(csend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(csend.`StartUncertainty`, 0))
				ELSE CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0)
			END AS startperiod,
			CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN greatest(CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0), CAST(csend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(csend.`EndUncertainty`, 0))
				ELSE CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0)
			END AS endperiod
		FROM
			collectionobject AS c
		LEFT OUTER JOIN collectingevent AS ce ON
			c.`CollectingEventID` = ce.`CollectingEventID`
		LEFT OUTER JOIN locality AS l ON
			ce.`LocalityID` = l.`LocalityID`
		LEFT OUTER JOIN paleocontext AS p ON
			c.`PaleoContextID` = p.`PaleoContextID`
			OR ce.`PaleoContextID` = p.`PaleoContextID`
			OR l.`PaleoContextID` = p.`PaleoContextID`
		LEFT OUTER JOIN geologictimeperiod AS cs ON
			p.`ChronosStratID` = cs.`GeologicTimePeriodID`
		LEFT OUTER JOIN geologictimeperiod AS csend ON
			p.`ChronosStratEndID` = csend.`GeologicTimePeriodID`
		WHERE
			p.`PaleoContextID` IS NOT NULL
			AND cs.`StartPeriod` IS NOT NULL
			AND cs.`EndPeriod` IS NOT NULL
			AND cs.`StartPeriod` >= cs.`EndPeriod`
			AND (p.`ChronosStratEndID` IS NULL
				OR csend.`StartPeriod` IS NOT NULL
				AND csend.`EndPeriod` IS NOT NULL
				AND csend.`StartPeriod` >= csend.`EndPeriod`)
			AND CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN least(CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0), CAST(csend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(csend.`StartUncertainty`, 0))
				ELSE CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0)
			END <= 2000.0
			AND CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN greatest(CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0), CAST(csend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(csend.`EndUncertainty`, 0))
				ELSE CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0)
			END >= 2.0) AS unioned
	GROUP BY
		unioned.coid) AS agg_subq ON
	collectionobject.`CollectionObjectID` = agg_subq.coid
WHERE
	collectionobject.`CollectionID` = 98304;

Checklist

  • Self-review the PR after opening it to make sure the changes look good and
    self-explanatory (or properly documented)
  • Add relevant issue to release milestone
  • Add relevant documentation (Tester - Dev)

Testing instructions

TBD

@acwhite211 acwhite211 added 2 - Queries Issues that are related to the query builder or queries in general geospecify labels Feb 7, 2025
@acwhite211 acwhite211 added this to the 7.10.1 milestone Feb 7, 2025
@acwhite211 acwhite211 self-assigned this Feb 7, 2025
@acwhite211
Copy link
Member Author

Have some code cleanup to do before merging.

Here is another example of a compiled SQLAlchemy query that gets generated:

SELECT
	collectionobject.`CollectionObjectID`,
	concat_ws(' - ', ifnull(regexp_replace(CAST(agg_subq.max_start_period AS CHAR), '\\.(0+)$', ''), ''), ifnull(regexp_replace(CAST(agg_subq.min_end_period AS CHAR), '\\.(0+)$', ''), '')) AS age,
	absoluteage_1.`AbsoluteAge`,
	geologictimeperiod_1.`Name`,
	geologictimeperiod_2.`Name`,
	geologictimeperiod_3.`Name`,
	geologictimeperiod_4.`Name`,
	geologictimeperiod_5.`Name`,
	geologictimeperiod_6.`Name`
FROM
	collectionobject
INNER JOIN (
	SELECT
		unioned.coid AS coid,
		min(unioned.endperiod) AS min_end_period,
		max(unioned.startperiod) AS max_start_period
	FROM
		(
		SELECT
			absoluteage.`CollectionObjectID` AS coid,
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) - coalesce(absoluteage.`AgeUncertainty`, 0) AS startperiod,
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) + coalesce(absoluteage.`AgeUncertainty`, 0) AS endperiod
		FROM
			absoluteage
		WHERE
			CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) - coalesce(absoluteage.`AgeUncertainty`, 0) <= 2000.0
			AND CAST(absoluteage.`AbsoluteAge` AS DECIMAL(10, 6)) + coalesce(absoluteage.`AgeUncertainty`, 0) >= 2.0
	UNION ALL
		SELECT
			r.`CollectionObjectID` AS coid,
			CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN greatest(CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0), CAST(aend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(aend.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0))
				ELSE CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0)
			END AS startperiod,
			CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN least((CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0), (CAST(aend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(aend.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0))
				ELSE (CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0)
			END AS endperiod
		FROM
			relativeage AS r
		INNER JOIN geologictimeperiod AS a ON
			r.`AgeNameID` = a.`GeologicTimePeriodID`
		LEFT OUTER JOIN geologictimeperiod AS aend ON
			r.`AgeNameEndID` = aend.`GeologicTimePeriodID`
		WHERE
			a.`StartPeriod` IS NOT NULL
			AND a.`EndPeriod` IS NOT NULL
			AND a.`StartPeriod` >= a.`EndPeriod`
			AND (r.`AgeNameEndID` IS NULL
				OR aend.`StartPeriod` IS NOT NULL
				AND aend.`EndPeriod` IS NOT NULL
				AND aend.`StartPeriod` >= aend.`EndPeriod`)
			AND CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN greatest(CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0), CAST(aend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(aend.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0))
				ELSE CAST(a.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(a.`StartUncertainty`, 0) + coalesce(r.`AgeUncertainty`, 0)
			END <= 2000.0
			AND CASE
				WHEN (r.`AgeNameEndID` IS NOT NULL) THEN least((CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0), (CAST(aend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(aend.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0))
				ELSE (CAST(a.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(a.`EndUncertainty`, 0)) - coalesce(r.`AgeUncertainty`, 0)
			END >= 2.0
	UNION ALL
		SELECT
			DISTINCT c.`CollectionObjectID` AS coid,
			CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN least(CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0), CAST(csend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(csend.`StartUncertainty`, 0))
				ELSE CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0)
			END AS startperiod,
			CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN greatest(CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0), CAST(csend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(csend.`EndUncertainty`, 0))
				ELSE CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0)
			END AS endperiod
		FROM
			collectionobject AS c
		LEFT OUTER JOIN collectingevent AS ce ON
			c.`CollectingEventID` = ce.`CollectingEventID`
		LEFT OUTER JOIN locality AS l ON
			ce.`LocalityID` = l.`LocalityID`
		LEFT OUTER JOIN paleocontext AS p ON
			c.`PaleoContextID` = p.`PaleoContextID`
			OR ce.`PaleoContextID` = p.`PaleoContextID`
			OR l.`PaleoContextID` = p.`PaleoContextID`
		LEFT OUTER JOIN geologictimeperiod AS cs ON
			p.`ChronosStratID` = cs.`GeologicTimePeriodID`
		LEFT OUTER JOIN geologictimeperiod AS csend ON
			p.`ChronosStratEndID` = csend.`GeologicTimePeriodID`
		WHERE
			p.`PaleoContextID` IS NOT NULL
			AND cs.`StartPeriod` IS NOT NULL
			AND cs.`EndPeriod` IS NOT NULL
			AND cs.`StartPeriod` >= cs.`EndPeriod`
			AND (p.`ChronosStratEndID` IS NULL
				OR csend.`StartPeriod` IS NOT NULL
				AND csend.`EndPeriod` IS NOT NULL
				AND csend.`StartPeriod` >= csend.`EndPeriod`)
			AND CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN least(CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0), CAST(csend.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(csend.`StartUncertainty`, 0))
				ELSE CAST(cs.`StartPeriod` AS DECIMAL(10, 6)) + coalesce(cs.`StartUncertainty`, 0)
			END <= 2000.0
			AND CASE
				WHEN (p.`ChronosStratEndID` IS NOT NULL) THEN greatest(CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0), CAST(csend.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(csend.`EndUncertainty`, 0))
				ELSE CAST(cs.`EndPeriod` AS DECIMAL(10, 6)) - coalesce(cs.`EndUncertainty`, 0)
			END >= 2.0) AS unioned
	GROUP BY
		unioned.coid) AS agg_subq ON
	collectionobject.`CollectionObjectID` = agg_subq.coid
LEFT OUTER JOIN absoluteage AS absoluteage_1 ON
	collectionobject.`CollectionObjectID` = absoluteage_1.`CollectionObjectID`
LEFT OUTER JOIN relativeage AS relativeage_1 ON
	collectionobject.`CollectionObjectID` = relativeage_1.`CollectionObjectID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_1 ON
	geologictimeperiod_1.`GeologicTimePeriodID` = relativeage_1.`AgeNameID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_2 ON
	geologictimeperiod_2.`GeologicTimePeriodID` = relativeage_1.`AgeNameEndID`
LEFT OUTER JOIN paleocontext AS paleocontext_1 ON
	paleocontext_1.`PaleoContextID` = collectionobject.`PaleoContextID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_3 ON
	geologictimeperiod_3.`GeologicTimePeriodID` = paleocontext_1.`ChronosStratID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_4 ON
	geologictimeperiod_4.`GeologicTimePeriodID` = paleocontext_1.`ChronosStratEndID`
LEFT OUTER JOIN collectingevent AS collectingevent_1 ON
	paleocontext_1.`PaleoContextID` = collectingevent_1.`PaleoContextID`
LEFT OUTER JOIN paleocontext AS paleocontext_2 ON
	paleocontext_2.`PaleoContextID` = collectingevent_1.`PaleoContextID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_5 ON
	geologictimeperiod_5.`GeologicTimePeriodID` = paleocontext_2.`ChronosStratID`
LEFT OUTER JOIN geologictimeperiod AS geologictimeperiod_6 ON
	geologictimeperiod_6.`GeologicTimePeriodID` = paleocontext_2.`ChronosStratEndID`
WHERE
	collectionobject.`CollectionID` = 98304;

@acwhite211
Copy link
Member Author

It seems that the behavior for querying by the time period name isn't working as expected now. Investigating...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
2 - Queries Issues that are related to the query builder or queries in general geospecify
Projects
Status: 📋Back Log
1 participant