From d7a9935eba6b96404c5bba6c68b7c7be4d641d70 Mon Sep 17 00:00:00 2001 From: Daniil Demin Date: Tue, 6 Aug 2024 12:54:23 +0300 Subject: [PATCH] Enable COUNT in view queries (#6820) --- ydb/core/kqp/host/kqp_host.cpp | 2 +- ydb/core/kqp/host/kqp_translate.cpp | 2 + ydb/core/kqp/host/kqp_translate.h | 5 +- ydb/core/kqp/provider/rewrite_io_utils.cpp | 25 ++++------ ydb/core/kqp/provider/rewrite_io_utils.h | 4 +- .../kqp/provider/yql_kikimr_datasource.cpp | 22 +++++++-- ydb/core/kqp/provider/yql_kikimr_provider.h | 3 +- .../aggregates_and_window/create_view.sql | 43 +++++++++++++++++ .../cases/aggregates_and_window/drop_view.sql | 1 + .../aggregates_and_window/etalon_query.sql | 46 +++++++++++++++++++ .../select_from_view.sql | 3 ++ .../cases/count_episodes/create_view.sql | 9 ++++ .../input/cases/count_episodes/drop_view.sql | 1 + .../cases/count_episodes/etalon_query.sql | 12 +++++ .../cases/count_episodes/select_from_view.sql | 3 ++ .../create_view.sql | 22 +++++++++ .../count_episodes_with_titles/drop_view.sql | 1 + .../etalon_query.sql | 25 ++++++++++ .../select_from_view.sql | 3 ++ .../input/cases/count_rows/create_view.sql | 4 ++ .../view/input/cases/count_rows/drop_view.sql | 1 + .../input/cases/count_rows/etalon_query.sql | 7 +++ .../cases/count_rows/select_from_view.sql | 3 ++ 23 files changed, 223 insertions(+), 24 deletions(-) create mode 100644 ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql create mode 100644 ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 972689af9f48..5f854fa7cef5 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -1776,7 +1776,7 @@ class TKqpHost : public IKqpHost { auto queryExecutor = MakeIntrusive(Gateway, Cluster, SessionCtx, KqpRunner); auto kikimrDataSource = CreateKikimrDataSource(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, - ExternalSourceFactory, IsInternalCall); + ExternalSourceFactory, IsInternalCall, GUCSettings); auto kikimrDataSink = CreateKikimrDataSink(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, ExternalSourceFactory, queryExecutor); FillSettings.AllResultsBytesLimit = Nothing(); diff --git a/ydb/core/kqp/host/kqp_translate.cpp b/ydb/core/kqp/host/kqp_translate.cpp index 35e632c273b4..0384b3120606 100644 --- a/ydb/core/kqp/host/kqp_translate.cpp +++ b/ydb/core/kqp/host/kqp_translate.cpp @@ -1,6 +1,8 @@ #include "kqp_translate.h" +#include #include +#include namespace NKikimr { diff --git a/ydb/core/kqp/host/kqp_translate.h b/ydb/core/kqp/host/kqp_translate.h index e54879819d97..1304c28d6b05 100644 --- a/ydb/core/kqp/host/kqp_translate.h +++ b/ydb/core/kqp/host/kqp_translate.h @@ -1,7 +1,8 @@ #pragma once -#include -#include +#include +#include +#include #include namespace NKikimr { diff --git a/ydb/core/kqp/provider/rewrite_io_utils.cpp b/ydb/core/kqp/provider/rewrite_io_utils.cpp index 633149305a44..9a2bd768904b 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.cpp +++ b/ydb/core/kqp/provider/rewrite_io_utils.cpp @@ -15,23 +15,17 @@ using namespace NNodes; constexpr const char* QueryGraphNodeSignature = "SavedQueryGraph"; -NSQLTranslation::TTranslationSettings CreateViewTranslationSettings(const TString& cluster) { - NSQLTranslation::TTranslationSettings settings; - - settings.DefaultCluster = cluster; - settings.ClusterMapping[cluster] = TString(NYql::KikimrProviderName); - settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; - - return settings; -} - TExprNode::TPtr CompileViewQuery( const TString& query, TExprContext& ctx, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver ) { + auto translationSettings = settingsBuilder.Build(ctx); + translationSettings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; + TAstParseResult queryAst; - queryAst = NSQLTranslation::SqlToYql(query, CreateViewTranslationSettings(cluster)); + queryAst = NSQLTranslation::SqlToYql(query, translationSettings); ctx.IssueManager.AddIssues(queryAst.Issues); if (!queryAst.IsOk()) { @@ -39,7 +33,7 @@ TExprNode::TPtr CompileViewQuery( } TExprNode::TPtr queryGraph; - if (!CompileExpr(*queryAst.Root, queryGraph, ctx, nullptr, nullptr)) { + if (!CompileExpr(*queryAst.Root, queryGraph, ctx, moduleResolver.get(), nullptr)) { return nullptr; } @@ -123,7 +117,8 @@ TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver ) { YQL_PROFILE_FUNC(DEBUG); @@ -132,7 +127,7 @@ TExprNode::TPtr RewriteReadFromView( TExprNode::TPtr queryGraph = FindSavedQueryGraph(readNode.Ptr()); if (!queryGraph) { - queryGraph = CompileViewQuery(query, ctx, cluster); + queryGraph = CompileViewQuery(query, ctx, settingsBuilder, moduleResolver); if (!queryGraph) { ctx.AddError(TIssue(ctx.GetPosition(readNode.Pos()), "The query stored in the view cannot be compiled.")); diff --git a/ydb/core/kqp/provider/rewrite_io_utils.h b/ydb/core/kqp/provider/rewrite_io_utils.h index d793cc0fcd1a..71beb4e56c22 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.h +++ b/ydb/core/kqp/provider/rewrite_io_utils.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace NYql { @@ -10,7 +11,8 @@ TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver ); } \ No newline at end of file diff --git a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp index d289a9e7f2e1..3a0ba86838f7 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp @@ -3,6 +3,7 @@ #include "yql_kikimr_provider_impl.h" #include +#include #include #include @@ -472,12 +473,14 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) : FunctionRegistry(functionRegistry) , Types(types) , Gateway(gateway) , SessionCtx(sessionCtx) , ExternalSourceFactory(externalSourceFactory) + , GUCSettings(gucSettings) , ConfigurationTransformer(new TKikimrConfigurationTransformer(sessionCtx, types)) , IntentDeterminationTransformer(new TKiSourceIntentDeterminationTransformer(sessionCtx)) , LoadTableMetadataTransformer(CreateKiSourceLoadTableMetadataTransformer(gateway, sessionCtx, types, externalSourceFactory, isInternalCall)) @@ -760,6 +763,7 @@ class TKikimrDataSource : public TDataProviderBase { } ctx.Step + .Repeat(TExprStep::ExpandApplyForLambdas) .Repeat(TExprStep::ExprEval) .Repeat(TExprStep::DiscoveryIO) .Repeat(TExprStep::Epochs) @@ -768,7 +772,15 @@ class TKikimrDataSource : public TDataProviderBase { .Repeat(TExprStep::RewriteIO); const auto& query = tableDesc.Metadata->ViewPersistedData.QueryText; - return RewriteReadFromView(node, ctx, query, cluster); + NKqp::TKqpTranslationSettingsBuilder settingsBuilder( + SessionCtx->Query().Type, + SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), + cluster, + query, + SessionCtx->Config().BindingsMode, + GUCSettings + ); + return RewriteReadFromView(node, ctx, query, settingsBuilder, Types.Modules); } } @@ -881,6 +893,7 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr Gateway; TIntrusivePtr SessionCtx; NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory; + TGUCSettings::TPtr GUCSettings; TAutoPtr ConfigurationTransformer; TAutoPtr IntentDeterminationTransformer; @@ -920,9 +933,10 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) { - return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall); + return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall, gucSettings); } TAutoPtr CreateKiSourceLoadTableMetadataTransformer(TIntrusivePtr gateway, diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.h b/ydb/core/kqp/provider/yql_kikimr_provider.h index 413c1f704daa..0c5987fce759 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.h +++ b/ydb/core/kqp/provider/yql_kikimr_provider.h @@ -549,7 +549,8 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NKikimr::NExternalSource::IExternalSourceFactory::TPtr& sourceFactory, - bool isInternalCall); + bool isInternalCall, + TGUCSettings::TPtr gucSettings); TIntrusivePtr CreateKikimrDataSink( const NKikimr::NMiniKQL::IFunctionRegistry& functionRegistry, diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql new file mode 100644 index 000000000000..f9f813948191 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql @@ -0,0 +1,43 @@ +CREATE VIEW `/Root/aggregates_and_window` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql new file mode 100644 index 000000000000..c4c0dd0a1c8f --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql new file mode 100644 index 000000000000..c701bd2d2648 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql @@ -0,0 +1,46 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id) +); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql new file mode 100644 index 000000000000..0cc947dd8661 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql new file mode 100644 index 000000000000..dcbfee8f92db --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql @@ -0,0 +1,9 @@ +CREATE VIEW `/Root/count_episodes` WITH (security_invoker = TRUE) AS + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql new file mode 100644 index 000000000000..897adef6e6d6 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql new file mode 100644 index 000000000000..8964c9ab76a1 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql new file mode 100644 index 000000000000..153ad494f165 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql new file mode 100644 index 000000000000..545ec4056034 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql @@ -0,0 +1,22 @@ +CREATE VIEW `/Root/count_episodes_with_titles` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql new file mode 100644 index 000000000000..99e8b54a675d --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql new file mode 100644 index 000000000000..faa472a66beb --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql @@ -0,0 +1,25 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql new file mode 100644 index 000000000000..b84299a26d02 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql new file mode 100644 index 000000000000..0189412ace53 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql @@ -0,0 +1,4 @@ +CREATE VIEW `/Root/count_rows` WITH (security_invoker = TRUE) AS + SELECT + COUNT(*) + FROM `/Root/episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql new file mode 100644 index 000000000000..b540f8f401e8 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_rows`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql new file mode 100644 index 000000000000..b9d2cdb0f4a7 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql @@ -0,0 +1,7 @@ +SELECT + * +FROM ( + SELECT + COUNT(*) + FROM `/Root/episodes` +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql new file mode 100644 index 000000000000..49ffdf0dd923 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_rows`;