diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 679120cfe094..8e112a3b0f36 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -274,7 +274,7 @@ class TKqpCompileActor : public TActorBootstrapped { Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, QueryId.Cluster, QueryId.Database, Config, ModuleResolverState->ModuleResolver, - FederatedQuerySetup, UserToken, GUCSettings, ApplicationName, AppData(ctx)->FunctionRegistry, + FederatedQuerySetup, UserToken, GUCSettings, QueryServiceConfig, ApplicationName, AppData(ctx)->FunctionRegistry, false, false, std::move(TempTablesState), nullptr, SplitCtx); IKqpHost::TPrepareSettings prepareSettings; diff --git a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp index 4889ee332b27..b358f0efcf1c 100644 --- a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp +++ b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace NKikimr { namespace NKqp { @@ -28,7 +29,7 @@ NKqpProto::TKqpPhyTx BuildTxPlan(const TString& sql, TIntrusivePtr IModuleResolver::TPtr moduleResolver; UNIT_ASSERT(GetYqlDefaultModuleResolver(moduleCtx, moduleResolver)); - auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, nullptr, Nothing(), nullptr, nullptr, false, false, nullptr, actorSystem); + auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, nullptr, NKikimrConfig::TQueryServiceConfig(), Nothing(), nullptr, nullptr, false, false, nullptr, actorSystem, nullptr); auto result = qp->SyncPrepareDataQuery(sql, IKqpHost::TPrepareSettings()); result.Issues().PrintTo(Cerr); UNIT_ASSERT(result.Success()); diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 01c8b51843af..972689af9f48 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -1033,7 +1033,7 @@ class TKqpHost : public IKqpHost { std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr, - NYql::TExprContext* ctx = nullptr) + NYql::TExprContext* ctx = nullptr, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig = NKikimrConfig::TQueryServiceConfig()) : Gateway(gateway) , Cluster(cluster) , GUCSettings(gUCSettings) @@ -1051,6 +1051,7 @@ class TKqpHost : public IKqpHost { , FakeWorld(ctx ? nullptr : ExprCtx->NewWorld(TPosition())) , ExecuteCtx(MakeIntrusive()) , ActorSystem(actorSystem ? actorSystem : NActors::TActivationContext::ActorSystem()) + , QueryServiceConfig(queryServiceConfig) { if (funcRegistry) { FuncRegistry = funcRegistry; @@ -1825,10 +1826,15 @@ class TKqpHost : public IKqpHost { || settingName == "FilterPushdownOverJoinOptionalSide" || settingName == "DisableFilterPushdownOverJoinOptionalSide" || settingName == "RotateJoinTree" + || settingName == "TimeOrderRecoverDelay" + || settingName == "TimeOrderRecoverAhead" + || settingName == "TimeOrderRecoverRowLimit" + || settingName == "MatchRecognizeStream" ; }; auto configProvider = CreateConfigProvider(*TypesCtx, gatewaysConfig, {}, allowSettings); TypesCtx->AddDataSource(ConfigProviderName, configProvider); + TypesCtx->MatchRecognize = QueryServiceConfig.GetEnableMatchRecognize(); YQL_ENSURE(TypesCtx->Initialize(*ExprCtx)); @@ -1930,6 +1936,7 @@ class TKqpHost : public IKqpHost { TKqpTempTablesState::TConstPtr TempTablesState; NActors::TActorSystem* ActorSystem = nullptr; + NKikimrConfig::TQueryServiceConfig QueryServiceConfig; }; } // namespace @@ -1950,11 +1957,11 @@ Ydb::Table::QueryStatsCollection::Mode GetStatsMode(NYql::EKikimrStatsMode stats TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, TKikimrConfiguration::TPtr config, IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState, NActors::TActorSystem* actorSystem, NYql::TExprContext* ctx) { return MakeIntrusive(gateway, cluster, database, gUCSettings, applicationName, config, moduleResolver, federatedQuerySetup, userToken, funcRegistry, - keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx); + keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx, queryServiceConfig); } } // namespace NKqp diff --git a/ydb/core/kqp/host/kqp_host.h b/ydb/core/kqp/host/kqp_host.h index ff94738619f0..85a7025a9e1b 100644 --- a/ydb/core/kqp/host/kqp_host.h +++ b/ydb/core/kqp/host/kqp_host.h @@ -120,7 +120,7 @@ class IKqpHost : public TThrRefBase { TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, NYql::TKikimrConfiguration::TPtr config, NYql::IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, bool keepConfigChanges = false, bool isInternalCall = false, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr /*take from TLS by default*/, NYql::TExprContext* ctx = nullptr); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 4734288dcdac..1018878b092f 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -60,6 +61,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoNarrowFlatMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoNarrowMultiMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoWideMap::Match, HNDL(DqReadWideWrapFieldSubset)); + AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize)); AddHandler(1, &TCoTop::Match, HNDL(RewriteTopSortOverIndexRead)); AddHandler(1, &TCoTopSort::Match, HNDL(RewriteTopSortOverIndexRead)); @@ -288,6 +290,14 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { return output; } + TMaybeNode MatchRecognize(TExprBase node, TExprContext& ctx) { + auto output = ExpandMatchRecognize(node.Ptr(), ctx, TypesCtx); + if (output) { + DumpAppliedRule("MatchRecognize", node.Ptr(), output, ctx); + } + return output; + } + TMaybeNode DqReadWrapByProvider(TExprBase node, TExprContext& ctx) { auto output = NDq::DqReadWrapByProvider(node, ctx, TypesCtx); if (output) { diff --git a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp index 4ee15aad90c4..d464b7a0dc41 100644 --- a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp @@ -194,7 +194,7 @@ class TKqpWorkerActor : public TActorBootstrapped { Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, Settings.Cluster, Settings.Database, Config, ModuleResolverState->ModuleResolver, FederatedQuerySetup, - QueryState->RequestEv->GetUserToken(), GUCSettings, Settings.ApplicationName, AppData(ctx)->FunctionRegistry, !Settings.LongSession, false); + QueryState->RequestEv->GetUserToken(), GUCSettings, QueryServiceConfig, Settings.ApplicationName, AppData(ctx)->FunctionRegistry, !Settings.LongSession, false, nullptr, nullptr, nullptr); auto& queryRequest = QueryState->RequestEv; QueryState->ProxyRequestId = proxyRequestId; diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index 7f57550ea3ec..797d91922427 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -55,7 +55,7 @@ TIntrusivePtr CreateKikimrQueryProcessor(TIntrusivePtr ga auto federatedQuerySetup = std::make_optional({NYql::IHTTPGateway::Make(), nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr, {}}); return NKqp::CreateKqpHost(gateway, cluster, "/Root", kikimrConfig, moduleResolver, - federatedQuerySetup, nullptr, nullptr, {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem); + federatedQuerySetup, nullptr, nullptr, NKikimrConfig::TQueryServiceConfig(), {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem, nullptr); } NYql::NNodes::TExprBase GetExpr(const TString& ast, NYql::TExprContext& ctx, NYql::IModuleResolver* moduleResolver) { diff --git a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp index 8cd9b54a6857..edf5740b7ef2 100644 --- a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp @@ -84,6 +84,93 @@ Y_UNIT_TEST_SUITE(KqpPragma) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT_C(result.GetIssues().Empty(), result.GetIssues().ToString()); } + + Y_UNIT_TEST(MatchRecognizeWithTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA FeatureR010="prototype"; + + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + Y_UNIT_TEST(MatchRecognizeWithoutTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA FeatureR010="prototype"; + PRAGMA config.flags("MatchRecognizeStream", "disable"); + + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } } } // namspace NKqp diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 1212d7e632ef..a6934ef73742 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1016,6 +1016,7 @@ message TQueryServiceConfig { optional TFinalizeScriptServiceConfig FinalizeScriptServiceConfig = 12; optional uint64 ProgressStatsPeriodMs = 14 [default = 0]; // 0 = disabled optional uint32 QueryTimeoutDefaultSeconds = 19 [default = 1800]; + optional bool EnableMatchRecognize = 20 [default = false]; } // Config describes immediate controls and allows diff --git a/ydb/library/yql/core/yql_opt_match_recognize.cpp b/ydb/library/yql/core/yql_opt_match_recognize.cpp index 5e99874b5415..b7f05afa37ab 100644 --- a/ydb/library/yql/core/yql_opt_match_recognize.cpp +++ b/ydb/library/yql/core/yql_opt_match_recognize.cpp @@ -146,7 +146,7 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& ExtractSortKeyAndOrder(pos, sortTraits, sortKey, sortOrder, ctx); TExprNode::TPtr result; if (isStreaming) { - YQL_ENSURE(sortOrder->ChildrenSize() == 1, "Expect ORDER BY timestamp for MATCH_RECOGNIZE on streams"); + YQL_ENSURE(sortOrder->ChildrenSize() == 1, "Expect ORDER BY timestamp for MATCH_RECOGNIZE"); const auto reordered = ctx.Builder(pos) .Lambda() .Param("partition") @@ -216,37 +216,15 @@ TExprNode::TPtr ExpandMatchRecognize(const TExprNode::TPtr& node, TExprContext& .Seal() .Build(); } else { //non-streaming - if (partitionColumns->ChildrenSize() != 0) { - result = ctx.Builder(pos) - .Callable("PartitionsByKeys") - .Add(0, input) - .Add(1, partitionKeySelector) - .Add(2, sortOrder) - .Add(3, sortKey) - .Add(4, matchRecognize) - .Seal() - .Build(); - } else { - if (sortOrder->IsCallable("Void")) { - result = ctx.Builder(pos) - .Apply(matchRecognize) - .With(0, input) - .Seal() - .Build();; - } else { - result = ctx.Builder(pos) - .Apply(matchRecognize) - .With(0) - .Callable("Sort") - .Add(0, input) - .Add(1, sortOrder) - .Add(2, sortKey) - .Seal() - .Done() - .Seal() - .Build(); - } - } + result = ctx.Builder(pos) + .Callable("PartitionsByKeys") + .Add(0, input) + .Add(1, partitionKeySelector) + .Add(2, sortOrder) + .Add(3, sortKey) + .Add(4, matchRecognize) + .Seal() + .Build(); } YQL_CLOG(INFO, Core) << "Expanded MatchRecognize"; return result; diff --git a/ydb/library/yql/tests/sql/dq_file/part16/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part16/canondata/result.json index 970634b99903..50058d2aa8f3 100644 --- a/ydb/library/yql/tests/sql/dq_file/part16/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part16/canondata/result.json @@ -1892,9 +1892,9 @@ ], "test.test[match_recognize-test_type-default.txt-Debug]": [ { - "checksum": "cb5512aae3f5566055b2388be6d114af", - "size": 3220, - "uri": "https://{canondata_backend}/1937367/518bbcf510ad7a43c5e77746bafd21ed0e3fdc6e/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" + "checksum": "648119cc488bae598a0936f9d2c82b7e", + "size": 3458, + "uri": "https://{canondata_backend}/1942173/c4d7dbc720e57397caf847cd2616b1362110ddd2/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-test_type-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index dc11e5ccdf2d..491a422f89f6 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -1717,9 +1717,9 @@ ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { - "checksum": "782bb90b80a43308dfef1dbd81055b12", - "size": 5618, - "uri": "https://{canondata_backend}/1942173/e32f1de19c4f2770a6f215d1dc22bc97e318bf22/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" + "checksum": "c8b1e13d6da573f8a1afd415db1d00e7", + "size": 5787, + "uri": "https://{canondata_backend}/1917492/86ab0de654a60bf1e3145a3d8e3d7eae4a9f26b8/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-alerts-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json index ae8fa30b3dba..8b49649f5c6a 100644 --- a/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json @@ -1784,6 +1784,28 @@ } ], "test.test[limit-empty_input_after_limit-default.txt-Results]": [], + "test.test[match_recognize-alerts_without_order-default.txt-Analyze]": [ + { + "checksum": "b4dd508a329723c74293d80f0278c705", + "size": 505, + "uri": "https://{canondata_backend}/1917492/ef839f70e5a2f493427f7f92ed00d26a993f6d4a/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Analyze_/plan.txt" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Debug]": [ + { + "checksum": "17c5c1f84ac65b6a82234cd0b0a41a68", + "size": 5699, + "uri": "https://{canondata_backend}/1917492/ef839f70e5a2f493427f7f92ed00d26a993f6d4a/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Debug_/opt.yql_patched" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Plan]": [ + { + "checksum": "b4dd508a329723c74293d80f0278c705", + "size": 505, + "uri": "https://{canondata_backend}/1917492/ef839f70e5a2f493427f7f92ed00d26a993f6d4a/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Plan_/plan.txt" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Results]": [], "test.test[optimizers-unused_columns_group_one_of_multi--Analyze]": [ { "checksum": "ffcfe803a5b4bbfe9af72cc128197217", diff --git a/ydb/library/yql/tests/sql/dq_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted b/ydb/library/yql/tests/sql/dq_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted new file mode 100644 index 000000000000..abd564f4a8c8 --- /dev/null +++ b/ydb/library/yql/tests/sql/dq_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted @@ -0,0 +1,5 @@ +/program.sql:
: Fatal: Optimization + + /program.sql:
:8:1: Fatal: ydb/library/yql/core/yql_opt_match_recognize.cpp:xxx ExpandMatchRecognize(): requirement sortOrder->ChildrenSize() == 1 failed, message: Expect ORDER BY timestamp for MATCH_RECOGNIZE + select * from (select * from AS_TABLE($data) MATCH_RECOGNIZE( + ^ \ No newline at end of file diff --git a/ydb/library/yql/tests/sql/hybrid_file/part0/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part0/canondata/result.json index 623d2b1086f4..92443c8dba6b 100644 --- a/ydb/library/yql/tests/sql/hybrid_file/part0/canondata/result.json +++ b/ydb/library/yql/tests/sql/hybrid_file/part0/canondata/result.json @@ -1653,9 +1653,9 @@ ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { - "checksum": "900161f08e14b0b4c725130ed055ca73", - "size": 5617, - "uri": "https://{canondata_backend}/1880306/5d2fb97b23cd70975bc5d744391981f9d5595c04/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" + "checksum": "902f8b167c5875200480d237a6493bb7", + "size": 5786, + "uri": "https://{canondata_backend}/1903885/f00a3197fa44aa3d49bf7fe1bbf0fed52ce265b9/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-alerts-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json index 4093b8541624..b37b4782c312 100644 --- a/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json @@ -1737,9 +1737,9 @@ ], "test.test[match_recognize-test_type-default.txt-Debug]": [ { - "checksum": "e0e549a969a71f8fddbd772e08bbeebe", - "size": 3219, - "uri": "https://{canondata_backend}/1937492/7ae37c32b42bb57d4df171a62ced7ab76867a8ea/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" + "checksum": "367551185530c7b04aa9da2f8afa111f", + "size": 3457, + "uri": "https://{canondata_backend}/1903885/a4d0122d8471ff0ca85352e617bed922d9ad8df1/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql_patched" } ], "test.test[match_recognize-test_type-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/hybrid_file/part9/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part9/canondata/result.json index f8b89e309fd9..3791cb41e2f8 100644 --- a/ydb/library/yql/tests/sql/hybrid_file/part9/canondata/result.json +++ b/ydb/library/yql/tests/sql/hybrid_file/part9/canondata/result.json @@ -1511,6 +1511,20 @@ "uri": "https://{canondata_backend}/1900335/8eba31ae2dcfd9245ad9327a1ac3ca89667336e2/resource.tar.gz#test.test_limit-empty_input_after_limit-default.txt-Plan_/plan.txt" } ], + "test.test[match_recognize-alerts_without_order-default.txt-Debug]": [ + { + "checksum": "acba759d95a9b70640e6418dc1febb2d", + "size": 5698, + "uri": "https://{canondata_backend}/1937424/f54290c1c9e8b8c01bdab19c1d6ef1f76de15d9c/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Debug_/opt.yql_patched" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Plan]": [ + { + "checksum": "b4dd508a329723c74293d80f0278c705", + "size": 505, + "uri": "https://{canondata_backend}/1937424/f54290c1c9e8b8c01bdab19c1d6ef1f76de15d9c/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Plan_/plan.txt" + } + ], "test.test[optimizers-direct_row_after_merge--Debug]": [ { "checksum": "6db94e68bc8d6ad4ae649044f1b0c9e9", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 97606b21524c..73ec13c715c0 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -10786,6 +10786,13 @@ "uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_/sql.yql" } ], + "test_sql2yql.test[match_recognize-alerts_without_order]": [ + { + "checksum": "4a7d1c9ca704a076217e529b5489ad87", + "size": 8780, + "uri": "https://{canondata_backend}/1937001/f1ec239726ab3e2cf00695f3d10461ff9ef6c3b0/resource.tar.gz#test_sql2yql.test_match_recognize-alerts_without_order_/sql.yql" + } + ], "test_sql2yql.test[match_recognize-permute]": [ { "checksum": "05c45a70d86bca34be996277afae8bf9", @@ -10816,9 +10823,9 @@ ], "test_sql2yql.test[match_recognize-test_type]": [ { - "checksum": "1b5581aa704781439ce64e9fc4e3c21d", - "size": 9654, - "uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql" + "checksum": "0a5812e84f194b487eae4084027bd170", + "size": 10249, + "uri": "https://{canondata_backend}/1936842/c0fac16b134e7c8f865a197ac63738ced4fac271/resource.tar.gz#test_sql2yql.test_match_recognize-test_type_/sql.yql" } ], "test_sql2yql.test[match_recognize-test_type_predicate]": [ @@ -30204,6 +30211,13 @@ "uri": "https://{canondata_backend}/1937001/da4215d5087e56eec0224ec5e7754dafd0b2bdcf/resource.tar.gz#test_sql_format.test_match_recognize-alerts_/formatted.sql" } ], + "test_sql_format.test[match_recognize-alerts_without_order]": [ + { + "checksum": "779c2c3a4eab619646509ce5008863e8", + "size": 2906, + "uri": "https://{canondata_backend}/1937001/f1ec239726ab3e2cf00695f3d10461ff9ef6c3b0/resource.tar.gz#test_sql_format.test_match_recognize-alerts_without_order_/formatted.sql" + } + ], "test_sql_format.test[match_recognize-permute]": [ { "checksum": "998e6752ce413cc78e952b9958dfab74", @@ -30234,9 +30248,9 @@ ], "test_sql_format.test[match_recognize-test_type]": [ { - "checksum": "3fcf6d53720604b982ad58beed055a26", - "size": 1127, - "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_match_recognize-test_type_/formatted.sql" + "checksum": "36104b385f3b9986c22f409931b80564", + "size": 1302, + "uri": "https://{canondata_backend}/1936842/c0fac16b134e7c8f865a197ac63738ced4fac271/resource.tar.gz#test_sql_format.test_match_recognize-test_type_/formatted.sql" } ], "test_sql_format.test[match_recognize-test_type_predicate]": [ diff --git a/ydb/library/yql/tests/sql/suites/match_recognize/alerts_without_order.sql b/ydb/library/yql/tests/sql/suites/match_recognize/alerts_without_order.sql new file mode 100644 index 000000000000..7d92f0f18c7b --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/match_recognize/alerts_without_order.sql @@ -0,0 +1,59 @@ +$osquery_data = [ +<|dt:1688910000, host:"fqdn1", ev_type:"someEv", ev_status:"", user:"", vpn:false, |>, +<|dt:1688910050, host:"fqdn2", ev_type:"login", ev_status:"success", user:"", vpn:true, |>, +<|dt:1688910100, host:"fqdn1", ev_type:"login", ev_status:"success", user:"", vpn:true, |>, +<|dt:1688910220, host:"fqdn1", ev_type:"login", ev_status:"success", user:"", vpn:false, |>, +<|dt:1688910300, host:"fqdn1", ev_type:"delete_all", ev_status:"", user:"", vpn:false, |>, +<|dt:1688910400, host:"fqdn2", ev_type:"delete_all", ev_status:"", user:"", vpn:false, |>, +<|dt:1688910500, host:"fqdn1", ev_type:"login", ev_status:"failed", user:"user1", vpn:false, |>, +<|dt:1688910500, host:"fqdn1", ev_type:"login", ev_status:"failed", user:"user2", vpn:false, |>, +<|dt:1688910600, host:"fqdn", ev_type:"someEv", ev_status:"", user:"user1", vpn:false, |>, +<|dt:1688910800, host:"fqdn2", ev_type:"login", ev_status:"failed", user:"user1", vpn:false, |>, +<|dt:1688910900, host:"fqdn2", ev_type:"login", ev_status:"failed", user:"user2", vpn:false, |>, +<|dt:1688911000, host:"fqdn2", ev_type:"login", ev_status:"success", user:"user1", vpn:false, |>, +]; + +pragma FeatureR010="prototype"; +pragma config.flags("MatchRecognizeStream", "disable"); + +SELECT * +FROM AS_TABLE($osquery_data) MATCH_RECOGNIZE( + MEASURES + LAST(SUSPICIOUS_ACTION_SOON.dt) as suspicious_action_dt, + LAST(LOGIN_SUCCESS_REMOTE.host) as remote_login_host, + LAST(LOGIN_SUCCESS_REMOTE.user) as remote_login_user, + LAST(LOGIN_SUCCESS_REMOTE.dt) as t, + FIRST(LOGIN_FAILED_SAME_USER.dt) as brutforce_begin, + FIRST(LOGIN_SUCCESS_SAME_USER.dt) as brutforce_end, + LAST(LOGIN_SUCCESS_SAME_USER.user) as brutforce_login + + ONE ROW PER MATCH + PATTERN ( + LOGIN_SUCCESS_REMOTE ANY_ROW* (SUSPICIOUS_ACTION_SOON | SUSPICIOUS_ACTION_TIMEOUT) | + (LOGIN_FAILED_SAME_USER ANY_ROW*){2,} LOGIN_SUCCESS_SAME_USER + ) + DEFINE + LOGIN_SUCCESS_REMOTE as + LOGIN_SUCCESS_REMOTE.ev_type = "login" and + LOGIN_SUCCESS_REMOTE.ev_status = "success" and + LOGIN_SUCCESS_REMOTE.vpn = true, + SUSPICIOUS_ACTION_SOON as + SUSPICIOUS_ACTION_SOON.host = LAST(LOGIN_SUCCESS_REMOTE.host) and + SUSPICIOUS_ACTION_SOON.ev_type = "delete_all" and + SUSPICIOUS_ACTION_SOON.dt - LAST(LOGIN_SUCCESS_REMOTE.dt) < 1000, + SUSPICIOUS_ACTION_TIMEOUT as + SUSPICIOUS_ACTION_TIMEOUT.dt - LAST(LOGIN_SUCCESS_REMOTE.dt) >= 1000, + + LOGIN_FAILED_SAME_USER as + LOGIN_FAILED_SAME_USER.ev_type = "login" and + LOGIN_FAILED_SAME_USER.ev_status <> "success" and + (LAST(LOGIN_FAILED_SAME_USER.user) IS NULL + or LAST(LOGIN_FAILED_SAME_USER.user) = LOGIN_FAILED_SAME_USER.user + ), + LOGIN_SUCCESS_SAME_USER as + LOGIN_SUCCESS_SAME_USER.ev_type = "login" and + LOGIN_SUCCESS_SAME_USER.ev_status = "success" and + LOGIN_SUCCESS_SAME_USER.user = LAST(LOGIN_FAILED_SAME_USER.user) +) AS MATCHED +; + diff --git a/ydb/library/yql/tests/sql/suites/match_recognize/test_type.sql b/ydb/library/yql/tests/sql/suites/match_recognize/test_type.sql index 1a5cdeaaf6bb..a92b5672e488 100644 --- a/ydb/library/yql/tests/sql/suites/match_recognize/test_type.sql +++ b/ydb/library/yql/tests/sql/suites/match_recognize/test_type.sql @@ -7,6 +7,7 @@ $data = [<|dt:4, host:"fqdn1", key:14|>]; -- NoPartitionNoMeasure select * from AS_TABLE($data) MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) ONE ROW PER MATCH AFTER MATCH SKIP TO NEXT ROW PATTERN ( @@ -18,6 +19,7 @@ select * from AS_TABLE($data) MATCH_RECOGNIZE( --NoPartitionStringMeasure select * from AS_TABLE($data) MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) MEASURES "SomeString" as Measure1 ONE ROW PER MATCH @@ -32,6 +34,7 @@ select * from AS_TABLE($data) MATCH_RECOGNIZE( --IntPartitionColNoMeasure select * from AS_TABLE($data) MATCH_RECOGNIZE( PARTITION BY dt + ORDER BY CAST(dt as Timestamp) ONE ROW PER MATCH AFTER MATCH SKIP TO NEXT ROW PATTERN ( @@ -44,6 +47,7 @@ select * from AS_TABLE($data) MATCH_RECOGNIZE( --StringPartitionColStringMeasure select * from AS_TABLE($data) MATCH_RECOGNIZE( PARTITION BY host + ORDER BY CAST(dt as Timestamp) MEASURES "SomeString" as Measure1 ONE ROW PER MATCH @@ -58,6 +62,7 @@ select * from AS_TABLE($data) MATCH_RECOGNIZE( --TwoPartitionColsTwoMeasures select * from AS_TABLE($data) MATCH_RECOGNIZE( PARTITION BY host, dt + ORDER BY CAST(dt as Timestamp) MEASURES "SomeString" as S, 345 as I diff --git a/ydb/library/yql/tests/sql/yt_native_file/part16/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part16/canondata/result.json index a6868bd2cea1..841a59b86e37 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part16/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part16/canondata/result.json @@ -1721,9 +1721,9 @@ ], "test.test[match_recognize-test_type-default.txt-Debug]": [ { - "checksum": "e7e3da81b0bcd6e16f054a55deeef34e", - "size": 3152, - "uri": "https://{canondata_backend}/1130705/85e00e1809c16d7a062c55ddef958687d825adb0/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql" + "checksum": "f3049326be52b62a5cf57002c3eebcf2", + "size": 3382, + "uri": "https://{canondata_backend}/1599023/f2c034a7162395b18a12a2a3caebebf38f158f60/resource.tar.gz#test.test_match_recognize-test_type-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-test_type-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json index e4fc7ff3cbed..ad4f85d28d6b 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json @@ -1477,9 +1477,9 @@ ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { - "checksum": "e6883aec19c55794eb3df952b116a0f7", - "size": 5540, - "uri": "https://{canondata_backend}/1880306/2a33b9c798cdb676ceda243cdc609d2afef27554/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql" + "checksum": "40334ece1e6991ad870f1d1488b88b0a", + "size": 5709, + "uri": "https://{canondata_backend}/1599023/94042bc6ec9d078689120650754efa466e6c1d00/resource.tar.gz#test.test_match_recognize-alerts-default.txt-Debug_/opt.yql" } ], "test.test[match_recognize-alerts-default.txt-Plan]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json index 1af73fd149d5..9113e2016919 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json @@ -1632,6 +1632,27 @@ "uri": "https://{canondata_backend}/1942100/33d51fa00fd086c78b2c0087e9e9f2249eef2d76/resource.tar.gz#test.test_limit-empty_input_after_limit-default.txt-Results_/results.txt" } ], + "test.test[match_recognize-alerts_without_order-default.txt-Debug]": [ + { + "checksum": "46286b4ea128734b6e3ffae32164bc1e", + "size": 5622, + "uri": "https://{canondata_backend}/1130705/ac239a3807774cda911f256a24ef987fd0afe20f/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Debug_/opt.yql" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Plan]": [ + { + "checksum": "b4dd508a329723c74293d80f0278c705", + "size": 505, + "uri": "https://{canondata_backend}/1130705/ac239a3807774cda911f256a24ef987fd0afe20f/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Plan_/plan.txt" + } + ], + "test.test[match_recognize-alerts_without_order-default.txt-Results]": [ + { + "checksum": "6e11c24a571d7b78308343a8fe4d0772", + "size": 4611, + "uri": "https://{canondata_backend}/1130705/ac239a3807774cda911f256a24ef987fd0afe20f/resource.tar.gz#test.test_match_recognize-alerts_without_order-default.txt-Results_/results.txt" + } + ], "test.test[optimizers-unused_columns_group_one_of_multi--Debug]": [ { "checksum": "52de67abfc854bb6cba714600c125055", diff --git a/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted b/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted new file mode 100644 index 000000000000..abd564f4a8c8 --- /dev/null +++ b/ydb/library/yql/tests/sql/yt_native_file/part7/canondata/test.test_match_recognize-without_order_by--Results_/extracted @@ -0,0 +1,5 @@ +/program.sql:
: Fatal: Optimization + + /program.sql:
:8:1: Fatal: ydb/library/yql/core/yql_opt_match_recognize.cpp:xxx ExpandMatchRecognize(): requirement sortOrder->ChildrenSize() == 1 failed, message: Expect ORDER BY timestamp for MATCH_RECOGNIZE + select * from (select * from AS_TABLE($data) MATCH_RECOGNIZE( + ^ \ No newline at end of file diff --git a/ydb/library/yql/tools/dqrun/examples/gateways.conf b/ydb/library/yql/tools/dqrun/examples/gateways.conf index e2d2217e7a21..e3c699301ed0 100644 --- a/ydb/library/yql/tools/dqrun/examples/gateways.conf +++ b/ydb/library/yql/tools/dqrun/examples/gateways.conf @@ -124,6 +124,9 @@ YqlCore { Flags { Name: "_EnableStreamLookupJoin" } + Flags { + Name: "_EnableMatchRecognize" + } } SqlCore { diff --git a/ydb/tests/fq/yt/kqp_yt_file.py b/ydb/tests/fq/yt/kqp_yt_file.py index 0b7d8865de14..8f867eb3510e 100644 --- a/ydb/tests/fq/yt/kqp_yt_file.py +++ b/ydb/tests/fq/yt/kqp_yt_file.py @@ -8,7 +8,6 @@ from yql_utils import KSV_ATTR, get_files, get_http_files, get_tables, is_xfail, yql_binary_path, yql_source_path EXCLUDED_SUITES = [ - 'match_recognize', # MATCH_RECOGNIZE is disabled in KQP ] EXCLUDED_TESTS = [ diff --git a/ydb/tests/tools/kqprun/configuration/app_config.conf b/ydb/tests/tools/kqprun/configuration/app_config.conf index 6d9a34e34146..85225bbe88f1 100644 --- a/ydb/tests/tools/kqprun/configuration/app_config.conf +++ b/ydb/tests/tools/kqprun/configuration/app_config.conf @@ -29,6 +29,7 @@ QueryServiceConfig { QueryArtifactsCompressionMethod: "zstd_6" ScriptResultRowsLimit: 0 ScriptResultSizeLimit: 10485760 + EnableMatchRecognize: true FileStorage { MaxFiles: 1000 diff --git a/ydb/tools/query_replay/query_compiler.cpp b/ydb/tools/query_replay/query_compiler.cpp index 53d9f816a623..40046ca100ff 100644 --- a/ydb/tools/query_replay/query_compiler.cpp +++ b/ydb/tools/query_replay/query_compiler.cpp @@ -292,7 +292,7 @@ class TReplayCompileActor: public TActorBootstrapped { TlsActivationContext->ExecutorThread.ActorSystem, SelfId().NodeId(), counters); auto federatedQuerySetup = std::make_optional({NYql::IHTTPGateway::Make(), nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr, {}}); KqpHost = CreateKqpHost(Gateway, Query->Cluster, Query->Database, Config, ModuleResolverState->ModuleResolver, - federatedQuerySetup, nullptr, GUCSettings, Nothing(), FunctionRegistry, false); + federatedQuerySetup, nullptr, GUCSettings, NKikimrConfig::TQueryServiceConfig(), Nothing(), FunctionRegistry, false); IKqpHost::TPrepareSettings prepareSettings; prepareSettings.DocumentApiRestricted = false; diff --git a/ydb/tools/query_replay_yt/query_compiler.cpp b/ydb/tools/query_replay_yt/query_compiler.cpp index 9bec8b9523a5..f3dfe05dafa9 100644 --- a/ydb/tools/query_replay_yt/query_compiler.cpp +++ b/ydb/tools/query_replay_yt/query_compiler.cpp @@ -624,7 +624,7 @@ class TReplayCompileActor: public TActorBootstrapped { TlsActivationContext->ExecutorThread.ActorSystem, SelfId().NodeId(), counters); auto federatedQuerySetup = std::make_optional({HttpGateway, nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr, {}}); KqpHost = CreateKqpHost(Gateway, Query->Cluster, Query->Database, Config, ModuleResolverState->ModuleResolver, - federatedQuerySetup, nullptr, GUCSettings, Nothing(), FunctionRegistry, false); + federatedQuerySetup, nullptr, GUCSettings, NKikimrConfig::TQueryServiceConfig(), Nothing(), FunctionRegistry, false); StartCompilation(); Continue();