diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/AbstractLlmSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/AbstractLlmSettings.cs
index 5c6765442c7..b813f19a0e7 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/AbstractLlmSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/AbstractLlmSettings.cs
@@ -1,4 +1,5 @@
-using Sparrow.Json;
+using System;
+using Sparrow.Json;
 using Sparrow.Json.Parsing;
 
 namespace Raven.Client.Documents.Operations.ETL.AI;
@@ -6,6 +7,46 @@ namespace Raven.Client.Documents.Operations.ETL.AI;
 public abstract class AbstractAiSettings : IDynamicJsonValueConvertible
 {
     public abstract bool HasSettings();
-    public abstract bool HasCriticalChanges(AbstractAiSettings other);
+    public abstract AiSettingsCompareDifferences Compare(AbstractAiSettings other);
+
     public abstract DynamicJsonValue ToJson();
 }
+
+/// <summary>
+/// Bit flags naming the categories of settings that differ between two AI settings objects.
+/// Composite members such as <see cref="RequiresEmbeddingsRegeneration"/> are masks: test them with a
+/// bitwise AND, because <see cref="Enum.HasFlag"/> is true only when every bit of the mask is set.
+/// </summary>
+[Flags]
+public enum AiSettingsCompareDifferences
+{
+    None = 0,
+
+    // Changes that affect the mathematical structure of embeddings
+    EmbeddingDimensions = 1 << 0,
+    EmbeddingNormalization = 1 << 1,
+    PoolingStrategy = 1 << 2,
+    ModelArchitecture = 1 << 3, // Changes in model name/version that affect embedding structure
+
+    // Changes in text preprocessing that affect input
+    TextPreprocessing = 1 << 4, // e.g. case sensitivity, unicode normalization
+    TokenizationSettings = 1 << 5, // e.g. special tokens (CLS, SEP, PAD, etc.)
+    SequenceLimits = 1 << 6, // e.g. maximum tokens
+
+    // Changes in API configuration
+    EndpointConfiguration = 1 << 7, // Changes in endpoint URLs
+    AuthenticationSettings = 1 << 8, // Changes in API keys, org IDs etc
+
+    // Changes that could affect embedding generation but cannot be verified by comparing settings
+    DeploymentConfiguration = 1 << 9,
+
+    // Combinations for common scenarios
+    EmbeddingStructure = EmbeddingDimensions | EmbeddingNormalization | PoolingStrategy | ModelArchitecture,
+    InputProcessing = TextPreprocessing | TokenizationSettings | SequenceLimits,
+    ConnectionConfig = EndpointConfiguration | AuthenticationSettings,
+
+    RequiresEmbeddingsRegeneration = EmbeddingStructure | InputProcessing | DeploymentConfiguration,
+
+    // All changes
+    All = RequiresEmbeddingsRegeneration | ConnectionConfig
+}
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/AiConnectionString.cs b/src/Raven.Client/Documents/Operations/ETL/AI/AiConnectionString.cs
index fcc3c0649b7..144c67b2b1b 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/AiConnectionString.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/AiConnectionString.cs
@@ -106,30 +106,34 @@ private static string GenerateIdentifier(string input)
         return string.IsNullOrEmpty(finalResult) ? $"{nameof(AiConnectionString)}Identifier" : finalResult;
     }
 
-    public bool HasCriticalChanges(AiConnectionString newConnectionString)
+    public AiSettingsCompareDifferences Compare(AiConnectionString newConnectionString)
     {
         if (newConnectionString == null)
-            return true;
+            return AiSettingsCompareDifferences.All;
+
+        var result = AiSettingsCompareDifferences.None;
 
         if (Identifier != newConnectionString.Identifier)
-            return true;
+            result |= AiSettingsCompareDifferences.ConnectionConfig;
 
         var oldProvider = GetActiveProvider();
         var newProvider = newConnectionString.GetActiveProvider();
 
         if (oldProvider != newProvider)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return oldProvider switch
+        result |= oldProvider switch
         {
-            AiConnectorType.OpenAi => OpenAiSettings.HasCriticalChanges(newConnectionString.OpenAiSettings),
-            AiConnectorType.AzureOpenAI => AzureOpenAiSettings.HasCriticalChanges(newConnectionString.AzureOpenAiSettings),
-            AiConnectorType.Ollama => OllamaSettings.HasCriticalChanges(newConnectionString.OllamaSettings),
-            AiConnectorType.Onnx => OnnxSettings.HasCriticalChanges(newConnectionString.OnnxSettings),
-            AiConnectorType.Google => GoogleSettings.HasCriticalChanges(newConnectionString.GoogleSettings),
-            AiConnectorType.HuggingFace => HuggingFaceSettings.HasCriticalChanges(newConnectionString.HuggingFaceSettings),
-            _ => true
+            AiConnectorType.OpenAi => OpenAiSettings.Compare(newConnectionString.OpenAiSettings),
+            AiConnectorType.AzureOpenAI => AzureOpenAiSettings.Compare(newConnectionString.AzureOpenAiSettings),
+            AiConnectorType.Ollama => OllamaSettings.Compare(newConnectionString.OllamaSettings),
+            AiConnectorType.Onnx => OnnxSettings.Compare(newConnectionString.OnnxSettings),
+            AiConnectorType.Google => GoogleSettings.Compare(newConnectionString.GoogleSettings),
+            AiConnectorType.HuggingFace => HuggingFaceSettings.Compare(newConnectionString.HuggingFaceSettings),
+            _ => AiSettingsCompareDifferences.All
         };
+
+        return result;
     }
 
     private AiConnectorType GetActiveProvider()
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/AzureOpenAiSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/AzureOpenAiSettings.cs
index 789bad2c605..da8fa505b85 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/AzureOpenAiSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/AzureOpenAiSettings.cs
@@ -32,13 +32,20 @@ public override bool HasSettings()
                string.IsNullOrWhiteSpace(DeploymentName) == false;
     }
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
         if (other is not AzureOpenAiSettings azureSettings)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return base.HasCriticalChanges(other) ||
-               Dimensions != azureSettings.Dimensions;
+        var differences = base.Compare(other);
+
+        if (DeploymentName != azureSettings.DeploymentName)
+            differences |= AiSettingsCompareDifferences.DeploymentConfiguration;
+
+        if (Dimensions != azureSettings.Dimensions)
+            differences |= AiSettingsCompareDifferences.EmbeddingDimensions;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson()
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/GoogleSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/GoogleSettings.cs
index 89462a72de0..86e7de96d02 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/GoogleSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/GoogleSettings.cs
@@ -33,13 +33,21 @@ public override bool HasSettings()
                string.IsNullOrWhiteSpace(ApiKey) == false;
     }
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
         if (other is not GoogleSettings googleSettings)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return Model != googleSettings.Model ||
-               AiVersion != googleSettings.AiVersion;
+        var differences = AiSettingsCompareDifferences.None;
+
+        if (Model != googleSettings.Model ||
+            AiVersion != googleSettings.AiVersion)
+            differences |= AiSettingsCompareDifferences.ModelArchitecture;
+
+        if (ApiKey != googleSettings.ApiKey)
+            differences |= AiSettingsCompareDifferences.AuthenticationSettings;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson()
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/HuggingFaceSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/HuggingFaceSettings.cs
index c5a23591dd5..68730d72f52 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/HuggingFaceSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/HuggingFaceSettings.cs
@@ -37,12 +37,23 @@ public override bool HasSettings()
         return string.IsNullOrWhiteSpace(Model) == false;
     }
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
         if (other is not HuggingFaceSettings huggingFaceSettings)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return Model != huggingFaceSettings.Model;
+        var differences = AiSettingsCompareDifferences.None;
+
+        if (Model != huggingFaceSettings.Model)
+            differences |= AiSettingsCompareDifferences.ModelArchitecture;
+
+        if (Endpoint != huggingFaceSettings.Endpoint)
+            differences |= AiSettingsCompareDifferences.EndpointConfiguration;
+
+        if (ApiKey != huggingFaceSettings.ApiKey)
+            differences |= AiSettingsCompareDifferences.AuthenticationSettings;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson()
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/OllamaSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/OllamaSettings.cs
index ae15254bb23..46a2c938d56 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/OllamaSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/OllamaSettings.cs
@@ -37,12 +37,20 @@ public override bool HasSettings()
                string.IsNullOrWhiteSpace(Model) == false;
     }
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
         if (other is not OllamaSettings ollamaSettings)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return Model != ollamaSettings.Model;
+        var differences = AiSettingsCompareDifferences.None;
+
+        if (Model != ollamaSettings.Model)
+            differences |= AiSettingsCompareDifferences.ModelArchitecture;
+
+        if (Uri != ollamaSettings.Uri)
+            differences |= AiSettingsCompareDifferences.EndpointConfiguration;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson() =>
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/OnnxSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/OnnxSettings.cs
index 9dff01e8983..00fa1ed44e6 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/OnnxSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/OnnxSettings.cs
@@ -61,20 +61,33 @@ public sealed class OnnxSettings : AbstractAiSettings
 
     public override bool HasSettings() => true;
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
         if (other is not OnnxSettings onnxSettings)
-            return true;
+            return AiSettingsCompareDifferences.All;
 
-        return CaseSensitive != onnxSettings.CaseSensitive ||
-               MaximumTokens != onnxSettings.MaximumTokens ||
-               ClsToken != onnxSettings.ClsToken ||
+        var differences = AiSettingsCompareDifferences.None;
+
+        if (CaseSensitive != onnxSettings.CaseSensitive ||
+            UnicodeNormalization != onnxSettings.UnicodeNormalization)
+            differences |= AiSettingsCompareDifferences.TextPreprocessing;
+
+        if (ClsToken != onnxSettings.ClsToken ||
                UnknownToken != onnxSettings.UnknownToken ||
                SepToken != onnxSettings.SepToken ||
-               PadToken != onnxSettings.PadToken ||
-               UnicodeNormalization != onnxSettings.UnicodeNormalization ||
-               PoolingMode != onnxSettings.PoolingMode ||
-               NormalizeEmbeddings != onnxSettings.NormalizeEmbeddings;
+               PadToken != onnxSettings.PadToken)
+            differences |= AiSettingsCompareDifferences.TokenizationSettings;
+
+        if (MaximumTokens != onnxSettings.MaximumTokens)
+            differences |= AiSettingsCompareDifferences.SequenceLimits;
+
+        if (PoolingMode != onnxSettings.PoolingMode)
+            differences |= AiSettingsCompareDifferences.PoolingStrategy;
+
+        if (NormalizeEmbeddings != onnxSettings.NormalizeEmbeddings)
+            differences |= AiSettingsCompareDifferences.EmbeddingNormalization;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson() =>
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiBaseSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiBaseSettings.cs
index 0324f705614..677832f3ead 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiBaseSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiBaseSettings.cs
@@ -40,12 +40,23 @@ public override bool HasSettings()
                string.IsNullOrWhiteSpace(Model) == false;
     }
 
-    public override bool HasCriticalChanges(AbstractAiSettings other)
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
     {
-        if (other is not OpenAiBaseSettings openAiBaseSettings)
-            return true;
+        if (other is not OpenAiBaseSettings openAiSettings)
+            return AiSettingsCompareDifferences.All;
 
-        return Model != openAiBaseSettings.Model;
+        var differences = AiSettingsCompareDifferences.None;
+
+        if (ApiKey != openAiSettings.ApiKey)
+            differences |= AiSettingsCompareDifferences.AuthenticationSettings;
+
+        if (Endpoint != openAiSettings.Endpoint)
+            differences |= AiSettingsCompareDifferences.EndpointConfiguration;
+
+        if (Model != openAiSettings.Model)
+            differences |= AiSettingsCompareDifferences.ModelArchitecture;
+
+        return differences;
     }
 
     public override DynamicJsonValue ToJson()
diff --git a/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiSettings.cs b/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiSettings.cs
index f9a98299a13..76d7c5101f5 100644
--- a/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiSettings.cs
+++ b/src/Raven.Client/Documents/Operations/ETL/AI/OpenAiSettings.cs
@@ -36,6 +36,20 @@ public OpenAiSettings()
     ///
     public string? ProjectId { get; set; }
 
+    public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
+    {
+        if (other is not OpenAiSettings openAiSettings)
+            return AiSettingsCompareDifferences.All;
+
+        var differences = base.Compare(other);
+
+        if (OrganizationId != openAiSettings.OrganizationId ||
+            ProjectId != openAiSettings.ProjectId)
+            differences |= AiSettingsCompareDifferences.AuthenticationSettings;
+
+        return differences;
+    }
+
     public override DynamicJsonValue ToJson()
     {
         var json = base.ToJson();
diff --git a/src/Raven.Server/ServerWide/Commands/ConnectionStrings/PutConnectionStringCommand.cs b/src/Raven.Server/ServerWide/Commands/ConnectionStrings/PutConnectionStringCommand.cs
index 41b1f21b64a..dac6fb787b8 100644
--- a/src/Raven.Server/ServerWide/Commands/ConnectionStrings/PutConnectionStringCommand.cs
+++ b/src/Raven.Server/ServerWide/Commands/ConnectionStrings/PutConnectionStringCommand.cs
@@ -176,23 +176,29 @@ public override void UpdateDatabaseRecord(DatabaseRecord record, long etag)
                         $"connection string{(identifierConflicts.Length > 1 ? "s" : "")} " +
                         $"'{string.Join("', '", identifierConflicts.Select(x => x.Key))}'");
 
-                var etlsUsingConnection = record.AiEtls.Where(x => x.ConnectionStringName == ConnectionString.Name).ToArray();
+                var etlsUsingConnection = record.AiEtls.Where(x => x.ConnectionStringName == ConnectionString.Name).ToArray();
                 var isConnectionStringInUse = etlsUsingConnection.Length > 0;
 
-                if (isUpdate && isConnectionStringInUse && oldAiConnectionString.HasCriticalChanges(ConnectionString))
+                if (isUpdate && isConnectionStringInUse)
                 {
-                    var etlNames = string.Join("', '", etlsUsingConnection.Select(x => x.Name));
-                    throw new RachisApplyException(
-                        $"Cannot update connection string '{ConnectionString.Name}' because it contains changes that would affect the structure or creation process of embeddings. " +
-                        $"Changes to parameters like model selection, tokenization settings, embedding dimensions, or normalization options require recreating all embeddings to maintain consistency. " +
-                        $"To proceed with these changes:{Environment.NewLine}" +
-                        $"1. Delete the existing ETL task{(etlsUsingConnection.Length == 1 ? "" : "s")}{Environment.NewLine}" +
-                        $"2. {(etlsUsingConnection.Length == 1 ?
-                            "After deleting the ETL task, you can either update this connection string or create a new one with your desired settings" :
-                            $"Create a new connection string with your desired settings, as this connection string is used by ETL tasks: '{etlNames}'")}{Environment.NewLine}" +
-                        $"3. Create a new ETL task using the {(etlsUsingConnection.Length == 1 ? "updated or new" : "new")} connection string{Environment.NewLine}" +
-                        "This will ensure all documents are processed with consistent settings and maintain data integrity. " +
-                        "Note: While you can update non-critical settings like API keys or endpoints without recreating the task, your current changes include critical modifications that affect the embedding process.");
+                    var differences = oldAiConnectionString.Compare(ConnectionString);
+                    // Reject when ANY regeneration-relevant bit is set. The mask is a composite, and
+                    // Enum.HasFlag would be true only if every one of its bits were set at once.
+                    if ((differences & AiSettingsCompareDifferences.RequiresEmbeddingsRegeneration) != AiSettingsCompareDifferences.None)
+                    {
+                        var etlNames = string.Join("', '", etlsUsingConnection.Select(x => x.Name));
+                        throw new RachisApplyException(
+                            $"Cannot update connection string '{ConnectionString.Name}' because it contains changes that would affect the structure or creation process of embeddings. " +
+                            $"Changes to parameters like model selection, tokenization settings, embedding dimensions, or normalization options require recreating all embeddings to maintain consistency. " +
+                            $"To proceed with these changes:{Environment.NewLine}" +
+                            $"1. Delete the existing ETL task{(etlsUsingConnection.Length == 1 ? "" : "s")}{Environment.NewLine}" +
+                            $"2. {(etlsUsingConnection.Length == 1 ?
+                                "After deleting the ETL task, you can either update this connection string or create a new one with your desired settings" :
+                                $"Create a new connection string with your desired settings, as this connection string is used by ETL tasks: '{etlNames}'")}{Environment.NewLine}" +
+                            $"3. Create a new ETL task using the {(etlsUsingConnection.Length == 1 ? "updated or new" : "new")} connection string{Environment.NewLine}" +
+                            "This will ensure all documents are processed with consistent settings and maintain data integrity. " +
+                            "Note: While you can update non-critical settings like API keys or endpoints without recreating the task, your current changes include critical modifications that affect the embedding process.");
+                    }
                 }
 
                 record.AiConnectionStrings[ConnectionString.Name] = ConnectionString;
diff --git a/src/Raven.Server/ServerWide/Commands/ETL/UpdateEtlCommand.cs b/src/Raven.Server/ServerWide/Commands/ETL/UpdateEtlCommand.cs
index 576ed0a7b90..7efd44e40d3 100644
--- a/src/Raven.Server/ServerWide/Commands/ETL/UpdateEtlCommand.cs
+++ b/src/Raven.Server/ServerWide/Commands/ETL/UpdateEtlCommand.cs
@@ -184,7 +184,10 @@ public override void UpdateDatabaseRecord(DatabaseRecord record, long etag)
                             "This will ensure all documents are processed with consistent settings and maintain data integrity.");
                     }
 
-                    if (oldConfig.Connection.HasCriticalChanges(Configuration.Connection))
+                    var differences = oldConfig.Connection.Compare(Configuration.Connection);
+                    // Reject when ANY regeneration-relevant bit is set. The mask is a composite, and
+                    // Enum.HasFlag would be true only if every one of its bits were set at once.
+                    if ((differences & AiSettingsCompareDifferences.RequiresEmbeddingsRegeneration) != AiSettingsCompareDifferences.None)
                     {
                         throw new RachisApplyException(
                             $"Cannot update AI ETL task '{Configuration.Name}' because it contains critical changes in the connection settings that would affect the structure or creation process of embeddings. " +