Skip to content

Commit

Permalink
RavenDB-23556: Replace HasCriticalChanges method with granular Compar…
Browse files Browse the repository at this point in the history
…e to provide detailed insights into AI settings changes
  • Loading branch information
ArieSLV committed Jan 30, 2025
1 parent 1974b0c commit 65d128d
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 56 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,47 @@
using Sparrow.Json;
using System;
using Sparrow.Json;
using Sparrow.Json.Parsing;

namespace Raven.Client.Documents.Operations.ETL.AI;

public abstract class AbstractAiSettings : IDynamicJsonValueConvertible
{
public abstract bool HasSettings();
public abstract bool HasCriticalChanges(AbstractAiSettings other);
public abstract AiSettingsCompareDifferences Compare(AbstractAiSettings other);

public abstract DynamicJsonValue ToJson();
}

/// <summary>
/// Bit set naming the categories of AI settings that differ between two configurations.
/// </summary>
/// <remarks>
/// Values from <see cref="EmbeddingStructure"/> downwards are composite masks made of
/// several single-bit members. To ask whether <em>any</em> bit of a composite is present,
/// use <c>(value &amp; mask) != 0</c>. <see cref="Enum.HasFlag(Enum)"/> answers a different
/// question: it is true only when <em>every</em> bit of the mask is set in the value.
/// </remarks>
[Flags]
public enum AiSettingsCompareDifferences
{
    /// <summary>No differences.</summary>
    None = 0x000,

    // ----- Mathematical structure of the embeddings -----

    /// <summary>The embedding dimensions differ.</summary>
    EmbeddingDimensions = 0x001,

    /// <summary>The embedding normalization differs.</summary>
    EmbeddingNormalization = 0x002,

    /// <summary>The pooling strategy differs.</summary>
    PoolingStrategy = 0x004,

    /// <summary>Model name/version changes that affect the embedding structure.</summary>
    ModelArchitecture = 0x008,

    // ----- Text preprocessing applied to the input -----

    /// <summary>Text preprocessing differs, e.g. case sensitivity or unicode normalization.</summary>
    TextPreprocessing = 0x010,

    /// <summary>Tokenization differs, e.g. special tokens (CLS, SEP, PAD, etc.).</summary>
    TokenizationSettings = 0x020,

    /// <summary>Sequence limits differ, e.g. maximum tokens.</summary>
    SequenceLimits = 0x040,

    // ----- API configuration -----

    /// <summary>Endpoint URLs differ.</summary>
    EndpointConfiguration = 0x080,

    /// <summary>API keys, organization IDs and similar credentials differ.</summary>
    AuthenticationSettings = 0x100,

    // ----- Not verifiable by comparing settings -----

    /// <summary>
    /// A change that could affect embedding generation but cannot be verified by comparing settings.
    /// </summary>
    DeploymentConfiguration = 0x200,

    // ----- Composite masks for common scenarios -----

    /// <summary>Union of every member describing the mathematical structure of embeddings.</summary>
    EmbeddingStructure = EmbeddingDimensions
                         | EmbeddingNormalization
                         | PoolingStrategy
                         | ModelArchitecture,

    /// <summary>Union of every member describing input text processing.</summary>
    InputProcessing = TextPreprocessing
                      | TokenizationSettings
                      | SequenceLimits,

    /// <summary>Union of the endpoint and authentication members.</summary>
    ConnectionConfig = EndpointConfiguration
                       | AuthenticationSettings,

    /// <summary>
    /// Union of <see cref="EmbeddingStructure"/>, <see cref="InputProcessing"/> and
    /// <see cref="DeploymentConfiguration"/>. This is a multi-bit mask; see the remarks on
    /// the enum for how to test it.
    /// </summary>
    RequiresEmbeddingsRegeneration = EmbeddingStructure
                                     | InputProcessing
                                     | DeploymentConfiguration,

    /// <summary>Every defined difference bit.</summary>
    All = RequiresEmbeddingsRegeneration
          | ConnectionConfig
}
28 changes: 16 additions & 12 deletions src/Raven.Client/Documents/Operations/ETL/AI/AiConnectionString.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,30 +106,34 @@ private static string GenerateIdentifier(string input)
return string.IsNullOrEmpty(finalResult) ? $"{nameof(AiConnectionString)}Identifier" : finalResult;
}

public bool HasCriticalChanges(AiConnectionString newConnectionString)
public AiSettingsCompareDifferences Compare(AiConnectionString newConnectionString)
{
if (newConnectionString == null)
return true;
return AiSettingsCompareDifferences.All;

var result = AiSettingsCompareDifferences.None;

if (Identifier != newConnectionString.Identifier)
return true;
result |= AiSettingsCompareDifferences.ConnectionConfig;

var oldProvider = GetActiveProvider();
var newProvider = newConnectionString.GetActiveProvider();

if (oldProvider != newProvider)
return true;
return AiSettingsCompareDifferences.All;

return oldProvider switch
result |= oldProvider switch
{
AiConnectorType.OpenAi => OpenAiSettings.HasCriticalChanges(newConnectionString.OpenAiSettings),
AiConnectorType.AzureOpenAI => AzureOpenAiSettings.HasCriticalChanges(newConnectionString.AzureOpenAiSettings),
AiConnectorType.Ollama => OllamaSettings.HasCriticalChanges(newConnectionString.OllamaSettings),
AiConnectorType.Onnx => OnnxSettings.HasCriticalChanges(newConnectionString.OnnxSettings),
AiConnectorType.Google => GoogleSettings.HasCriticalChanges(newConnectionString.GoogleSettings),
AiConnectorType.HuggingFace => HuggingFaceSettings.HasCriticalChanges(newConnectionString.HuggingFaceSettings),
_ => true
AiConnectorType.OpenAi => OpenAiSettings.Compare(newConnectionString.OpenAiSettings),
AiConnectorType.AzureOpenAI => AzureOpenAiSettings.Compare(newConnectionString.AzureOpenAiSettings),
AiConnectorType.Ollama => OllamaSettings.Compare(newConnectionString.OllamaSettings),
AiConnectorType.Onnx => OnnxSettings.Compare(newConnectionString.OnnxSettings),
AiConnectorType.Google => GoogleSettings.Compare(newConnectionString.GoogleSettings),
AiConnectorType.HuggingFace => HuggingFaceSettings.Compare(newConnectionString.HuggingFaceSettings),
_ => AiSettingsCompareDifferences.All
};

return result;
}

private AiConnectorType GetActiveProvider()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,20 @@ public override bool HasSettings()
string.IsNullOrWhiteSpace(DeploymentName) == false;
}

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not AzureOpenAiSettings azureSettings)
return true;
return AiSettingsCompareDifferences.All;

return base.HasCriticalChanges(other) ||
Dimensions != azureSettings.Dimensions;
var differences = base.Compare(other);

if (DeploymentName != azureSettings.DeploymentName)
differences |= AiSettingsCompareDifferences.DeploymentConfiguration;

if (Dimensions != azureSettings.Dimensions)
differences |= AiSettingsCompareDifferences.EmbeddingDimensions;

return differences;
}

public override DynamicJsonValue ToJson()
Expand Down
16 changes: 12 additions & 4 deletions src/Raven.Client/Documents/Operations/ETL/AI/GoogleSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,21 @@ public override bool HasSettings()
string.IsNullOrWhiteSpace(ApiKey) == false;
}

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not GoogleSettings googleSettings)
return true;
return AiSettingsCompareDifferences.All;

return Model != googleSettings.Model ||
AiVersion != googleSettings.AiVersion;
var differences = AiSettingsCompareDifferences.None;

if (Model != googleSettings.Model ||
AiVersion != googleSettings.AiVersion)
differences |= AiSettingsCompareDifferences.ModelArchitecture;

if (ApiKey != googleSettings.ApiKey)
differences |= AiSettingsCompareDifferences.AuthenticationSettings;

return differences;
}

public override DynamicJsonValue ToJson()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,23 @@ public override bool HasSettings()
return string.IsNullOrWhiteSpace(Model) == false;
}

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not HuggingFaceSettings huggingFaceSettings)
return true;
return AiSettingsCompareDifferences.All;

return Model != huggingFaceSettings.Model;
var differences = AiSettingsCompareDifferences.None;

if (Model != huggingFaceSettings.Model)
differences |= AiSettingsCompareDifferences.ModelArchitecture;

if (Endpoint != huggingFaceSettings.Endpoint)
differences |= AiSettingsCompareDifferences.EndpointConfiguration;

if (ApiKey != huggingFaceSettings.ApiKey)
differences |= AiSettingsCompareDifferences.AuthenticationSettings;

return differences;
}

public override DynamicJsonValue ToJson()
Expand Down
14 changes: 11 additions & 3 deletions src/Raven.Client/Documents/Operations/ETL/AI/OllamaSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,20 @@ public override bool HasSettings()
string.IsNullOrWhiteSpace(Model) == false;
}

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not OllamaSettings ollamaSettings)
return true;
return AiSettingsCompareDifferences.All;

return Model != ollamaSettings.Model;
var differences = AiSettingsCompareDifferences.None;

if (Model != ollamaSettings.Model)
differences |= AiSettingsCompareDifferences.ModelArchitecture;

if (Uri != ollamaSettings.Uri)
differences |= AiSettingsCompareDifferences.EndpointConfiguration;

return differences;
}

public override DynamicJsonValue ToJson() =>
Expand Down
31 changes: 22 additions & 9 deletions src/Raven.Client/Documents/Operations/ETL/AI/OnnxSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,33 @@ public sealed class OnnxSettings : AbstractAiSettings

public override bool HasSettings() => true;

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not OnnxSettings onnxSettings)
return true;
return AiSettingsCompareDifferences.All;

return CaseSensitive != onnxSettings.CaseSensitive ||
MaximumTokens != onnxSettings.MaximumTokens ||
ClsToken != onnxSettings.ClsToken ||
var differences = AiSettingsCompareDifferences.None;

if (CaseSensitive != onnxSettings.CaseSensitive ||
UnicodeNormalization != onnxSettings.UnicodeNormalization)
differences |= AiSettingsCompareDifferences.TextPreprocessing;

if (ClsToken != onnxSettings.ClsToken ||
UnknownToken != onnxSettings.UnknownToken ||
SepToken != onnxSettings.SepToken ||
PadToken != onnxSettings.PadToken ||
UnicodeNormalization != onnxSettings.UnicodeNormalization ||
PoolingMode != onnxSettings.PoolingMode ||
NormalizeEmbeddings != onnxSettings.NormalizeEmbeddings;
PadToken != onnxSettings.PadToken)
differences |= AiSettingsCompareDifferences.TokenizationSettings;

if (MaximumTokens != onnxSettings.MaximumTokens)
differences |= AiSettingsCompareDifferences.SequenceLimits;

if (PoolingMode != onnxSettings.PoolingMode)
differences |= AiSettingsCompareDifferences.PoolingStrategy;

if (NormalizeEmbeddings != onnxSettings.NormalizeEmbeddings)
differences |= AiSettingsCompareDifferences.EmbeddingNormalization;

return differences;
}

public override DynamicJsonValue ToJson() =>
Expand Down
19 changes: 15 additions & 4 deletions src/Raven.Client/Documents/Operations/ETL/AI/OpenAiBaseSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,23 @@ public override bool HasSettings()
string.IsNullOrWhiteSpace(Model) == false;
}

public override bool HasCriticalChanges(AbstractAiSettings other)
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
if (other is not OpenAiBaseSettings openAiBaseSettings)
return true;
if (other is not OpenAiBaseSettings openAiSettings)
return AiSettingsCompareDifferences.All;

return Model != openAiBaseSettings.Model;
var differences = AiSettingsCompareDifferences.None;

if (ApiKey != openAiSettings.ApiKey)
differences |= AiSettingsCompareDifferences.AuthenticationSettings;

if (Endpoint != openAiSettings.Endpoint)
differences |= AiSettingsCompareDifferences.EndpointConfiguration;

if (Model != openAiSettings.Model)
differences |= AiSettingsCompareDifferences.ModelArchitecture;

return differences;
}

public override DynamicJsonValue ToJson()
Expand Down
14 changes: 14 additions & 0 deletions src/Raven.Client/Documents/Operations/ETL/AI/OpenAiSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ public OpenAiSettings()
/// </summary>
public string? ProjectId { get; set; }

/// <summary>
/// Compares these settings with <paramref name="other"/> and reports which categories differ.
/// </summary>
/// <param name="other">The settings to compare against.</param>
/// <returns>
/// <see cref="AiSettingsCompareDifferences.All"/> when <paramref name="other"/> is not an
/// <see cref="OpenAiSettings"/> instance. Otherwise the differences reported by the base
/// implementation, plus <see cref="AiSettingsCompareDifferences.AuthenticationSettings"/>
/// when the organization or project identifier differs.
/// </returns>
public override AiSettingsCompareDifferences Compare(AbstractAiSettings other)
{
    if (other is OpenAiSettings candidate)
    {
        // Start from whatever the base comparison already detected.
        var result = base.Compare(other);

        // Organization and project identifiers are reported as authentication settings.
        var sameIdentity = OrganizationId == candidate.OrganizationId &&
                           ProjectId == candidate.ProjectId;

        if (sameIdentity == false)
            result |= AiSettingsCompareDifferences.AuthenticationSettings;

        return result;
    }

    // A different settings type is reported as every difference at once.
    return AiSettingsCompareDifferences.All;
}

public override DynamicJsonValue ToJson()
{
var json = base.ToJson();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,27 @@ public override void UpdateDatabaseRecord(DatabaseRecord record, long etag)
$"connection string{(identifierConflicts.Length > 1 ? "s" : "")} " +
$"'{string.Join("', '", identifierConflicts.Select(x => x.Key))}'");

var etlsUsingConnection = record.AiEtls.Where(x => x.ConnectionStringName == ConnectionString.Name).ToArray();
var etlsUsingConnection = record.AiEtls.Where(x => x.ConnectionStringName == ConnectionString.Name).ToArray();
var isConnectionStringInUse = etlsUsingConnection.Length > 0;

if (isUpdate && isConnectionStringInUse && oldAiConnectionString.HasCriticalChanges(ConnectionString))
if (isUpdate && isConnectionStringInUse)
{
var etlNames = string.Join("', '", etlsUsingConnection.Select(x => x.Name));
throw new RachisApplyException(
$"Cannot update connection string '{ConnectionString.Name}' because it contains changes that would affect the structure or creation process of embeddings. " +
$"Changes to parameters like model selection, tokenization settings, embedding dimensions, or normalization options require recreating all embeddings to maintain consistency. " +
$"To proceed with these changes:{Environment.NewLine}" +
$"1. Delete the existing ETL task{(etlsUsingConnection.Length == 1 ? "" : "s")}{Environment.NewLine}" +
$"2. {(etlsUsingConnection.Length == 1 ?
"After deleting the ETL task, you can either update this connection string or create a new one with your desired settings" :
$"Create a new connection string with your desired settings, as this connection string is used by ETL tasks: '{etlNames}'")}{Environment.NewLine}" +
$"3. Create a new ETL task using the {(etlsUsingConnection.Length == 1 ? "updated or new" : "new")} connection string{Environment.NewLine}" +
"This will ensure all documents are processed with consistent settings and maintain data integrity. " +
"Note: While you can update non-critical settings like API keys or endpoints without recreating the task, your current changes include critical modifications that affect the embedding process.");
var differences = oldAiConnectionString.Compare(ConnectionString);
if (differences.HasFlag(AiSettingsCompareDifferences.RequiresEmbeddingsRegeneration))
{
var etlNames = string.Join("', '", etlsUsingConnection.Select(x => x.Name));
throw new RachisApplyException(
$"Cannot update connection string '{ConnectionString.Name}' because it contains changes that would affect the structure or creation process of embeddings. " +
$"Changes to parameters like model selection, tokenization settings, embedding dimensions, or normalization options require recreating all embeddings to maintain consistency. " +
$"To proceed with these changes:{Environment.NewLine}" +
$"1. Delete the existing ETL task{(etlsUsingConnection.Length == 1 ? "" : "s")}{Environment.NewLine}" +
$"2. {(etlsUsingConnection.Length == 1 ?
"After deleting the ETL task, you can either update this connection string or create a new one with your desired settings" :
$"Create a new connection string with your desired settings, as this connection string is used by ETL tasks: '{etlNames}'")}{Environment.NewLine}" +
$"3. Create a new ETL task using the {(etlsUsingConnection.Length == 1 ? "updated or new" : "new")} connection string{Environment.NewLine}" +
"This will ensure all documents are processed with consistent settings and maintain data integrity. " +
"Note: While you can update non-critical settings like API keys or endpoints without recreating the task, your current changes include critical modifications that affect the embedding process.");
}
}

record.AiConnectionStrings[ConnectionString.Name] = ConnectionString;
Expand Down
3 changes: 2 additions & 1 deletion src/Raven.Server/ServerWide/Commands/ETL/UpdateEtlCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ public override void UpdateDatabaseRecord(DatabaseRecord record, long etag)
"This will ensure all documents are processed with consistent settings and maintain data integrity.");
}

if (oldConfig.Connection.HasCriticalChanges(Configuration.Connection))
var differences = oldConfig.Connection.Compare(Configuration.Connection);
if (differences.HasFlag(AiSettingsCompareDifferences.RequiresEmbeddingsRegeneration))
{
throw new RachisApplyException(
$"Cannot update AI ETL task '{Configuration.Name}' because it contains critical changes in the connection settings that would affect the structure or creation process of embeddings. " +
Expand Down

0 comments on commit 65d128d

Please sign in to comment.