diff --git a/src/Raven.Server/Documents/ETL/Providers/AI/AiEtlDocumentTransformer.cs b/src/Raven.Server/Documents/ETL/Providers/AI/AiEtlDocumentTransformer.cs index 87e635f45aa..609d74c45f1 100644 --- a/src/Raven.Server/Documents/ETL/Providers/AI/AiEtlDocumentTransformer.cs +++ b/src/Raven.Server/Documents/ETL/Providers/AI/AiEtlDocumentTransformer.cs @@ -33,23 +33,23 @@ public override void Initialize(bool debugMode) protected override void AddLoadedAttachment(JsValue reference, string name, Attachment attachment) { - throw new System.NotImplementedException(); + throw new NotImplementedException(); } protected override void AddLoadedCounter(JsValue reference, string name, long value) { - throw new System.NotImplementedException(); + throw new NotImplementedException(); } protected override void AddLoadedTimeSeries(JsValue reference, string name, IEnumerable entries) { - throw new System.NotImplementedException(); + throw new NotImplementedException(); } protected override string[] LoadToDestinations { get; } protected override void LoadToFunction(string tableName, ScriptRunnerResult colsAsObject) { - throw new System.NotImplementedException(); + throw new NotImplementedException(); } /// docId -> > @@ -70,22 +70,26 @@ public override void Transform(AiEtlItem item, EtlStatsScope stats, EtlProcessSt if (BlittableJsonTraverserHelper.TryRead(BlittableJsonTraverser.Default, item.Document, fieldName, out var fieldValue) == false) continue; - if (aiEtlEmbeddingItem.Values.ContainsKey(fieldName) == false) - aiEtlEmbeddingItem.Values.Add(fieldName, new List()); + if (aiEtlEmbeddingItem.Values.TryGetValue(fieldName, out var values) == false) + aiEtlEmbeddingItem.Values[fieldName] = values = new List(); - if (fieldValue is LazyStringValue lsv) - aiEtlEmbeddingItem.Values[fieldName].Add(new AiEtlEmbeddingItemValue() { TextualValue = lsv }); - - // todo lazy and dja - else if (fieldValue is BlittableJsonReaderArray bjra) + switch (fieldValue) { - foreach (var textualValue in bjra) + case LazyStringValue lsv: + values.Add(new AiEtlEmbeddingItemValue() { TextualValue = lsv }); + break; + case LazyCompressedStringValue lcsv: + values.Add(new AiEtlEmbeddingItemValue() { TextualValue = lcsv }); + break; + case BlittableJsonReaderArray bjra: { - aiEtlEmbeddingItem.Values[fieldName].Add(new AiEtlEmbeddingItemValue() { TextualValue = (LazyStringValue)textualValue }); + foreach (var textualValue in bjra) + values.Add(new AiEtlEmbeddingItemValue() { TextualValue = (LazyStringValue)textualValue }); + break; } + default: + throw new ArgumentException($"Unsupported field value type: {fieldValue.GetType()}"); } - else - throw new Exception(); } _currentRun.CurrentRun.Add(aiEtlEmbeddingItem); diff --git a/src/Raven.Server/Documents/ETL/Providers/AI/Enumerators/TombstonesToAiEtlItems.cs b/src/Raven.Server/Documents/ETL/Providers/AI/Enumerators/TombstonesToAiEtlItems.cs index b21d71162a4..b3d3d8068f0 100644 --- a/src/Raven.Server/Documents/ETL/Providers/AI/Enumerators/TombstonesToAiEtlItems.cs +++ b/src/Raven.Server/Documents/ETL/Providers/AI/Enumerators/TombstonesToAiEtlItems.cs @@ -19,6 +19,7 @@ public sealed class TombstonesToAiEtlItems : IEnumerator public AiEtlItem Current { get; private set; } + // todo public TombstonesToAiEtlItems(DocumentsOperationContext context, IEnumerator tombstones, string collection, bool trackAttachments) { _context = context; diff --git a/test/SlowTests/Issues/RavenDB-23556.cs b/test/SlowTests/Issues/RavenDB-23556.cs index afe37698baf..b8fef19b3b9 100644 --- a/test/SlowTests/Issues/RavenDB-23556.cs +++ b/test/SlowTests/Issues/RavenDB-23556.cs @@ -1,8 +1,13 @@ +using System; using System.Collections.Generic; +using System.Linq; using FastTests; +using Newtonsoft.Json.Linq; using Raven.Client.Documents.Operations.ETL; using Raven.Client.Documents.Operations.ETL.AI; +using Raven.Server.Documents.ETL.Providers.AI; using Tests.Infrastructure; +using Xunit; using Xunit.Abstractions; namespace SlowTests.Issues; @@ -14,45 +19,20 @@ public RavenDB_23556(ITestOutputHelper output) : base(output) } [RavenFact(RavenTestCategory.Etl)] - public void Test() + public void TestDocumentsWithSingleValue() { - const string connectionStringName = "some Connection стринг StringName"; + const string connectionStringName = "connection string name"; using (var store = GetDocumentStore()) { + var dto = new Dto { Name = "Name1" }; + using (var session = store.OpenSession()) { - var dto = new Dto { Name = "very cool name" }; session.Store(dto); session.SaveChanges(); } - - /* - var configuration = new AiEtlConfiguration - { - Name = "someETLConfigurationName", - ConnectionStringName = connectionStringName, - Transforms = [new Transformation { Collections = ["Dtos"], Name = "CoolName", Script = "loadToWhatever(){}" }], - FieldsToInclude = ["Name"], - AiConnectorType = AiConnectorType.OpenAi - }; - */ - - // var connectionString = new AiConnectionString - // { - // Name = connectionStringName, - // OllamaSettings = new OllamaSettings { Model = "mistral-nemo", Uri = "http://127.0.0.1:11434" } - // }; - - // var connectionString = new AiConnectionString - // { - // Name = connectionStringName, - // OnnxSettings = new OnnxSettings - // { - // CaseSensitive = false - // } - // }; - + // todo handle lack of transforms var configuration = new AiEtlConfiguration() { @@ -65,18 +45,41 @@ public void Test() }; var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() }; - - /* - var connectionString = new AiConnectionString - { - Name = connectionStringName, - OpenAiSettings = new OpenAiSettings(apiKey: "someApiKey", endpoint: "someEndpoint", model: "someModel") - }; - */ + var etlDone = Etl.WaitForEtlToComplete(store); + Etl.AddEtl(store, configuration, connectionString); - WaitForUserToContinueTheTest(store); + etlDone.Wait(TimeSpan.FromSeconds(10)); + + using (var session = store.OpenSession()) + { + var valueHash = AiHelper.CalculateValueHash(dto.Name); + var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash); + var valueEmbeddingsDocument = session.Load(valueEmbeddingsDocumentId); + + var expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument).Name1; + + var attachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument); + + Assert.Single(attachmentNames); + Assert.Equal(expectedAttachmentName, attachmentNames[0].Name); + + var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto.Id); + var embeddingsDocument = session.Load(embeddingsDocumentId); + + var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name]; + var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name; + var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject(); + + Assert.Single(attachmentNamesForNameProperty); + Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]); + + attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument); + + Assert.Single(attachmentNames); + Assert.Equal(expectedAttachmentName, attachmentNames[0].Name); + } } } @@ -87,9 +90,10 @@ public void TestDocumentsWithListOfValues() using (var store = GetDocumentStore()) { + var dto = new Dto { Names = new List { "Name1", "Name2", "Name3" } }; + using (var session = store.OpenSession()) { - var dto = new Dto { Names = new List { "Name1", "Name2", "Name3" } }; session.Store(dto); session.SaveChanges(); } @@ -107,14 +111,164 @@ public void TestDocumentsWithListOfValues() var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() }; + var etlDone = Etl.WaitForEtlToComplete(store); + Etl.AddEtl(store, configuration, connectionString); - WaitForUserToContinueTheTest(store); + etlDone.Wait(TimeSpan.FromSeconds(10)); + + using (var session = store.OpenSession()) + { + var expectedAttachmentNames = new List(); + + foreach (var name in dto.Names) + { + var valueHash = AiHelper.CalculateValueHash(name); + var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash); + var valueEmbeddingsDocument = session.Load(valueEmbeddingsDocumentId); + + var expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument)[name]; + expectedAttachmentNames.Add(expectedAttachmentName); + + var cacheAttachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument); + + Assert.Single(cacheAttachmentNames); + Assert.Equal(expectedAttachmentName, cacheAttachmentNames[0].Name); + } + + var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto.Id); + var embeddingsDocument = session.Load(embeddingsDocumentId); + + var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name]; + var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Names; + var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject(); + + Assert.Equal(3, attachmentNamesForNameProperty.Length); + Assert.Equal(expectedAttachmentNames[0], attachmentNamesForNameProperty[0]); + Assert.Equal(expectedAttachmentNames[1], attachmentNamesForNameProperty[1]); + Assert.Equal(expectedAttachmentNames[2], attachmentNamesForNameProperty[2]); + + var attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument).Select(x => x.Name).ToList(); + + Assert.Equal(3, attachmentNames.Count); + Assert.Contains(expectedAttachmentNames[0], attachmentNames); + Assert.Contains(expectedAttachmentNames[1], attachmentNames); + Assert.Contains(expectedAttachmentNames[2], attachmentNames); + } + } + } + + [RavenFact(RavenTestCategory.Etl)] + public void TestIfEmbeddingsAreGeneratedOnlyOnceInDifferentBatches() + { + const string connectionStringName = "AI Connection String Name"; + + using (var store = GetDocumentStore()) + { + var dto1 = new Dto { Name = "Name1" }; + var dto2 = new Dto { Name = "Name1" }; + + using (var session = store.OpenSession()) + { + session.Store(dto1); + session.SaveChanges(); + } + + // todo handle lack of transforms + var configuration = new AiEtlConfiguration() + { + Name = "someETLConfigurationName", + AiConnectorType = AiConnectorType.Onnx, + AllowEtlOnNonEncryptedChannel = true, + ConnectionStringName = connectionStringName, + FieldsToInclude = ["Name"], + Transforms = [new Transformation { Collections = ["Dtos"], Name = "CoolName", Script = "loadToWhatever(){}" }] + }; + + var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() }; + + var etlDone = Etl.WaitForEtlToComplete(store); + + Etl.AddEtl(store, configuration, connectionString); + + etlDone.Wait(TimeSpan.FromSeconds(10)); + + string expectedAttachmentName, expectedValueEmbeddingsDocumentId, expectedChangeVector; + + using (var session = store.OpenSession()) + { + var valueHash = AiHelper.CalculateValueHash(dto1.Name); + var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash); + var valueEmbeddingsDocument = session.Load(valueEmbeddingsDocumentId); + + expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument).Name1; + expectedValueEmbeddingsDocumentId = (string)((dynamic)valueEmbeddingsDocument).Id; + expectedChangeVector = session.Advanced.GetChangeVectorFor(valueEmbeddingsDocument); + + var attachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument); + + Assert.Single(attachmentNames); + Assert.Equal(expectedAttachmentName, attachmentNames[0].Name); + + var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto1.Id); + var embeddingsDocument = session.Load(embeddingsDocumentId); + + var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name]; + var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name; + var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject(); + + Assert.Single(attachmentNamesForNameProperty); + Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]); + + attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument); + + Assert.Single(attachmentNames); + Assert.Equal(expectedAttachmentName, attachmentNames[0].Name); + } + + etlDone.Reset(); + + using (var session = store.OpenSession()) + { + session.Store(dto2); + session.SaveChanges(); + } + + etlDone.Wait(TimeSpan.FromSeconds(10)); + + using (var session = store.OpenSession()) + { + var valueHash = AiHelper.CalculateValueHash(dto2.Name); + var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash); + var valueEmbeddingsDocument = session.Load(valueEmbeddingsDocumentId); + + var changeVector = session.Advanced.GetChangeVectorFor(valueEmbeddingsDocument); + + Assert.Equal(expectedValueEmbeddingsDocumentId, (string)((dynamic)valueEmbeddingsDocument).Id); + Assert.Equal(expectedChangeVector, changeVector); + Assert.Equal(expectedAttachmentName, (string)((dynamic)valueEmbeddingsDocument).Name1); + + var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto2.Id); + var embeddingsDocument = session.Load(embeddingsDocumentId); + + var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name]; + var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name; + var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject(); + + Assert.Single(attachmentNamesForNameProperty); + Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]); + + var attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument); + + Assert.Single(attachmentNames); + Assert.Equal(expectedAttachmentName, attachmentNames[0].Name); + } } } private class Dto { + public string Id { get; set; } public string Name { get; set; } public List Names { get; set; } }