Skip to content

Commit

Permalink
RavenDB-23556 Added first tests and small fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Lwiel committed Jan 30, 2025
1 parent 4694c38 commit b84995e
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,23 @@ public override void Initialize(bool debugMode)

protected override void AddLoadedAttachment(JsValue reference, string name, Attachment attachment)
{
throw new System.NotImplementedException();
throw new NotImplementedException();
}

protected override void AddLoadedCounter(JsValue reference, string name, long value)
{
throw new System.NotImplementedException();
throw new NotImplementedException();
}

protected override void AddLoadedTimeSeries(JsValue reference, string name, IEnumerable<SingleResult> entries)
{
throw new System.NotImplementedException();
throw new NotImplementedException();
}

protected override string[] LoadToDestinations { get; }
protected override void LoadToFunction(string tableName, ScriptRunnerResult colsAsObject)
{
throw new System.NotImplementedException();
throw new NotImplementedException();
}

/// docId -> <fieldName, <fieldValues>>
Expand All @@ -70,22 +70,26 @@ public override void Transform(AiEtlItem item, EtlStatsScope stats, EtlProcessSt
if (BlittableJsonTraverserHelper.TryRead(BlittableJsonTraverser.Default, item.Document, fieldName, out var fieldValue) == false)
continue;

if (aiEtlEmbeddingItem.Values.ContainsKey(fieldName) == false)
aiEtlEmbeddingItem.Values.Add(fieldName, new List<AiEtlEmbeddingItemValue>());
if (aiEtlEmbeddingItem.Values.TryGetValue(fieldName, out var values) == false)
aiEtlEmbeddingItem.Values[fieldName] = values = new List<AiEtlEmbeddingItemValue>();

if (fieldValue is LazyStringValue lsv)
aiEtlEmbeddingItem.Values[fieldName].Add(new AiEtlEmbeddingItemValue() { TextualValue = lsv });

// todo lazy and dja
else if (fieldValue is BlittableJsonReaderArray bjra)
switch (fieldValue)
{
foreach (var textualValue in bjra)
case LazyStringValue lsv:
values.Add(new AiEtlEmbeddingItemValue() { TextualValue = lsv });
break;
case LazyCompressedStringValue lcsv:
values.Add(new AiEtlEmbeddingItemValue() { TextualValue = lcsv });
break;
case BlittableJsonReaderArray bjra:
{
aiEtlEmbeddingItem.Values[fieldName].Add(new AiEtlEmbeddingItemValue() { TextualValue = (LazyStringValue)textualValue });
foreach (var textualValue in bjra)
values.Add(new AiEtlEmbeddingItemValue() { TextualValue = (LazyStringValue)textualValue });
break;
}
default:
throw new ArgumentException($"Unsupported field value type: {fieldValue.GetType()}");
}
else
throw new Exception();
}

_currentRun.CurrentRun.Add(aiEtlEmbeddingItem);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public sealed class TombstonesToAiEtlItems : IEnumerator<AiEtlItem>

public AiEtlItem Current { get; private set; }

// todo
public TombstonesToAiEtlItems(DocumentsOperationContext context, IEnumerator<Tombstone> tombstones, string collection, bool trackAttachments)
{
_context = context;
Expand Down
236 changes: 195 additions & 41 deletions test/SlowTests/Issues/RavenDB-23556.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using FastTests;
using Newtonsoft.Json.Linq;
using Raven.Client.Documents.Operations.ETL;
using Raven.Client.Documents.Operations.ETL.AI;
using Raven.Server.Documents.ETL.Providers.AI;
using Tests.Infrastructure;
using Xunit;
using Xunit.Abstractions;

namespace SlowTests.Issues;
Expand All @@ -14,45 +19,20 @@ public RavenDB_23556(ITestOutputHelper output) : base(output)
}

[RavenFact(RavenTestCategory.Etl)]
public void Test()
public void TestDocumentsWithSingleValue()
{
const string connectionStringName = "some Connection стринг StringName";
const string connectionStringName = "connection string name";

using (var store = GetDocumentStore())
{
var dto = new Dto { Name = "Name1" };

using (var session = store.OpenSession())
{
var dto = new Dto { Name = "very cool name" };
session.Store(dto);
session.SaveChanges();
}

/*
var configuration = new AiEtlConfiguration
{
Name = "someETLConfigurationName",
ConnectionStringName = connectionStringName,
Transforms = [new Transformation { Collections = ["Dtos"], Name = "CoolName", Script = "loadToWhatever(){}" }],
FieldsToInclude = ["Name"],
AiConnectorType = AiConnectorType.OpenAi
};
*/

// var connectionString = new AiConnectionString
// {
// Name = connectionStringName,
// OllamaSettings = new OllamaSettings { Model = "mistral-nemo", Uri = "http://127.0.0.1:11434" }
// };

// var connectionString = new AiConnectionString
// {
// Name = connectionStringName,
// OnnxSettings = new OnnxSettings
// {
// CaseSensitive = false
// }
// };


// todo handle lack of transforms
var configuration = new AiEtlConfiguration()
{
Expand All @@ -65,18 +45,41 @@ public void Test()
};

var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() };

/*
var connectionString = new AiConnectionString
{
Name = connectionStringName,
OpenAiSettings = new OpenAiSettings(apiKey: "someApiKey", endpoint: "someEndpoint", model: "someModel")
};
*/

var etlDone = Etl.WaitForEtlToComplete(store);

Etl.AddEtl(store, configuration, connectionString);

WaitForUserToContinueTheTest(store);
etlDone.Wait(TimeSpan.FromSeconds(10));

using (var session = store.OpenSession())
{
var valueHash = AiHelper.CalculateValueHash(dto.Name);
var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash);
var valueEmbeddingsDocument = session.Load<object>(valueEmbeddingsDocumentId);

var expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument).Name1;

var attachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument);

Assert.Single(attachmentNames);
Assert.Equal(expectedAttachmentName, attachmentNames[0].Name);

var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto.Id);
var embeddingsDocument = session.Load<object>(embeddingsDocumentId);

var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name];
var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name;
var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject<string[]>();

Assert.Single(attachmentNamesForNameProperty);
Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]);

attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument);

Assert.Single(attachmentNames);
Assert.Equal(expectedAttachmentName, attachmentNames[0].Name);
}
}
}

Expand All @@ -87,9 +90,10 @@ public void TestDocumentsWithListOfValues()

using (var store = GetDocumentStore())
{
var dto = new Dto { Names = new List<string> { "Name1", "Name2", "Name3" } };

using (var session = store.OpenSession())
{
var dto = new Dto { Names = new List<string> { "Name1", "Name2", "Name3" } };
session.Store(dto);
session.SaveChanges();
}
Expand All @@ -107,14 +111,164 @@ public void TestDocumentsWithListOfValues()

var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() };

var etlDone = Etl.WaitForEtlToComplete(store);

Etl.AddEtl(store, configuration, connectionString);

WaitForUserToContinueTheTest(store);
etlDone.Wait(TimeSpan.FromSeconds(10));

using (var session = store.OpenSession())
{
var expectedAttachmentNames = new List<string>();

foreach (var name in dto.Names)
{
var valueHash = AiHelper.CalculateValueHash(name);
var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash);
var valueEmbeddingsDocument = session.Load<object>(valueEmbeddingsDocumentId);

var expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument)[name];
expectedAttachmentNames.Add(expectedAttachmentName);

var cacheAttachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument);

Assert.Single(cacheAttachmentNames);
Assert.Equal(expectedAttachmentName, cacheAttachmentNames[0].Name);
}

var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto.Id);
var embeddingsDocument = session.Load<object>(embeddingsDocumentId);

var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name];
var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Names;
var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject<string[]>();

Assert.Equal(3, attachmentNamesForNameProperty.Length);
Assert.Equal(expectedAttachmentNames[0], attachmentNamesForNameProperty[0]);
Assert.Equal(expectedAttachmentNames[1], attachmentNamesForNameProperty[1]);
Assert.Equal(expectedAttachmentNames[2], attachmentNamesForNameProperty[2]);

var attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument).Select(x => x.Name).ToList();

Assert.Equal(3, attachmentNames.Count);
Assert.Contains(expectedAttachmentNames[0], attachmentNames);
Assert.Contains(expectedAttachmentNames[1], attachmentNames);
Assert.Contains(expectedAttachmentNames[2], attachmentNames);
}
}
}

[RavenFact(RavenTestCategory.Etl)]
public void TestIfEmbeddingsAreGeneratedOnlyOnceInDifferentBatches()
{
const string connectionStringName = "AI Connection String Name";

using (var store = GetDocumentStore())
{
var dto1 = new Dto { Name = "Name1" };
var dto2 = new Dto { Name = "Name1" };

using (var session = store.OpenSession())
{
session.Store(dto1);
session.SaveChanges();
}

// todo handle lack of transforms
var configuration = new AiEtlConfiguration()
{
Name = "someETLConfigurationName",
AiConnectorType = AiConnectorType.Onnx,
AllowEtlOnNonEncryptedChannel = true,
ConnectionStringName = connectionStringName,
FieldsToInclude = ["Name"],
Transforms = [new Transformation { Collections = ["Dtos"], Name = "CoolName", Script = "loadToWhatever(){}" }]
};

var connectionString = new AiConnectionString() { Name = connectionStringName, OnnxSettings = new OnnxSettings() };

var etlDone = Etl.WaitForEtlToComplete(store);

Etl.AddEtl(store, configuration, connectionString);

etlDone.Wait(TimeSpan.FromSeconds(10));

string expectedAttachmentName, expectedValueEmbeddingsDocumentId, expectedChangeVector;

using (var session = store.OpenSession())
{
var valueHash = AiHelper.CalculateValueHash(dto1.Name);
var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash);
var valueEmbeddingsDocument = session.Load<object>(valueEmbeddingsDocumentId);

expectedAttachmentName = (string)((dynamic)valueEmbeddingsDocument).Name1;
expectedValueEmbeddingsDocumentId = (string)((dynamic)valueEmbeddingsDocument).Id;
expectedChangeVector = session.Advanced.GetChangeVectorFor(valueEmbeddingsDocument);

var attachmentNames = session.Advanced.Attachments.GetNames(valueEmbeddingsDocument);

Assert.Single(attachmentNames);
Assert.Equal(expectedAttachmentName, attachmentNames[0].Name);

var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto1.Id);
var embeddingsDocument = session.Load<object>(embeddingsDocumentId);

var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name];
var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name;
var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject<string[]>();

Assert.Single(attachmentNamesForNameProperty);
Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]);

attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument);

Assert.Single(attachmentNames);
Assert.Equal(expectedAttachmentName, attachmentNames[0].Name);
}

etlDone.Reset();

using (var session = store.OpenSession())
{
session.Store(dto2);
session.SaveChanges();
}

etlDone.Wait(TimeSpan.FromSeconds(10));

using (var session = store.OpenSession())
{
var valueHash = AiHelper.CalculateValueHash(dto2.Name);
var valueEmbeddingsDocumentId = AiHelper.GetValueEmbeddingsDocumentId(configuration.Name, valueHash);
var valueEmbeddingsDocument = session.Load<object>(valueEmbeddingsDocumentId);

var changeVector = session.Advanced.GetChangeVectorFor(valueEmbeddingsDocument);

Assert.Equal(expectedValueEmbeddingsDocumentId, (string)((dynamic)valueEmbeddingsDocument).Id);
Assert.Equal(expectedChangeVector, changeVector);
Assert.Equal(expectedAttachmentName, (string)((dynamic)valueEmbeddingsDocument).Name1);

var embeddingsDocumentId = AiHelper.GetDocumentEmbeddingsId(dto2.Id);
var embeddingsDocument = session.Load<object>(embeddingsDocumentId);

var configurationValues = ((dynamic)embeddingsDocument)[configuration.Name];
var attachmentNamesForNamePropertyJArray = (JArray)configurationValues.Name;
var attachmentNamesForNameProperty = attachmentNamesForNamePropertyJArray.ToObject<string[]>();

Assert.Single(attachmentNamesForNameProperty);
Assert.Equal(expectedAttachmentName, attachmentNamesForNameProperty[0]);

var attachmentNames = session.Advanced.Attachments.GetNames(embeddingsDocument);

Assert.Single(attachmentNames);
Assert.Equal(expectedAttachmentName, attachmentNames[0].Name);
}
}
}

private class Dto
{
public string Id { get; set; }
public string Name { get; set; }
public List<string> Names { get; set; }
}
Expand Down

0 comments on commit b84995e

Please sign in to comment.