Skip to content

Commit

Permalink
RavenDB-23445 Fixed indexing of nulls in numerical data
Browse files Browse the repository at this point in the history
  • Loading branch information
Lwiel committed Jan 8, 2025
1 parent 2ec52e3 commit 3a5cdb3
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 6 deletions.
32 changes: 27 additions & 5 deletions src/Raven.Server/Documents/Indexes/Static/StaticIndexBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ internal static object CreateVector(IndexField indexField, object value)
private static object VectorFromEmbedding(IndexField currentIndexingField, object value)
{
if (value is null)
return null;
return VectorValue.Null;

var vectorOptions = currentIndexingField.Vector;
var allocator = CurrentIndexingScope.Current.IndexContext.Allocator;
Expand Down Expand Up @@ -507,10 +507,12 @@ object HandleEnumerable(IEnumerable enumerable)
object HandleBlittableJsonReaderArray(BlittableJsonReaderArray data)
{
var dataLength = data.Length;
var first = data[0];

if (TryGetFirstNonNullElement(data, out var firstNonNull) == false)
return VectorValue.Null;

//Array of base64s
if (IsBase64(first))
if (IsBase64(firstNonNull))
{
var values = new object[dataLength];
for (var i = 0; i < dataLength; i++)
Expand All @@ -520,11 +522,16 @@ object HandleBlittableJsonReaderArray(BlittableJsonReaderArray data)
}

//Array of arrays
if (first is BlittableJsonReaderArray)
if (firstNonNull is BlittableJsonReaderArray)
{
var values = new object[dataLength];
for (var i = 0; i < dataLength; i++)
values[i] = HandleBlittableJsonReaderArray((BlittableJsonReaderArray)data[i]);
{
if (data[i] == null)
values[i] = VectorValue.Null;
else
values[i] = HandleBlittableJsonReaderArray((BlittableJsonReaderArray)data[i]);
}

return values;
}
Expand Down Expand Up @@ -644,6 +651,21 @@ object HandleJsArray(JsArray jsArray)
}

// Returns true when val is one of the runtime types the surrounding code handles as a base64 entry.
bool IsBase64(object val)
{
    return val is LazyStringValue
        or LazyCompressedStringValue
        or string
        or DynamicNullObject
        or JsString;
}

// Finds the first non-null element of data; the caller inspects it to decide how the array is encoded.
// Returns true and sets 'first' to that element when one exists.
// Returns false and leaves 'first' null when the array is empty or contains only nulls.
bool TryGetFirstNonNullElement(BlittableJsonReaderArray data, out object first)
{
    first = null;

    // Bound every read by Length so position 0 of an empty array is never indexed.
    var length = data.Length;
    for (var i = 0; i < length; i++)
    {
        first = data[i];

        if (first is not null)
            return true;
    }

    return false;
}
}

private static object VectorFromText(IndexField indexField, object value)
Expand Down
46 changes: 45 additions & 1 deletion test/SlowTests/Issues/RavenDB-23445.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Raven.Client.Documents;
using Raven.Client.Documents.Indexes;
using Raven.Client.Documents.Indexes.Vector;
using Raven.Server.Config;
using Tests.Infrastructure;
using Xunit;
using Xunit.Abstractions;
Expand All @@ -16,7 +17,7 @@ public RavenDB_23445(ITestOutputHelper output) : base(output)
{
}

[RavenTheory(RavenTestCategory.Indexes)]
[RavenTheory(RavenTestCategory.Indexes | RavenTestCategory.Vector)]
[RavenData(SearchEngineMode = RavenSearchEngineMode.All)]
public void TestIndexingOfNulls(Options options)
{
Expand Down Expand Up @@ -75,6 +76,48 @@ public void CanCreateVectorIndexFromCSharp(VectorEmbeddingType vectorEmbeddingTy
}
}

// Stores documents whose "Vectors" arrays mix null entries with numerical embeddings and checks that
// vector search by embedding still returns exactly the document holding the queried embedding.
[RavenTheory(RavenTestCategory.Indexes | RavenTestCategory.Vector)]
[RavenData(SearchEngineMode = RavenSearchEngineMode.All)]
public void TestIndexingOfNullsInNumericalData(Options options)
{
    options.ModifyDatabaseRecord += record =>
    {
        record.Settings[RavenConfiguration.GetKey(x => x.Indexing.CoraxIncludeDocumentScore)] = true.ToString();
    };

    using (var store = GetDocumentStore(options))
    {
        using (var session = store.OpenSession())
        {
            // Nulls appear before and after the embedding (docs/1) and before it only (docs/2, docs/3).
            var d1 = new Document() { Id = "docs/1", Vectors = [ null, [0.1f, 0.2f, 0.3f, 0.4f], null ] };
            var d2 = new Document() { Id = "docs/2", Vectors = [ null, [1.1f, 1.2f, 1.3f] ] };
            var d3 = new Document() { Id = "docs/3", Vectors = [ null, [-0.5f, -0.6f, -0.7f, -0.8f] ] };

            session.Store(d1);
            session.Store(d2);
            session.Store(d3);

            session.SaveChanges();

            // Query with the exact embedding stored in docs/1.
            var queriedEmbedding1 = new float[] { 0.1f, 0.2f, 0.3f, 0.4f };

            var res = session.Query<Document>().VectorSearch(x => x.WithEmbedding("Vectors"), factory => factory.ByEmbedding(queriedEmbedding1), minimumSimilarity: 0.9f).ToList();

            // NOTE(review): the WaitForUserToContinueTheTest(store) call that stood here looked like a
            // leftover manual-debugging hook and was removed so the test never pauses for user input.

            Assert.Single(res);
            Assert.Equal("docs/1", res[0].Id);

            // Query with the exact embedding stored in docs/3.
            var queriedEmbedding2 = new float[] { -0.5f, -0.6f, -0.7f, -0.8f };

            res = session.Query<Document>().VectorSearch(x => x.WithEmbedding("Vectors"), factory => factory.ByEmbedding(queriedEmbedding2), minimumSimilarity: 0.9f).ToList();

            Assert.Single(res);
            Assert.Equal("docs/3", res[0].Id);
        }
    }
}

private class TextVectorIndex : AbstractIndexCreationTask<Document>
{
public TextVectorIndex()
Expand Down Expand Up @@ -103,6 +146,7 @@ private class Document
public string Text2 { get; set; }
public string[] TextArr { get; set; }
public float[] Vector { get; set; }
public float?[][] Vectors { get; set; }
}

private class Dto
Expand Down

0 comments on commit 3a5cdb3

Please sign in to comment.