Skip to content

Commit

Permalink
RavenDB-22703 Fixed terms deletion
Browse files: browse the repository at this point in the history
  • Loading branch information
Lwiel committed Aug 21, 2024
1 parent 99a2362 commit 2a0cfe8
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 33 deletions.
44 changes: 29 additions & 15 deletions src/Corax/Indexing/IndexWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,12 @@ private void ProcessDeletes()
Page lastVisitedPage = default;

var fieldsByRootPage = GetIndexedFieldByRootPage(_fieldsTree);

var nullTermsMarkers = new HashSet<long>();
Querying.IndexSearcher.LoadNullTermMarkers(_nullEntriesPostingListsTree, nullTermsMarkers);
Querying.IndexSearcher.LoadSpecialTermMarkers(_nullEntriesPostingListsTree, nullTermsMarkers);

var nonExistingTermsMarkers = new HashSet<long>();
Querying.IndexSearcher.LoadSpecialTermMarkers(_nonExistingEntriesPostingListsTree, nonExistingTermsMarkers);

long dicId = CompactTree.GetDictionaryId(llt);

Expand All @@ -447,22 +451,21 @@ private void ProcessDeletes()

var termsPerEntryIndex = InsertTermsPerEntry(entryToDelete);

RecordTermDeletionsForEntry(entryTerms, llt, fieldsByRootPage, nullTermsMarkers, dicId, entryToDelete, termsPerEntryIndex);
RecordTermDeletionsForEntry(entryTerms, llt, fieldsByRootPage, nullTermsMarkers, nonExistingTermsMarkers, dicId, entryToDelete, termsPerEntryIndex);
Container.Delete(llt, _entriesTermsContainerId, entryTermsId);
}
}

private void RecordTermDeletionsForEntry(Container.Item entryTerms, LowLevelTransaction llt, Dictionary<long, IndexedField> fieldsByRootPage, HashSet<long> nullTermMarkers, long dicId, long entryToDelete, int termsPerEntryIndex)
private void RecordTermDeletionsForEntry(Container.Item entryTerms, LowLevelTransaction llt, Dictionary<long, IndexedField> fieldsByRootPage, HashSet<long> nullTermMarkers, HashSet<long> nonExistingTermMarkers, long dicId, long entryToDelete, int termsPerEntryIndex)
{
var reader = new EntryTermsReader(llt, nullTermMarkers, entryTerms.Address, entryTerms.Length, dicId);
var reader = new EntryTermsReader(llt, nullTermMarkers, nonExistingTermMarkers, entryTerms.Address, entryTerms.Length, dicId);
reader.Reset();
while (reader.MoveNextStoredField())
{
//Null/empty is not stored in container, just exists as marker.
// Null/empty is not stored in container, just exists as marker.
if (reader.TermId == -1)
continue;


Container.Delete(llt, _storedFieldsContainerId, reader.TermId);
}
reader.Reset();
Expand All @@ -475,15 +478,13 @@ private void RecordTermDeletionsForEntry(Container.Item entryTerms, LowLevelTran

if (reader.IsNull)
{
ref var nullTermLocation = ref CollectionsMarshal.GetValueRefOrAddDefault(field.Textual, Constants.NullValueSlice, out var nullExists);
if (nullExists == false)
{
nullTermLocation = field.Storage.Count;
field.Storage.AddByRef(new EntriesModifications(1));
// We dont want to reclaim the term name
}
ref var nullTerm = ref field.Storage.GetAsRef(nullTermLocation);
nullTerm.Removal(_entriesAllocator, entryToDelete, termsPerEntryIndex, reader.Frequency);
RemoveSpecialTerm(field, reader, Constants.NullValueSlice, entryToDelete, termsPerEntryIndex);
continue;
}

if (reader.IsNonExisting)
{
RemoveSpecialTerm(field, reader, Constants.NonExistingValueSlice, entryToDelete, termsPerEntryIndex);
continue;
}

Expand Down Expand Up @@ -528,6 +529,19 @@ private void RecordTermDeletionsForEntry(Container.Item entryTerms, LowLevelTran
term.Removal(_entriesAllocator, entryToDelete, termsPerEntryIndex, freq: 1);
}
}

/// <summary>
/// Records the removal of <paramref name="entryToDelete"/> from a special term of
/// <paramref name="field"/>. In this file it is called with the null-value and the
/// non-existing-value slices.
/// </summary>
/// <param name="field">Indexed field whose <c>Textual</c> map and <c>Storage</c> list track pending modifications.</param>
/// <param name="reader">Reader positioned on the term being removed; only its <c>Frequency</c> is read here.</param>
/// <param name="termSlice">Key identifying the special term inside <c>field.Textual</c>.</param>
/// <param name="entryToDelete">Id of the entry being deleted.</param>
/// <param name="termsPerEntryIndex">Index forwarded unchanged to <c>Removal</c>.</param>
private void RemoveSpecialTerm(IndexedField field, EntryTermsReader reader, Slice termSlice, long entryToDelete, int termsPerEntryIndex)
{
    ref var storageSlot = ref CollectionsMarshal.GetValueRefOrAddDefault(field.Textual, termSlice, out var alreadyTracked);
    if (alreadyTracked == false)
    {
        // First time this term is touched: the slot must be assigned *before* appending,
        // so it points at the index the new modifications object is about to occupy.
        // We don't want to reclaim the term name.
        storageSlot = field.Storage.Count;
        field.Storage.AddByRef(new EntriesModifications(1));
    }

    // Invoke Removal directly on the by-ref element so the stored struct itself is mutated.
    field.Storage.GetAsRef(storageSlot).Removal(_entriesAllocator, entryToDelete, termsPerEntryIndex, reader.Frequency);
}

public Dictionary<long, string> GetIndexedFieldNamesByRootPage()
{
Expand Down
45 changes: 29 additions & 16 deletions src/Corax/Querying/IndexSearcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ public sealed unsafe partial class IndexSearcher : IDisposable

private readonly IndexFieldsMapping _fieldMapping;
private HashSet<long> _nullTermsMarkers;
private HashSet<long> _nonExistingTermsMarkers;
private Tree _persistedDynamicTreeAnalyzer;
private long? _numberOfEntries;
public bool _nullTermsMarkersLoaded;
private bool _nonExistingTermsMarkersLoaded;

/// <summary>
/// When true no SIMD instruction will be used. Useful for checking that optimized algorithms behave in the same
Expand Down Expand Up @@ -124,14 +126,10 @@ public EntryTermsReader GetEntryTermsReader(long id, ref Page p)
if (_entryIdToLocation.TryGetValue(id, out var loc) == false)
throw new InvalidOperationException("Unable to find entry id: " + id);

if (_nullTermsMarkersLoaded == false)
{
_nullTermsMarkersLoaded = true;
InitializeNullTermsMarkers();
}
InitializeSpecialTermsMarkers();

var item = Container.MaybeGetFromSamePage(_transaction.LowLevelTransaction, ref p, loc);
return new EntryTermsReader(_transaction.LowLevelTransaction, _nullTermsMarkers, item.Address, item.Length, _dictionaryId);
return new EntryTermsReader(_transaction.LowLevelTransaction, _nullTermsMarkers, _nonExistingTermsMarkers, item.Address, item.Length, _dictionaryId);
}

internal void EncodeAndApplyAnalyzerForMultipleTerms(in FieldMetadata binding, ReadOnlySpan<char> term, ref ContextBoundNativeList<Slice> terms)
Expand Down Expand Up @@ -584,26 +582,41 @@ public IncludeNonExistingMatch<TInner> IncludeNonExistingMatch<TInner>(in FieldM
return new IncludeNonExistingMatch<TInner>(this, inner, field, forward);
}

private void InitializeNullTermsMarkers()
private void InitializeSpecialTermsMarkers()
{
_nullTermsMarkers = new HashSet<long>();
InitNullPostingList();
if (_nullPostingListsTree == null)
return;
if (_nullTermsMarkersLoaded == false)
{
_nullTermsMarkersLoaded = true;
_nullTermsMarkers = new HashSet<long>();

InitNullPostingList();

if (_nullPostingListsTree != null)
LoadSpecialTermMarkers(_nullPostingListsTree, _nullTermsMarkers);
}

LoadNullTermMarkers(_nullPostingListsTree, _nullTermsMarkers);
if (_nonExistingTermsMarkersLoaded == false)
{
_nonExistingTermsMarkersLoaded = true;
_nonExistingTermsMarkers = new HashSet<long>();

InitNonExistingPostingList();

if (_nonExistingPostingListsTree != null)
LoadSpecialTermMarkers(_nonExistingPostingListsTree, _nonExistingTermsMarkers);
}
}

public static void LoadNullTermMarkers(Tree nullPostingList, HashSet<long> nullTermsMarkers)
public static void LoadSpecialTermMarkers(Tree postingList, HashSet<long> termsMarkers)
{
using (var it = nullPostingList.Iterate(prefetch: false))
using (var it = postingList.Iterate(prefetch: false))
{
if (it.Seek(Slices.BeforeAllKeys))
{
do
{
(_, long nullTermId) = it.CreateReaderForCurrent().ReadStructure<(long, long)>();
nullTermsMarkers.Add(nullTermId);
(_, long termId) = it.CreateReaderForCurrent().ReadStructure<(long, long)>();
termsMarkers.Add(termId);
} while (it.MoveNext());
}
}
Expand Down
14 changes: 12 additions & 2 deletions src/Corax/Utils/EntryTermsReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ public unsafe struct EntryTermsReader
{
private readonly LowLevelTransaction _llt;
private readonly HashSet<long> _nullTermsMarkers;
private readonly HashSet<long> _nonExistingTermsMarkers;
private readonly long _dicId;
private byte* _cur;
private readonly byte* _end, _start;
Expand All @@ -96,12 +97,14 @@ public unsafe struct EntryTermsReader
public bool IsRaw;
public bool IsList;
public bool IsNull;
public bool IsNonExisting;

//rootPages has to be sorted.
public EntryTermsReader(LowLevelTransaction llt, HashSet<long> nullTermsMarkers, byte* cur, int size, long dicId)
public EntryTermsReader(LowLevelTransaction llt, HashSet<long> nullTermsMarkers, HashSet<long> nonExistingTermsMarkers, byte* cur, int size, long dicId)
{
_llt = llt;
_nullTermsMarkers = nullTermsMarkers;
_nonExistingTermsMarkers = nonExistingTermsMarkers;
_start = _cur;
_cur = cur;
_start = cur;
Expand All @@ -116,6 +119,7 @@ public EntryTermsReader(LowLevelTransaction llt, HashSet<long> nullTermsMarkers,
public bool FindNextStored(long fieldRootPage)
{
IsNull = false;
IsNonExisting = false;

while (MoveNextStoredField())
{
Expand All @@ -137,6 +141,7 @@ public bool FindNext(long fieldRootPage)
public bool FindNextSpatial(long fieldRootPage)
{
IsNull = false;
IsNonExisting = false;

while (MoveNextSpatial())
{
Expand All @@ -153,6 +158,7 @@ public bool MoveNext()
return false;

IsNull = false;
IsNonExisting = false;
var termContainerId = VariableSizeEncoding.Read<long>(_cur, out var offset) + _prevTerm;
_prevTerm = termContainerId;
_cur += offset;
Expand Down Expand Up @@ -197,6 +203,7 @@ public bool MoveNextStoredField()
return false;

IsNull = false;
IsNonExisting = false;
var termContainerId = VariableSizeEncoding.Read<long>(_cur, out var offset) + _prevTerm;
_prevTerm = termContainerId;
_cur += offset;
Expand Down Expand Up @@ -228,10 +235,12 @@ private void HandleRegularTerm(long termContainerId)
}

IsNull = _nullTermsMarkers.Contains(TermId);
IsNonExisting = _nonExistingTermsMarkers.Contains(TermId);

Container.Get(_llt, TermId, out var termItem);
FieldRootPage = termItem.PageLevelMetadata;
if (IsNull == false)

if (IsNull == false && IsNonExisting == false)
{
TermsReader.Set(Current, termItem, _dicId);
}
Expand Down Expand Up @@ -320,6 +329,7 @@ public void Reset()
_prevLong = 0;
_prevTerm = 0;
IsNull = false;
IsNonExisting = false;
}

public string Debug(Indexing.IndexWriter w)
Expand Down
14 changes: 14 additions & 0 deletions test/SlowTests/Issues/RavenDB-22703.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Linq;
using FastTests;
using Raven.Client.Documents.Commands;
using Raven.Client.Documents.Operations;
using Sparrow.Json.Parsing;
using Tests.Infrastructure;
using Xunit;
Expand Down Expand Up @@ -51,6 +52,19 @@ public void TestQueryWithOrderByClauseAndNoWhereClause(Options options)
.ToList();

Assert.Equal(4, res.Count);

var deleteByQueryOp = new DeleteByQueryOperation("from 'Bars'");

store.Operations.Send(deleteByQueryOp);

Indexes.WaitForIndexing(store);

res = session.Query<Bar>()
.OrderByDescending(b => b.Foo.BarBool)
.ThenByDescending(b => b.Foo.BarShort)
.ToList();

Assert.Equal(0, res.Count);
}
}
}
Expand Down

0 comments on commit 2a0cfe8

Please sign in to comment.