Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use large threads for processing Boogie programs #710

Merged
merged 10 commits into from
Mar 31, 2023
2 changes: 1 addition & 1 deletion Source/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<!-- Target framework and package configuration -->
<PropertyGroup>
<Version>2.16.3</Version>
<Version>2.16.4</Version>
<TargetFramework>net6.0</TargetFramework>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
<Authors>Boogie</Authors>
Expand Down
8 changes: 6 additions & 2 deletions Source/ExecutionEngine/ExecutionEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public ProcessedProgram(Program program) : this(program, (_, _, _) => { }) {

public class ExecutionEngine : IDisposable
{
private static readonly WorkStealingTaskScheduler LargeThreadScheduler = new(16 * 1024 * 1024);
private static readonly TaskFactory LargeThreadTaskFactory = new(
CancellationToken.None, TaskCreationOptions.DenyChildAttach,
TaskContinuationOptions.None, LargeThreadScheduler);

static int autoRequestIdCount;

Expand Down Expand Up @@ -870,7 +874,7 @@ private IObservable<IVerificationStatus> VerifyImplementationWithoutCaching(Proc
var verificationResult = new VerificationResult(impl, programId);

var batchCompleted = new Subject<(Split split, VCResult vcResult)>();
var completeVerification = Task.Run(async () =>
var completeVerification = LargeThreadTaskFactory.StartNew(async () =>
{
var vcgen = new VCGen(processedProgram.Program, checkerPool);
vcgen.CachingActionCounts = stats.CachingActionCounts;
Expand Down Expand Up @@ -931,7 +935,7 @@ private IObservable<IVerificationStatus> VerifyImplementationWithoutCaching(Proc

batchCompleted.OnCompleted();
return new Completed(verificationResult);
}, cancellationToken);
}, cancellationToken).Unwrap();

return batchCompleted.Select(t => new BatchCompleted(t.split, t.vcResult)).Merge<IVerificationStatus>(Observable.FromAsync(() => completeVerification));
}
Expand Down
219 changes: 219 additions & 0 deletions Source/ExecutionEngine/WorkStealingQueue.cs
keyboardDrummer marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
using System.Collections.Generic;
using System.Threading;

namespace Microsoft.Boogie;

/// <summary>
/// A work-stealing queue: a single "owner" thread pushes and pops items at the
/// tail (<see cref="LocalPush"/> / <see cref="LocalPop"/>), while any number of
/// foreign threads may concurrently remove items from the head via
/// <see cref="TrySteal"/>. The owner's fast paths are lock-free; a lock is
/// taken only when the two ends might interact (near-empty, resize, steal).
/// </summary>
/// <typeparam name="T">Specifies the type of data stored in the queue.</typeparam>
internal class WorkStealingQueue<T> where T : class
{
    // Initial capacity. Must be a power of two so that (index & m_mask)
    // maps a monotonically growing index onto a circular-buffer slot.
    private const int INITIAL_SIZE = 32;

    // Circular buffer of queued items. Interior slots may hold null after
    // TryFindAndPop removes an element; all readers skip nulls.
    private T[] m_array = new T[INITIAL_SIZE];

    // Always m_array.Length - 1 (all-ones), kept in sync with m_array on resize.
    private int m_mask = INITIAL_SIZE - 1;

    // Head (steal end) and tail (owner end). Indices grow without wrapping;
    // the difference is the element count. Declared volatile so cross-thread
    // reads observe recent writes.
    private volatile int m_headIndex = 0;
    private volatile int m_tailIndex = 0;

    // Serializes interactions between the owner (near-empty pop, resize)
    // and foreign threads (steal, interior removal).
    private object m_foreignLock = new object();

    /// <summary>
    /// Pushes an item onto the tail of the queue. Must be called only by the
    /// owning thread. Doubles the buffer when full.
    /// </summary>
    internal void LocalPush(T obj)
    {
        int tail = m_tailIndex;

        // When there are at least 2 elements' worth of space, we can take the fast path.
        if (tail < m_headIndex + m_mask)
        {
            m_array[tail & m_mask] = obj;
            m_tailIndex = tail + 1;
        }
        else
        {
            // We need to contend with foreign pops, so we lock.
            lock (m_foreignLock)
            {
                int head = m_headIndex;
                int count = m_tailIndex - m_headIndex;

                // If there is still space (one left), just add the element.
                if (count >= m_mask)
                {
                    // We're full; expand the queue by doubling its size,
                    // compacting the live elements to the start of the new array.
                    T[] newArray = new T[m_array.Length << 1];
                    for (int i = 0; i < m_array.Length; i++) {
                        newArray[i] = m_array[(i + head) & m_mask];
                    }

                    // Reset the field values, incl. the mask.
                    m_array = newArray;
                    m_headIndex = 0;
                    m_tailIndex = tail = count;
                    m_mask = (m_mask << 1) | 1;
                }

                m_array[tail & m_mask] = obj;
                m_tailIndex = tail + 1;
            }
        }
    }

    /// <summary>
    /// Pops an item from the tail of the queue. Must be called only by the
    /// owning thread. Returns false (and sets <paramref name="obj"/> to null)
    /// when the queue is empty or the last element was stolen first.
    /// </summary>
    internal bool LocalPop(ref T obj)
    {
        while (true)
        {
            // Decrement the tail using a fence to ensure subsequent read doesn't come before.
            int tail = m_tailIndex;
            if (m_headIndex >= tail)
            {
                obj = null;
                return false;
            }

            tail -= 1;
            // Interlocked.Exchange acts as a full fence: the tail decrement is
            // published before the head re-read below.
#pragma warning disable 0420
            Interlocked.Exchange(ref m_tailIndex, tail);
#pragma warning restore 0420

            // If there is no interaction with a take, we can head down the fast path.
            if (m_headIndex <= tail)
            {
                int idx = tail & m_mask;
                obj = m_array[idx];

                // Check for nulls in the array (left behind by TryFindAndPop);
                // skip them and retry at the new tail.
                if (obj == null) {
                    continue;
                }

                m_array[idx] = null;
                return true;
            }
            else
            {
                // Interaction with takes: 0 or 1 elements left.
                lock (m_foreignLock)
                {
                    if (m_headIndex <= tail)
                    {
                        // Element still available. Take it.
                        int idx = tail & m_mask;
                        obj = m_array[idx];

                        // Check for nulls in the array.
                        if (obj == null) {
                            continue;
                        }

                        m_array[idx] = null;
                        return true;
                    }
                    else
                    {
                        // We lost the race, element was stolen, restore the tail.
                        m_tailIndex = tail + 1;
                        obj = null;
                        return false;
                    }
                }
            }
        }
    }

    /// <summary>
    /// Steals an item from the head of the queue. Safe to call from any
    /// thread; always takes the foreign lock before committing the steal.
    /// Returns false when the queue is (or becomes) empty.
    /// </summary>
    internal bool TrySteal(ref T obj)
    {
        obj = null;

        while (true)
        {
            if (m_headIndex >= m_tailIndex) {
                return false;
            }

            lock (m_foreignLock)
            {
                // Increment head, and ensure read of tail doesn't move before it (fence).
#pragma warning disable 0420
                int head = m_headIndex;
                Interlocked.Exchange(ref m_headIndex, head + 1);
#pragma warning restore 0420

                if (head < m_tailIndex)
                {
                    int idx = head & m_mask;
                    obj = m_array[idx];

                    // Check for nulls in the array (removed by TryFindAndPop);
                    // skip and retry — the head was already advanced past it.
                    if (obj == null) {
                        continue;
                    }

                    m_array[idx] = null;
                    return true;
                }
                else
                {
                    // Failed, restore head.
                    m_headIndex = head;
                    obj = null;
                }
            }

            return false;
        }
    }

    /// <summary>
    /// Removes a specific item from anywhere in the queue, if present.
    /// Returns true if <paramref name="obj"/> was found and removed.
    /// Interior removals leave a null slot that pop/steal filter out later.
    /// </summary>
    internal bool TryFindAndPop(T obj)
    {
        // We do an O(N) search for the work item. The theory of work stealing and our
        // inlining logic is that most waits will happen on recently queued work. And
        // since recently queued work will be close to the tail end (which is where we
        // begin our search), we will likely find it quickly. In the worst case, we
        // will traverse the whole local queue; this is typically not going to be a
        // problem (although degenerate cases are clearly an issue) because local work
        // queues tend to be somewhat shallow in length, and because if we fail to find
        // the work item, we are about to block anyway (which is very expensive).

        for (int i = m_tailIndex - 1; i >= m_headIndex; i--)
        {
            if (m_array[i & m_mask] == obj)
            {
                // If we found the element, block out steals to avoid interference.
                lock (m_foreignLock)
                {
                    // If we lost the race, bail.
                    if (m_array[i & m_mask] == null)
                    {
                        return false;
                    }

                    // Otherwise, null out the element.
                    m_array[i & m_mask] = null;

                    // And then check to see if we can fix up the indexes (if we're at
                    // the edge). If we can't, we just leave nulls in the array and they'll
                    // get filtered out eventually (but may lead to superfluous resizing).
                    // NOTE(review): `i` starts at m_tailIndex - 1, so `i == m_tailIndex`
                    // appears unreachable and the tail fix-up never fires; this matches
                    // the upstream .NET ThreadPool reference source — confirm intentional.
                    if (i == m_tailIndex) {
                        m_tailIndex -= 1;
                    } else if (i == m_headIndex) {
                        m_headIndex += 1;
                    }

                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Returns a snapshot of the queue's non-null contents, tail-first.
    /// NOTE(review): reads indices and slots without taking the foreign lock,
    /// so the snapshot is only approximate under concurrent mutation —
    /// presumably intended for diagnostics; confirm against callers.
    /// </summary>
    internal T[] ToArray()
    {
        List<T> list = new List<T>();
        for (int i = m_tailIndex - 1; i >= m_headIndex; i--)
        {
            T obj = m_array[i & m_mask];
            if (obj != null) {
                list.Add(obj);
            }
        }
        return list.ToArray();
    }
}
Loading