diff --git a/src/System.Private.CoreLib/src/System/Collections/Concurrent/LowLevelConcurrentQueue.cs b/src/System.Private.CoreLib/src/System/Collections/Concurrent/LowLevelConcurrentQueue.cs index fd51be3e8e4..5e799db68e1 100644 --- a/src/System.Private.CoreLib/src/System/Collections/Concurrent/LowLevelConcurrentQueue.cs +++ b/src/System.Private.CoreLib/src/System/Collections/Concurrent/LowLevelConcurrentQueue.cs @@ -1,24 +1,21 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -#pragma warning disable 0420 -// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +// +// A lock-free, concurrent queue primitive, and its associated debugger view type. +// +// This is a stripped-down version of ConcurrentQueue, for use from within the System.Threading +// surface to eliminate a dependency on System.Collections.Concurrent. +// Please try to keep this in sync with the public ConcurrentQueue implementation. // -// A lock-free, concurrent queue primitive, and its associated debugger view type. -// -// This is a stripped-down version of ConcurrentQueue, for use from within the System.Threading -// surface to eliminate a dependency on System.Collections.Concurrent. -// Please try to keep this in sync with the public ConcurrentQueue implementation. 
-// -// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- -using System; -using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; -using System.Security; +using System.Runtime.Serialization; using System.Threading; namespace System.Collections.Concurrent @@ -28,42 +25,143 @@ namespace System.Collections.Concurrent /// /// Specifies the type of elements in the queue. /// - /// All public and protected members of are thread-safe and may be used + /// All public and protected members of are thread-safe and may be used /// concurrently from multiple threads. /// - internal class LowLevelConcurrentQueue /*: IProducerConsumerCollection*/ : IEnumerable + [DebuggerDisplay("Count = {Count}")] + internal class LowLevelConcurrentQueue : /* IProducerConsumerCollection, */ IReadOnlyCollection { - //fields of ConcurrentQueue - private volatile Segment _head; + // This implementation provides an unbounded, multi-producer multi-consumer queue + // that supports the standard Enqueue/TryDequeue operations, as well as support for + // snapshot enumeration (GetEnumerator, ToArray, CopyTo), peeking, and Count/IsEmpty. + // It is composed of a linked list of bounded ring buffers, each of which has a head + // and a tail index, isolated from each other to minimize false sharing. As long as + // the number of elements in the queue remains less than the size of the current + // buffer (Segment), no additional allocations are required for enqueued items. When + // the number of items exceeds the size of the current segment, the current segment is + // "frozen" to prevent further enqueues, and a new segment is linked from it and set + // as the new tail segment for subsequent enqueues. 
As old segments are consumed by + // dequeues, the head reference is updated to point to the segment that dequeuers should + // try next. To support snapshot enumeration, segments also support the notion of + // preserving for observation, whereby they avoid overwriting state as part of dequeues. + // Any operation that requires a snapshot results in all current segments being + // both frozen for enqueues and preserved for observation: any new enqueues will go + // to new segments, and dequeuers will consume from the existing segments but without + // overwriting the existing data. + + /// Initial length of the segments used in the queue. + private const int InitialSegmentLength = 32; + /// + /// Maximum length of the segments used in the queue. This is a somewhat arbitrary limit: + /// larger means that as long as we don't exceed the size, we avoid allocating more segments, + /// but if we do exceed it, then the segment becomes garbage. + /// + private const int MaxSegmentLength = 1024 * 1024; + /// + /// Lock used to protect cross-segment operations, including any updates to or + /// and any operations that need to get a consistent view of them. + /// + [NonSerialized] + private Lock _crossSegmentLock; + /// The current tail segment. + [NonSerialized] private volatile Segment _tail; + /// The current head segment. + [NonSerialized] + private volatile Segment _head; + /// Field used to temporarily store the contents of the queue for serialization. + private T[] _serializationArray; + + /// + /// Initializes a new instance of the class. + /// + public LowLevelConcurrentQueue() + { + _crossSegmentLock = new Lock(); + _tail = _head = new Segment(InitialSegmentLength); + } - private const int SEGMENT_SIZE = 32; + /// Set the data array to be serialized. + [OnSerializing] + private void OnSerializing(StreamingContext context) + { + _serializationArray = ToArray(); + } - //number of snapshot takers, GetEnumerator(), ToList() and ToArray() operations take snapshot. 
- internal volatile int m_numSnapshotTakers = 0; + /// Clear the data array that was serialized. + [OnSerialized] + private void OnSerialized(StreamingContext context) + { + _serializationArray = null; + } + + /// Construct the queue from the deserialized . + [OnDeserialized] + private void OnDeserialized(StreamingContext context) + { + Debug.Assert(_serializationArray != null); + InitializeFromCollection(_serializationArray); + _serializationArray = null; + } /// - /// Initializes a new instance of the class. + /// Initializes the contents of the queue from an existing collection. /// - public LowLevelConcurrentQueue() + /// A collection from which to copy elements. + private void InitializeFromCollection(IEnumerable collection) { - _head = _tail = new Segment(0, this); + _crossSegmentLock = new Lock(); + + // Determine the initial segment size. We'll use the default, + // unless the collection is known to be larger than that, in which + // case we round its length up to a power of 2, as all segments must + // be a power of 2 in length. + int length = InitialSegmentLength; + var c = collection as ICollection; + if (c != null) + { + int count = c.Count; + if (count > length) + { + length = Math.Min(RoundUpToPowerOf2(count), MaxSegmentLength); + } + } + + // Initialize the segment and add all of the data to it. + _tail = _head = new Segment(length); + foreach (T item in collection) + { + Enqueue(item); + } } /// - /// Returns an enumerator that iterates through a collection. + /// Initializes a new instance of the class that contains elements copied + /// from the specified collection. /// - /// An that can be used to iterate through the collection. - IEnumerator IEnumerable.GetEnumerator() + /// + /// The collection whose elements are copied to the new . + /// + /// The argument is null. 
+ public LowLevelConcurrentQueue(IEnumerable collection) { - return ((IEnumerable)this).GetEnumerator(); + if (collection == null) + { + throw new ArgumentNullException(nameof(collection)); + } + + InitializeFromCollection(collection); } + /// Returns an enumerator that iterates through a collection. + /// An that can be used to iterate through the collection. + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + /// - /// Gets a value that indicates whether the is empty. + /// Gets a value that indicates whether the is empty. /// - /// true if the is empty; otherwise, false. + /// true if the is empty; otherwise, false. /// /// For determining whether the collection contains any items, use of this property is recommended /// rather than retrieving the number of items from the property and comparing it @@ -75,74 +173,47 @@ public bool IsEmpty { get { - Segment head = _head; - if (!head.IsEmpty) - //fast route 1: - //if current head is not empty, then queue is not empty - return false; - else if (head.Next == null) - //fast route 2: - //if current head is empty and it's the last segment - //then queue is empty - return true; - else - //slow route: - //current head is empty and it is NOT the last segment, - //it means another thread is growing new segment - { - SpinWait spin = new SpinWait(); - while (head.IsEmpty) - { - if (head.Next == null) - return true; - - spin.SpinOnce(); - head = _head; - } - return false; - } + // IsEmpty == !TryPeek. We use a "resultUsed:false" peek in order to avoid marking + // segments as preserved for observation, making IsEmpty a cheaper way than either + // TryPeek(out T) or Count == 0 to check whether any elements are in the queue. + T ignoredResult; + return !TryPeek(out ignoredResult, resultUsed: false); } } - /// - /// Store the position of the current head and tail positions. 
- /// - /// return the head segment - /// return the tail segment - /// return the head offset, value range [0, SEGMENT_SIZE] - /// return the tail offset, value range [-1, SEGMENT_SIZE-1] - private void GetHeadTailPositions(out Segment head, out Segment tail, - out int headLow, out int tailHigh) + /// Copies the elements stored in the to a new array. + /// A new array containing a snapshot of elements copied from the . + public T[] ToArray() { - head = _head; - tail = _tail; - headLow = head.Low; - tailHigh = tail.High; - SpinWait spin = new SpinWait(); - - //we loop until the observed values are stable and sensible. - //This ensures that any update order by other methods can be tolerated. - while ( - //if head and tail changed, retry - head != _head || tail != _tail - //if low and high pointers, retry - || headLow != head.Low || tailHigh != tail.High - //if head jumps ahead of tail because of concurrent grow and dequeue, retry - || head.m_index > tail.m_index) + // Snap the current contents for enumeration. + Segment head, tail; + int headHead, tailTail; + SnapForObservation(out head, out headHead, out tail, out tailTail); + + // Count the number of items in that snapped set, and use it to allocate an + // array of the right size. + long count = GetCount(head, headHead, tail, tailTail); + T[] arr = new T[count]; + + // Now enumerate the contents, copying each element into the array. + using (IEnumerator e = Enumerate(head, headHead, tail, tailTail)) { - spin.SpinOnce(); - head = _head; - tail = _tail; - headLow = head.Low; - tailHigh = tail.High; + int i = 0; + while (e.MoveNext()) + { + arr[i++] = e.Current; + } + Debug.Assert(count == i); } - } + // And return it. + return arr; + } /// - /// Gets the number of elements contained in the . + /// Gets the number of elements contained in the . /// - /// The number of elements contained in the . + /// The number of elements contained in the . 
/// /// For determining whether the collection contains any items, use of the /// property is recommended rather than retrieving the number of items from the @@ -152,36 +223,191 @@ public int Count { get { - //store head and tail positions in buffer, Segment head, tail; - int headLow, tailHigh; - GetHeadTailPositions(out head, out tail, out headLow, out tailHigh); - - if (head == tail) + int headHead, headTail, tailHead, tailTail; + var spinner = new SpinWait(); + while (true) { - return tailHigh - headLow + 1; + // Capture the head and tail, as well as the head's head and tail. + head = _head; + tail = _tail; + headHead = Volatile.Read(ref head._headAndTail.Head); + headTail = Volatile.Read(ref head._headAndTail.Tail); + + if (head == tail) + { + // There was a single segment in the queue. If the captured + // values still (or again) represent reality, return the segment's + // count. A single segment should be the most common case once the + // queue's size has stabilized after segments have grown to + // the point where growing is no longer needed. + if (head == _head && + head == _tail && + headHead == Volatile.Read(ref head._headAndTail.Head) && + headTail == Volatile.Read(ref head._headAndTail.Tail)) + { + return GetCount(head, headHead, headTail); + } + } + else if (head._nextSegment == tail) + { + // There were two segments in the queue. Get the positions + // from the tail, and if the captured values still (or again) match + // reality, return the sum of the counts from both segments. 
+ tailHead = Volatile.Read(ref tail._headAndTail.Head); + tailTail = Volatile.Read(ref tail._headAndTail.Tail); + if (head == _head && + tail == _tail && + headHead == Volatile.Read(ref head._headAndTail.Head) && + headTail == Volatile.Read(ref head._headAndTail.Tail) && + tailHead == Volatile.Read(ref tail._headAndTail.Head) && + tailTail == Volatile.Read(ref tail._headAndTail.Tail)) + { + // We got stable values, so we can just compute the sizes based on those + // values and return the sum of the counts of the segments. + return GetCount(head, headHead, headTail) + GetCount(tail, tailHead, tailTail); + } + } + else + { + // There were more than two segments. Take the slower path, where we freeze the + // queue and then count the now stable segments. + SnapForObservation(out head, out headHead, out tail, out tailTail); + return unchecked((int)GetCount(head, headHead, tail, tailTail)); + } + + // We raced with enqueues/dequeues and captured an inconsistent picture of the queue. + // Spin and try again. + spinner.SpinOnce(); } + } + } + + /// Computes the number of items in a segment based on a fixed head and tail in that segment. + private static int GetCount(Segment s, int head, int tail) + { + if (head != tail && head != tail - s.FreezeOffset) + { + head &= s._slotsMask; + tail &= s._slotsMask; + return head < tail ? tail - head : s._slots.Length - head + tail; + } + return 0; + } + + /// Gets the number of items in snapped region. + private static long GetCount(Segment head, int headHead, Segment tail, int tailTail) + { + // All of the segments should have been both frozen for enqueues and preserved for observation. + // Validate that here for head and tail; we'll validate it for intermediate segments later. 
+ Debug.Assert(head._preservedForObservation); + Debug.Assert(head._frozenForEnqueues); + Debug.Assert(tail._preservedForObservation); + Debug.Assert(tail._frozenForEnqueues); + + long count = 0; - //head segment - int count = SEGMENT_SIZE - headLow; + // Head segment. We've already marked it as frozen for enqueues, so its tail position is fixed, + // and we've already marked it as preserved for observation (before we grabbed the head), so we + // can safely enumerate from its head to its tail and access its elements. + int headTail = (head == tail ? tailTail : Volatile.Read(ref head._headAndTail.Tail)) - head.FreezeOffset; + if (headHead < headTail) + { + // Mask the head and tail for the head segment + headHead &= head._slotsMask; + headTail &= head._slotsMask; - //middle segment(s), if any, are full. - //We don't deal with overflow to be consistent with the behavior of generic types in CLR. - count += SEGMENT_SIZE * ((int)(tail.m_index - head.m_index - 1)); + // Increase the count by either the one or two regions, based on whether tail + // has wrapped to be less than head. + count += headHead < headTail ? + headTail - headHead : + head._slots.Length - headHead + headTail; + } - //tail segment - count += tailHigh + 1; + // We've enumerated the head. If the tail is different from the head, we need to + // enumerate the remaining segments. + if (head != tail) + { + // Count the contents of each segment between head and tail, not including head and tail. + // Since there were segments before these, for our purposes we consider them to start at + // the 0th element, and since there is at least one segment after each, each was frozen + // by the time we snapped it, so we can iterate until each's frozen tail. + for (Segment s = head._nextSegment; s != tail; s = s._nextSegment) + { + Debug.Assert(s._preservedForObservation); + Debug.Assert(s._frozenForEnqueues); + count += s._headAndTail.Tail - s.FreezeOffset; + } - return count; + // Finally, enumerate the tail. 
As with the intermediate segments, there were segments + // before this in the snapped region, so we can start counting from the beginning. Unlike + // the intermediate segments, we can't just go until the Tail, as that could still be changing; + // instead we need to go until the tail we snapped for observation. + count += tailTail - tail.FreezeOffset; } + + // Return the computed count. + return count; } /// - /// Returns an enumerator that iterates through the . + /// Copies the elements to an existing one-dimensional Array, starting at the specified array index. /// + /// The one-dimensional Array that is the + /// destination of the elements copied from the + /// . The Array must have zero-based + /// indexing. + /// The zero-based index in at which copying + /// begins. + /// is a null reference (Nothing in + /// Visual Basic). + /// is less than + /// zero. + /// is equal to or greater than the + /// length of the + /// -or- The number of elements in the source is greater than the + /// available space from to the end of the destination . + /// + public void CopyTo(T[] array, int index) + { + if (array == null) + { + throw new ArgumentNullException(nameof(array)); + } + if (index < 0) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + // Snap for enumeration + Segment head, tail; + int headHead, tailTail; + SnapForObservation(out head, out headHead, out tail, out tailTail); + + // Get the number of items to be enumerated + long count = GetCount(head, headHead, tail, tailTail); + if (index > array.Length - count) + { + throw new ArgumentException(SR.Arg_ArrayPlusOffTooSmall); + } + + // Copy the items to the target array + int i = index; + using (IEnumerator e = Enumerate(head, headHead, tail, tailTail)) + { + while (e.MoveNext()) + { + array[i++] = e.Current; + } + } + Debug.Assert(count == i - index); + } + + /// Returns an enumerator that iterates through the . /// An enumerator for the contents of the . 
+ /// cref="LowLevelConcurrentQueue{T}"/>. /// /// The enumeration represents a moment-in-time snapshot of the contents /// of the queue. It does not reflect any updates to the collection after @@ -190,426 +416,653 @@ public int Count /// public IEnumerator GetEnumerator() { - // Increments the number of active snapshot takers. This increment must happen before the snapshot is - // taken. At the same time, Decrement must happen after the enumeration is over. Only in this way, can it - // eliminate race condition when Segment.TryRemove() checks whether m_numSnapshotTakers == 0. - Interlocked.Increment(ref m_numSnapshotTakers); - - // Takes a snapshot of the queue. - // A design flaw here: if a Thread.Abort() happens, we cannot decrement m_numSnapshotTakers. But we cannot - // wrap the following with a try/finally block, otherwise the decrement will happen before the yield return - // statements in the GetEnumerator (head, tail, headLow, tailHigh) method. Segment head, tail; - int headLow, tailHigh; - GetHeadTailPositions(out head, out tail, out headLow, out tailHigh); - - //If we put yield-return here, the iterator will be lazily evaluated. As a result a snapshot of - // the queue is not taken when GetEnumerator is initialized but when MoveNext() is first called. - // This is inconsistent with existing generic collections. In order to prevent it, we capture the - // value of m_head in a buffer and call out to a helper method. - //The old way of doing this was to return the ToList().GetEnumerator(), but ToList() was an - // unnecessary perfomance hit. - return GetEnumerator(head, tail, headLow, tailHigh); + int headHead, tailTail; + SnapForObservation(out head, out headHead, out tail, out tailTail); + return Enumerate(head, headHead, tail, tailTail); } /// - /// Helper method of GetEnumerator to seperate out yield return statement, and prevent lazy evaluation. + /// Gets the head and tail information of the current contents of the queue. 
+ /// After this call returns, the specified region can be enumerated any number + /// of times and will not change. /// - private IEnumerator GetEnumerator(Segment head, Segment tail, int headLow, int tailHigh) + private void SnapForObservation(out Segment head, out int headHead, out Segment tail, out int tailTail) { - try + using (LockHolder.Hold(_crossSegmentLock)) // _head and _tail may only change while the lock is held. { - SpinWait spin = new SpinWait(); + // Snap the head and tail + head = _head; + tail = _tail; + Debug.Assert(head != null); + Debug.Assert(tail != null); + Debug.Assert(tail._nextSegment == null); - if (head == tail) + // Mark them and all segments in between as preserving, and ensure no additional items + // can be added to the tail. + for (Segment s = head; ; s = s._nextSegment) { - for (int i = headLow; i <= tailHigh; i++) - { - // If the position is reserved by an Enqueue operation, but the value is not written into, - // spin until the value is available. - spin.Reset(); - while (!head.m_state[i].m_value) - { - spin.SpinOnce(); - } - yield return head.m_array[i]; - } + s._preservedForObservation = true; + if (s == tail) break; + Debug.Assert(s._frozenForEnqueues); // any non-tail should already be marked + } + tail.EnsureFrozenForEnqueues(); // we want to prevent the tailTail from moving + + // At this point, any dequeues from any segment won't overwrite the value, and + // none of the existing segments can have new items enqueued. + + headHead = Volatile.Read(ref head._headAndTail.Head); + tailTail = Volatile.Read(ref tail._headAndTail.Tail); + } + } + + /// Gets the item stored in the th entry in . 
+ private T GetItemWhenAvailable(Segment segment, int i) + { + Debug.Assert(segment._preservedForObservation); + + // Get the expected value for the sequence number + int expectedSequenceNumberAndMask = (i + 1) & segment._slotsMask; + + // If the expected sequence number is not yet written, we're still waiting for + // an enqueuer to finish storing it. Spin until it's there. + if ((segment._slots[i].SequenceNumber & segment._slotsMask) != expectedSequenceNumberAndMask) + { + var spinner = new SpinWait(); + while ((Volatile.Read(ref segment._slots[i].SequenceNumber) & segment._slotsMask) != expectedSequenceNumberAndMask) + { + spinner.SpinOnce(); + } + } + + // Return the value from the slot. + return segment._slots[i].Item; + } + + private IEnumerator Enumerate(Segment head, int headHead, Segment tail, int tailTail) + { + Debug.Assert(head._preservedForObservation); + Debug.Assert(head._frozenForEnqueues); + Debug.Assert(tail._preservedForObservation); + Debug.Assert(tail._frozenForEnqueues); + + // Head segment. We've already marked it as not accepting any more enqueues, + // so its tail position is fixed, and we've already marked it as preserved for + // enumeration (before we grabbed its head), so we can safely enumerate from + // its head to its tail. + int headTail = (head == tail ? tailTail : Volatile.Read(ref head._headAndTail.Tail)) - head.FreezeOffset; + if (headHead < headTail) + { + headHead &= head._slotsMask; + headTail &= head._slotsMask; + + if (headHead < headTail) + { + for (int i = headHead; i < headTail; i++) yield return GetItemWhenAvailable(head, i); } else { - //iterate on head segment - for (int i = headLow; i < SEGMENT_SIZE; i++) - { - // If the position is reserved by an Enqueue operation, but the value is not written into, - // spin until the value is available. 
- spin.Reset(); - while (!head.m_state[i].m_value) - { - spin.SpinOnce(); - } - yield return head.m_array[i]; - } - //iterate on middle segments - Segment curr = head.Next; - while (curr != tail) - { - for (int i = 0; i < SEGMENT_SIZE; i++) - { - // If the position is reserved by an Enqueue operation, but the value is not written into, - // spin until the value is available. - spin.Reset(); - while (!curr.m_state[i].m_value) - { - spin.SpinOnce(); - } - yield return curr.m_array[i]; - } - curr = curr.Next; - } + for (int i = headHead; i < head._slots.Length; i++) yield return GetItemWhenAvailable(head, i); + for (int i = 0; i < headTail; i++) yield return GetItemWhenAvailable(head, i); + } + } - //iterate on tail segment - for (int i = 0; i <= tailHigh; i++) + // We've enumerated the head. If the tail is the same, we're done. + if (head != tail) + { + // Each segment between head and tail, not including head and tail. Since there were + // segments before these, for our purposes we consider it to start at the 0th element. + for (Segment s = head._nextSegment; s != tail; s = s._nextSegment) + { + Debug.Assert(s._preservedForObservation, "Would have had to been preserved as a segment part of enumeration"); + Debug.Assert(s._frozenForEnqueues, "Would have had to be frozen for enqueues as it's intermediate"); + + int sTail = s._headAndTail.Tail - s.FreezeOffset; + for (int i = 0; i < sTail; i++) { - // If the position is reserved by an Enqueue operation, but the value is not written into, - // spin until the value is available. - spin.Reset(); - while (!tail.m_state[i].m_value) - { - spin.SpinOnce(); - } - yield return tail.m_array[i]; + yield return GetItemWhenAvailable(s, i); } } - } - finally - { - // This Decrement must happen after the enumeration is over. - Interlocked.Decrement(ref m_numSnapshotTakers); + + // Enumerate the tail. Since there were segments before this, we can just start at + // its beginning, and iterate until the tail we already grabbed. 
+ tailTail -= tail.FreezeOffset; + for (int i = 0; i < tailTail; i++) + { + yield return GetItemWhenAvailable(tail, i); + } } } - /// - /// Adds an object to the end of the . - /// - /// The object to add to the end of the . The value can be a null reference - /// (Nothing in Visual Basic) for reference types. + /// Round the specified value up to the next power of 2, if it isn't one already. + private static int RoundUpToPowerOf2(int i) + { + --i; + i |= i >> 1; + i |= i >> 2; + i |= i >> 4; + i |= i >> 8; + i |= i >> 16; + return i + 1; + } + + /// Adds an object to the end of the . + /// + /// The object to add to the end of the . + /// The value can be a null reference (Nothing in Visual Basic) for reference types. /// public void Enqueue(T item) { - SpinWait spin = new SpinWait(); + // Try to enqueue to the current tail. + if (!_tail.TryEnqueue(item)) + { + // If we're unable to, we need to take a slow path that will + // try to add a new tail segment. + EnqueueSlow(item); + } + } + + /// Adds to the end of the queue, adding a new segment if necessary. + private void EnqueueSlow(T item) + { while (true) { Segment tail = _tail; - if (tail.TryAppend(item)) + + // Try to append to the existing tail. + if (tail.TryEnqueue(item)) + { return; - spin.SpinOnce(); + } + + // If we were unsuccessful, take the lock so that we can compare and manipulate + // the tail. Assuming another enqueuer hasn't already added a new segment, + // do so, then loop around to try enqueueing again. + using (LockHolder.Hold(_crossSegmentLock)) // _head and _tail may only change while the lock is held. + { + if (tail == _tail) + { + // Make sure no one else can enqueue to this segment. + tail.EnsureFrozenForEnqueues(); + + // We determine the new segment's length based on the old length. + // In general, we double the size of the segment, to make it less likely + // that we'll need to grow again. 
However, if the tail segment is marked + // as preserved for observation, something caused us to avoid reusing this + // segment, and if that happens a lot and we grow, we'll end up allocating + // lots of wasted space. As such, in such situations we reset back to the + // initial segment length; if these observations are happening frequently, + // this will help to avoid wasted memory, and if they're not, we'll + // relatively quickly grow again to a larger size. + int nextSize = tail._preservedForObservation ? InitialSegmentLength : Math.Min(tail.Capacity * 2, MaxSegmentLength); + var newTail = new Segment(nextSize); + + // Hook up the new tail. + tail._nextSegment = newTail; + _tail = newTail; + } + } } } - /// /// Attempts to remove and return the object at the beginning of the . + /// cref="LowLevelConcurrentQueue{T}"/>. /// /// /// When this method returns, if the operation was successful, contains the /// object removed. If no object was available to be removed, the value is unspecified. /// - /// true if an element was removed and returned from the beggining of the - /// succesfully; otherwise, false. - public bool TryDequeue(out T result) + /// + /// true if an element was removed and returned from the beginning of the + /// successfully; otherwise, false. + /// + public bool TryDequeue(out T result) => + _head.TryDequeue(out result) || // fast-path that operates just on the head segment + TryDequeueSlow(out result); // slow path that needs to fix up segments + + /// Tries to dequeue an item, removing empty segments as needed. + private bool TryDequeueSlow(out T item) { - while (!IsEmpty) + while (true) { + // Get the current head Segment head = _head; - if (head.TryRemove(out result)) + + // Try to take. If we're successful, we're done. + if (head.TryDequeue(out item)) + { return true; - //since method IsEmpty spins, we don't need to spin in the while loop + } + + // Check to see whether this segment is the last. 
If it is, we can consider + // this to be a moment-in-time empty condition (even though between the TryDequeue + // check and this check, another item could have arrived). + if (head._nextSegment == null) + { + item = default(T); + return false; + } + + // At this point we know that head.Next != null, which means + // this segment has been frozen for additional enqueues. But between + // the time that we ran TryDequeue and checked for a next segment, + // another item could have been added. Try to dequeue one more time + // to confirm that the segment is indeed empty. + Debug.Assert(head._frozenForEnqueues); + if (head.TryDequeue(out item)) + { + return true; + } + + // This segment is frozen (nothing more can be added) and empty (nothing is in it). + // Update head to point to the next segment in the list, assuming no one's beat us to it. + using (LockHolder.Hold(_crossSegmentLock)) // _head and _tail may only change while the lock is held. + { + if (head == _head) + { + _head = head._nextSegment; + } + } } - result = default(T); - return false; } /// - /// private class for ConcurrentQueue. - /// a queue is a linked list of small arrays, each node is called a segment. - /// A segment contains an array, a pointer to the next segment, and m_low, m_high indices recording - /// the first and last valid elements of the array. + /// Attempts to return an object from the beginning of the + /// without removing it. /// - private class Segment - { - //we define two volatile arrays: m_array and m_state. Note that the accesses to the array items - //do not get volatile treatment. But we don't need to worry about loading adjacent elements or - //store/load on adjacent elements would suffer reordering. - // - Two stores: these are at risk, but CLRv2 memory model guarantees store-release hence we are safe. 
- // - Two loads: because one item from two volatile arrays are accessed, the loads of the array references - // are sufficient to prevent reordering of the loads of the elements. - internal volatile T[] m_array; - - // For each entry in m_array, the corresponding entry in m_state indicates whether this position contains - // a valid value. m_state is initially all false. - internal volatile VolatileBool[] m_state; - - //pointer to the next segment. null if the current segment is the last segment - private volatile Segment _next; - - //We use this zero based index to track how many segments have been created for the queue, and - //to compute how many active segments are there currently. - // * The number of currently active segments is : m_tail.m_index - m_head.m_index + 1; - // * m_index is incremented with every Segment.Grow operation. We use Int64 type, and we can safely - // assume that it never overflows. To overflow, we need to do 2^63 increments, even at a rate of 4 - // billion (2^32) increments per second, it takes 2^31 seconds, which is about 64 years. - internal readonly long m_index; - - //indices of where the first and last valid values - // - m_low points to the position of the next element to pop from this segment, range [0, infinity) - // m_low >= SEGMENT_SIZE implies the segment is disposable - // - m_high points to the position of the latest pushed element, range [-1, infinity) - // m_high == -1 implies the segment is new and empty - // m_high >= SEGMENT_SIZE-1 means this segment is ready to grow. 
- // and the thread who sets m_high to SEGMENT_SIZE-1 is responsible to grow the segment - // - Math.Min(m_low, SEGMENT_SIZE) > Math.Min(m_high, SEGMENT_SIZE-1) implies segment is empty - // - initially m_low =0 and m_high=-1; - private volatile int _low; - private volatile int _high; - - private volatile LowLevelConcurrentQueue _source; + /// + /// When this method returns, contains an object from + /// the beginning of the or default(T) + /// if the operation failed. + /// + /// true if an object was returned successfully; otherwise, false. + /// + /// For determining whether the collection contains any items, use of the + /// property is recommended rather than peeking. + /// + public bool TryPeek(out T result) => TryPeek(out result, resultUsed: true); - /// - /// Create and initialize a segment with the specified index. - /// - internal Segment(long index, LowLevelConcurrentQueue source) + /// Attempts to retrieve the value for the first element in the queue. + /// The value of the first element, if found. + /// true if the result is needed; otherwise false if only the true/false outcome is needed. + /// true if an element was found; otherwise, false. + private bool TryPeek(out T result, bool resultUsed) + { + // Starting with the head segment, look through all of the segments + // for the first one we can find that's not empty. + Segment s = _head; + while (true) { - m_array = new T[SEGMENT_SIZE]; - m_state = new VolatileBool[SEGMENT_SIZE]; //all initialized to false - _high = -1; - Debug.Assert(index >= 0); - m_index = index; - _source = source; - } + // Grab the next segment from this one, before we peek. + // This is to be able to see whether the value has changed + // during the peek operation. + Segment next = Volatile.Read(ref s._nextSegment); - /// - /// return the next segment - /// - internal Segment Next - { - get { return _next; } - } + // Peek at the segment. If we find an element, we're done.
+ if (s.TryPeek(out result, resultUsed)) + { + return true; + } + // The current segment was empty at the moment we checked. - /// - /// return true if the current segment is empty (doesn't have any element available to dequeue, - /// false otherwise - /// - internal bool IsEmpty - { - get { return (Low > High); } - } + if (next != null) + { + // If prior to the peek there was already a next segment, then + // during the peek no additional items could have been enqueued + // to it and we can just move on to check the next segment. + Debug.Assert(next == s._nextSegment); + s = next; + } + else if (Volatile.Read(ref s._nextSegment) == null) + { + // The next segment is null. Nothing more to peek at. + break; + } - /// - /// Add an element to the tail of the current segment - /// exclusively called by ConcurrentQueue.InitializedFromCollection - /// InitializeFromCollection is responsible to guaratee that there is no index overflow, - /// and there is no contention - /// - /// - internal void UnsafeAdd(T value) - { - Debug.Assert(_high < SEGMENT_SIZE - 1); - _high++; - m_array[_high] = value; - m_state[_high].m_value = true; + // The next segment was null before we peeked but non-null after. + // That means either when we peeked the first segment had + // already been frozen but the new segment not yet added, + // or that the first segment was empty and between the time + // that we peeked and then checked _nextSegment, so many items + // were enqueued that we filled the first segment and went + // into the next. Since we need to peek in order, we simply + // loop around again to peek on the same segment. The next + // time around on this segment we'll then either successfully + // peek or we'll find that next was non-null before peeking, + // and we'll traverse to that segment. 
} - /// - /// Create a new segment and append to the current one - /// Does not update the m_tail pointer - /// exclusively called by ConcurrentQueue.InitializedFromCollection - /// InitializeFromCollection is responsible to guaratee that there is no index overflow, - /// and there is no contention - /// - /// the reference to the new Segment - internal Segment UnsafeGrow() - { - Debug.Assert(_high >= SEGMENT_SIZE - 1); - Segment newSegment = new Segment(m_index + 1, _source); //m_index is Int64, we don't need to worry about overflow - _next = newSegment; - return newSegment; - } + result = default(T); + return false; + } - /// - /// Create a new segment and append to the current one - /// Update the m_tail pointer - /// This method is called when there is no contention - /// - internal void Grow() + /// + /// Removes all objects from the . + /// + public void Clear() + { + using (LockHolder.Hold(_crossSegmentLock)) // _head and _tail may only change while the lock is held. { - //no CAS is needed, since there is no contention (other threads are blocked, busy waiting) - Segment newSegment = new Segment(m_index + 1, _source); //m_index is Int64, we don't need to worry about overflow - _next = newSegment; - Debug.Assert(_source._tail == this); - _source._tail = _next; + // Simply substitute a new segment for the existing head/tail, + // as is done in the constructor. Operations currently in flight + // may still read from or write to an existing segment that's + // getting dropped, meaning that in flight operations may not be + // linear with regards to this clear operation. To help mitigate + // in-flight operations enqueuing onto the tail that's about to + // be dropped, we first freeze it; that'll force enqueuers to take + // this lock to synchronize and see the new tail. + _tail.EnsureFrozenForEnqueues(); + _tail = _head = new Segment(InitialSegmentLength); } + } + /// + /// Provides a multi-producer, multi-consumer thread-safe bounded segment. 
When the queue is full, + /// enqueues fail and return false. When the queue is empty, dequeues fail and return null. + /// These segments are linked together to form the unbounded . + /// + [DebuggerDisplay("Capacity = {Capacity}")] + private sealed class Segment + { + // Segment design is inspired by the algorithm outlined at: + // http://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue - /// - /// Try to append an element at the end of this segment. - /// - /// the element to append - /// The tail. - /// true if the element is appended, false if the current segment is full - /// if appending the specified element succeeds, and after which the segment is full, - /// then grow the segment - internal bool TryAppend(T value) + /// The array of items in this queue. Each slot contains the item in that slot and its "sequence number". + internal readonly Slot[] _slots; + /// Mask for quickly accessing a position within the queue's array. + internal readonly int _slotsMask; + /// The head and tail positions, with padding to help avoid false sharing contention. + /// Dequeueing happens from the head, enqueueing happens at the tail. + internal PaddedHeadAndTail _headAndTail; // mutable struct: do not make this readonly + + /// Indicates whether the segment has been marked such that dequeues don't overwrite the removed data. + internal bool _preservedForObservation; + /// Indicates whether the segment has been marked such that no additional items may be enqueued. + internal bool _frozenForEnqueues; + /// The segment following this one in the queue, or null if this segment is the last in the queue. + internal Segment _nextSegment; + + /// Creates the segment. + /// + /// The maximum number of elements the segment can contain. Must be a power of 2. 
+ /// + public Segment(int boundedLength) { - //quickly check if m_high is already over the boundary, if so, bail out - if (_high >= SEGMENT_SIZE - 1) - { - return false; - } + // Validate the length + Debug.Assert(boundedLength >= 2, $"Must be >= 2, got {boundedLength}"); + Debug.Assert((boundedLength & (boundedLength - 1)) == 0, $"Must be a power of 2, got {boundedLength}"); - //Now we will use a CAS to increment m_high, and store the result in newhigh. - //Depending on how many free spots left in this segment and how many threads are doing this Increment - //at this time, the returning "newhigh" can be - // 1) < SEGMENT_SIZE - 1 : we took a spot in this segment, and not the last one, just insert the value - // 2) == SEGMENT_SIZE - 1 : we took the last spot, insert the value AND grow the segment - // 3) > SEGMENT_SIZE - 1 : we failed to reserve a spot in this segment, we return false to - // Queue.Enqueue method, telling it to try again in the next segment. - - int newhigh = SEGMENT_SIZE; //initial value set to be over the boundary - - //We need do Interlocked.Increment and value/state update in a finally block to ensure that they run - //without interuption. This is to prevent anything from happening between them, and another dequeue - //thread maybe spinning forever to wait for m_state[] to be true; - try - { } - finally - { - newhigh = Interlocked.Increment(ref _high); - if (newhigh <= SEGMENT_SIZE - 1) - { - m_array[newhigh] = value; - m_state[newhigh].m_value = true; - } + // Initialize the slots and the mask. The mask is used as a way of quickly doing "% _slots.Length", + // instead letting us do "& _slotsMask". + _slots = new Slot[boundedLength]; + _slotsMask = boundedLength - 1; - //if this thread takes up the last slot in the segment, then this thread is responsible - //to grow a new segment. Calling Grow must be in the finally block too for reliability reason: - //if thread abort during Grow, other threads will be left busy spinning forever. 
- if (newhigh == SEGMENT_SIZE - 1) - { - Grow(); - } + // Initialize the sequence number for each slot. The sequence number provides a ticket that + // allows dequeuers to know whether they can dequeue and enqueuers to know whether they can + // enqueue. An enqueuer at position N can enqueue when the sequence number is N, and a dequeuer + // for position N can dequeue when the sequence number is N + 1. When an enqueuer is done writing + // at position N, it sets the sequence number to N so that a dequeuer will be able to dequeue, + // and when a dequeuer is done dequeueing at position N, it sets the sequence number to N + _slots.Length, + // so that when an enqueuer loops around the slots, it'll find that the sequence number at + // position N is N. This also means that when an enqueuer finds that at position N the sequence + // number is < N, there is still a value in that slot, i.e. the segment is full, and when a + // dequeuer finds that the value in a slot is < N + 1, there is nothing currently available to + // dequeue. (It is possible for multiple enqueuers to enqueue concurrently, writing into + // subsequent slots, and to have the first enqueuer take longer, so that the slots for 1, 2, 3, etc. + // may have values, but the 0th slot may still be being filled... in that case, TryDequeue will + // return false.) + for (int i = 0; i < _slots.Length; i++) + { + _slots[i].SequenceNumber = i; } - - //if newhigh <= SEGMENT_SIZE-1, it means the current thread successfully takes up a spot - return newhigh <= SEGMENT_SIZE - 1; } + /// Gets the number of elements this segment can store. + internal int Capacity => _slots.Length; + + /// Gets the "freeze offset" for this segment. + internal int FreezeOffset => _slots.Length * 2; /// - /// try to remove an element from the head of current segment + /// Ensures that the segment will not accept any subsequent enqueues that aren't already underway. /// - /// The result. - /// The head. 
- /// return false only if the current segment is empty - internal bool TryRemove(out T result) + /// + /// When we mark a segment as being frozen for additional enqueues, + /// we set the bool, but that's mostly + /// as a small helper to avoid marking it twice. The real marking comes + /// by modifying the Tail for the segment, increasing it by this + /// . This effectively knocks it off the + /// sequence expected by future enqueuers, such that any additional enqueuer + /// will be unable to enqueue due to it not lining up with the expected + /// sequence numbers. This value is chosen specially so that Tail will grow + /// to a value that maps to the same slot but that won't be confused with + /// any other enqueue/dequeue sequence number. + /// + internal void EnsureFrozenForEnqueues() // must only be called while queue's segment lock is held { - SpinWait spin = new SpinWait(); - int lowLocal = Low, highLocal = High; - while (lowLocal <= highLocal) + if (!_frozenForEnqueues) // flag used to ensure we don't increase the Tail more than once if frozen more than once { - //try to update m_low - if (Interlocked.CompareExchange(ref _low, lowLocal + 1, lowLocal) == lowLocal) + _frozenForEnqueues = true; + + // Increase the tail by FreezeOffset, spinning until we're successful in doing so. + var spinner = new SpinWait(); + while (true) { - //if the specified value is not available (this spot is taken by a push operation, - // but the value is not written into yet), then spin - SpinWait spinLocal = new SpinWait(); - while (!m_state[lowLocal].m_value) + int tail = Volatile.Read(ref _headAndTail.Tail); + if (Interlocked.CompareExchange(ref _headAndTail.Tail, tail + FreezeOffset, tail) == tail) { - spinLocal.SpinOnce(); + break; } - result = m_array[lowLocal]; + spinner.SpinOnce(); + } + } + } - // If there is no other thread taking snapshot (GetEnumerator(), ToList(), etc), reset the deleted entry to null. 
- // It is ok if after this conditional check m_numSnapshotTakers becomes > 0, because new snapshots won't include - // the deleted entry at m_array[lowLocal]. - if (_source.m_numSnapshotTakers <= 0) - { - m_array[lowLocal] = default(T); //release the reference to the object. - } + /// Tries to dequeue an element from the queue. + public bool TryDequeue(out T item) + { + // Loop in case of contention... + var spinner = new SpinWait(); + while (true) + { + // Get the head at which to try to dequeue. + int currentHead = Volatile.Read(ref _headAndTail.Head); + int slotsIndex = currentHead & _slotsMask; - //if the current thread sets m_low to SEGMENT_SIZE, which means the current segment becomes - //disposable, then this thread is responsible to dispose this segment, and reset m_head - if (lowLocal + 1 >= SEGMENT_SIZE) + // Read the sequence number for the head position. + int sequenceNumber = Volatile.Read(ref _slots[slotsIndex].SequenceNumber); + + // We can dequeue from this slot if it's been filled by an enqueuer, which + // would have left the sequence number at pos+1. + int diff = sequenceNumber - (currentHead + 1); + if (diff == 0) + { + // We may be racing with other dequeuers. Try to reserve the slot by incrementing + // the head. Once we've done that, no one else will be able to read from this slot, + // and no enqueuer will be able to read from this slot until we've written the new + // sequence number. WARNING: The next few lines are not reliable on a runtime that + // supports thread aborts. If a thread abort were to sneak in after the CompareExchange + // but before the Volatile.Write, enqueuers trying to enqueue into this slot would + // spin indefinitely. If this implementation is ever used on such a platform, this + // if block should be wrapped in a finally / prepared region. 
+ if (Interlocked.CompareExchange(ref _headAndTail.Head, currentHead + 1, currentHead) == currentHead) { - // Invariant: we only dispose the current m_head, not any other segment - // In usual situation, disposing a segment is simply seting m_head to m_head.m_next - // But there is one special case, where m_head and m_tail points to the same and ONLY - //segment of the queue: Another thread A is doing Enqueue and finds that it needs to grow, - //while the *current* thread is doing *this* Dequeue operation, and finds that it needs to - //dispose the current (and ONLY) segment. Then we need to wait till thread A finishes its - //Grow operation, this is the reason of having the following while loop - spinLocal = new SpinWait(); - while (_next == null) + // Successfully reserved the slot. Note that after the above CompareExchange, other threads + // trying to dequeue from this slot will end up spinning until we do the subsequent Write. + item = _slots[slotsIndex].Item; + if (!Volatile.Read(ref _preservedForObservation)) { - spinLocal.SpinOnce(); + // If we're preserving, though, we don't zero out the slot, as we need it for + // enumerations, peeking, ToArray, etc. And we don't update the sequence number, + // so that an enqueuer will see it as full and be forced to move to a new segment. + _slots[slotsIndex].Item = default(T); + Volatile.Write(ref _slots[slotsIndex].SequenceNumber, currentHead + _slots.Length); } - Debug.Assert(_source._head == this); - _source._head = _next; + return true; } - return true; } - else + else if (diff < 0) { - //CAS failed due to contention: spin briefly and retry - spin.SpinOnce(); - lowLocal = Low; highLocal = High; + // The sequence number was less than what we needed, which means this slot doesn't + // yet contain a value we can dequeue, i.e. the segment is empty. 
Technically it's + // possible that multiple enqueuers could have written concurrently, with those + // getting later slots actually finishing first, so there could be elements after + // this one that are available, but we need to dequeue in order. So before declaring + // failure and that the segment is empty, we check the tail to see if we're actually + // empty or if we're just waiting for items in flight or after this one to become available. + bool frozen = _frozenForEnqueues; + int currentTail = Volatile.Read(ref _headAndTail.Tail); + if (currentTail - currentHead <= 0 || (frozen && (currentTail - FreezeOffset - currentHead <= 0))) + { + item = default(T); + return false; + } + + // It's possible it could have become frozen after we checked _frozenForEnqueues + // and before reading the tail. That's ok: in that rare race condition, we just + // loop around again. } - }//end of while - result = default(T); - return false; + + // Lost a race. Spin a bit, then try again. + spinner.SpinOnce(); + } } - /// - /// return the position of the head of the current segment - /// Value range [0, SEGMENT_SIZE], if it's SEGMENT_SIZE, it means this segment is exhausted and thus empty - /// - internal int Low + /// Tries to peek at an element from the queue, without removing it. + public bool TryPeek(out T result, bool resultUsed) { - get + if (resultUsed) { - return Math.Min(_low, SEGMENT_SIZE); + // In order to ensure we don't get a torn read on the value, we mark the segment + // as preserving for observation. Additional items can still be enqueued to this + // segment, but no space will be freed during dequeues, such that the segment will + // no longer be reusable. + _preservedForObservation = true; + Interlocked.MemoryBarrier(); + } + + // Loop in case of contention... + var spinner = new SpinWait(); + while (true) + { + // Get the head at which to try to peek. 
+ int currentHead = Volatile.Read(ref _headAndTail.Head); + int slotsIndex = currentHead & _slotsMask; + + // Read the sequence number for the head position. + int sequenceNumber = Volatile.Read(ref _slots[slotsIndex].SequenceNumber); + + // We can peek from this slot if it's been filled by an enqueuer, which + // would have left the sequence number at pos+1. + int diff = sequenceNumber - (currentHead + 1); + if (diff == 0) + { + result = resultUsed ? _slots[slotsIndex].Item : default(T); + return true; + } + else if (diff < 0) + { + // The sequence number was less than what we needed, which means this slot doesn't + // yet contain a value we can peek, i.e. the segment is empty. Technically it's + // possible that multiple enqueuers could have written concurrently, with those + // getting later slots actually finishing first, so there could be elements after + // this one that are available, but we need to peek in order. So before declaring + // failure and that the segment is empty, we check the tail to see if we're actually + // empty or if we're just waiting for items in flight or after this one to become available. + bool frozen = _frozenForEnqueues; + int currentTail = Volatile.Read(ref _headAndTail.Tail); + if (currentTail - currentHead <= 0 || (frozen && (currentTail - FreezeOffset - currentHead <= 0))) + { + result = default(T); + return false; + } + + // It's possible it could have become frozen after we checked _frozenForEnqueues + // and before reading the tail. That's ok: in that rare race condition, we just + // loop around again. + } + + // Lost a race. Spin a bit, then try again. + spinner.SpinOnce(); } } /// - /// return the logical position of the tail of the current segment - /// Value range [-1, SEGMENT_SIZE-1]. When it's -1, it means this is a new segment and has no elemnet yet + /// Attempts to enqueue the item. 
If successful, the item will be stored + /// in the queue and true will be returned; otherwise, the item won't be stored, and false + /// will be returned. /// - internal int High + public bool TryEnqueue(T item) { - get + // Loop in case of contention... + var spinner = new SpinWait(); + while (true) { - //if m_high > SEGMENT_SIZE, it means it's out of range, we should return - //SEGMENT_SIZE-1 as the logical position - return Math.Min(_high, SEGMENT_SIZE - 1); + // Get the tail at which to try to return. + int currentTail = Volatile.Read(ref _headAndTail.Tail); + int slotsIndex = currentTail & _slotsMask; + + // Read the sequence number for the tail position. + int sequenceNumber = Volatile.Read(ref _slots[slotsIndex].SequenceNumber); + + // The slot is empty and ready for us to enqueue into it if its sequence + // number matches the slot. + int diff = sequenceNumber - currentTail; + if (diff == 0) + { + // We may be racing with other enqueuers. Try to reserve the slot by incrementing + // the tail. Once we've done that, no one else will be able to write to this slot, + // and no dequeuer will be able to read from this slot until we've written the new + // sequence number. WARNING: The next few lines are not reliable on a runtime that + // supports thread aborts. If a thread abort were to sneak in after the CompareExchange + // but before the Volatile.Write, other threads will spin trying to access this slot. + // If this implementation is ever used on such a platform, this if block should be + // wrapped in a finally / prepared region. + if (Interlocked.CompareExchange(ref _headAndTail.Tail, currentTail + 1, currentTail) == currentTail) + { + // Successfully reserved the slot. Note that after the above CompareExchange, other threads + // trying to return will end up spinning until we do the subsequent Write. 
+ _slots[slotsIndex].Item = item; + Volatile.Write(ref _slots[slotsIndex].SequenceNumber, currentTail + 1); + return true; + } + } + else if (diff < 0) + { + // The sequence number was less than what we needed, which means this slot still + // contains a value, i.e. the segment is full. Technically it's possible that multiple + // dequeuers could have read concurrently, with those getting later slots actually + // finishing first, so there could be spaces after this one that are available, but + // we need to enqueue in order. + return false; + } + + // Lost a race. Spin a bit, then try again. + spinner.SpinOnce(); } } + + /// Represents a slot in the queue. + [StructLayout(LayoutKind.Auto)] + [DebuggerDisplay("Item = {Item}, SequenceNumber = {SequenceNumber}")] + internal struct Slot + { + /// The item. + public T Item; + /// The sequence number for this slot, used to synchronize between enqueuers and dequeuers. + public int SequenceNumber; + } } - }//end of class Segment + } - /// - /// A wrapper struct for volatile bool, please note the copy of the struct it self will not be volatile - /// for example this statement will not include in volatilness operation volatileBool1 = volatileBool2 the jit will copy the struct and will ignore the volatile - /// - internal struct VolatileBool + /// Padded head and tail indices, to avoid false sharing between producers and consumers. 
+ [DebuggerDisplay("Head = {Head}, Tail = {Tail}")] + [StructLayout(LayoutKind.Explicit, Size = 192)] // padding before/between/after fields based on typical cache line size of 64 + internal struct PaddedHeadAndTail { - public VolatileBool(bool value) - { - m_value = value; - } - public volatile bool m_value; + [FieldOffset(64)] public int Head; + [FieldOffset(128)] public int Tail; } } diff --git a/src/System.Private.CoreLib/src/System/Threading/SemaphoreSlim.cs b/src/System.Private.CoreLib/src/System/Threading/SemaphoreSlim.cs index 51a92a1f832..ba2c0ecd973 100644 --- a/src/System.Private.CoreLib/src/System/Threading/SemaphoreSlim.cs +++ b/src/System.Private.CoreLib/src/System/Threading/SemaphoreSlim.cs @@ -89,7 +89,6 @@ void IThreadPoolWorkItem.ExecuteWorkItem() bool setSuccessfully = TrySetResult(true); Debug.Assert(setSuccessfully, "Should have been able to complete task"); } - //void IThreadPoolWorkItem.MarkAborted(ThreadAbortException tae) { /* nop */ } } #endregion diff --git a/src/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs b/src/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs index becb5862072..76011540b4d 100644 --- a/src/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs +++ b/src/System.Private.CoreLib/src/System/Threading/Tasks/Task.cs @@ -2353,7 +2353,6 @@ private void Execute() } } - /// /// IThreadPoolWorkItem override, which is the entry function for this task when the TP scheduler decides to run it. 
/// diff --git a/src/System.Private.CoreLib/src/System/Threading/Tasks/TaskContinuation.cs b/src/System.Private.CoreLib/src/System/Threading/Tasks/TaskContinuation.cs index 4bdc7f51419..a4133ec8a65 100644 --- a/src/System.Private.CoreLib/src/System/Threading/Tasks/TaskContinuation.cs +++ b/src/System.Private.CoreLib/src/System/Threading/Tasks/TaskContinuation.cs @@ -597,11 +597,6 @@ void IThreadPoolWorkItem.ExecuteWorkItem() ExecutionContext.Run(m_capturedContext, GetInvokeActionCallback(), m_action); } - ///// - ///// The ThreadPool calls this if a ThreadAbortException is thrown while trying to execute this workitem. - ///// - //void IThreadPoolWorkItem.MarkAborted(ThreadAbortException tae) { /* nop */ } - /// Cached delegate that invokes an Action passed as an object parameter. private static ContextCallback s_invokeActionCallback; diff --git a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Unix.cs b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Unix.cs index 2854bf9a676..df77c4c8583 100644 --- a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Unix.cs +++ b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Unix.cs @@ -81,7 +81,7 @@ public static void GetAvailableThreads(out int workerThreads, out int completion /// /// This method is called to request a new thread pool worker to handle pending work. /// - internal static void QueueDispatch() + internal static void RequestWorkerThread() { // For simplicity of the state management, we pre-create all thread pool workers on the first // request and then use the semaphore to release threads as new requests come in. 
diff --git a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Windows.cs b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Windows.cs index 3450135d432..5ca0604b4e1 100644 --- a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Windows.cs +++ b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.Windows.cs @@ -306,7 +306,7 @@ private static void DispatchCallback(IntPtr instance, IntPtr context, IntPtr wor wrapper.Exit(resetThread: false); } - internal static void QueueDispatch() + internal static void RequestWorkerThread() { if (s_work == IntPtr.Zero) { diff --git a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.cs b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.cs index 5a9b52dda11..eddd3ef62bf 100644 --- a/src/System.Private.CoreLib/src/System/Threading/ThreadPool.cs +++ b/src/System.Private.CoreLib/src/System/Threading/ThreadPool.cs @@ -27,8 +27,10 @@ */ using Internal.Runtime.Augments; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; using System.Runtime.InteropServices; @@ -50,71 +52,72 @@ public static ThreadPoolWorkQueue workQueue internal sealed class ThreadPoolWorkQueue { - // Simple sparsely populated array to allow lock-free reading. 
- internal class SparseArray where T : class + internal static class WorkStealingQueueList { - private volatile T[] m_array; - private readonly Lock m_lock = new Lock(); + private static volatile WorkStealingQueue[] _queues = new WorkStealingQueue[0]; - internal SparseArray(int initialSize) - { - m_array = new T[initialSize]; - } - - internal T[] Current - { - get { return m_array; } - } + public static WorkStealingQueue[] Queues => _queues; - internal int Add(T e) + public static void Add(WorkStealingQueue queue) { + Debug.Assert(queue != null); while (true) { - T[] array = m_array; - using (LockHolder.Hold(m_lock)) + WorkStealingQueue[] oldQueues = _queues; + Debug.Assert(Array.IndexOf(oldQueues, queue) == -1); + + var newQueues = new WorkStealingQueue[oldQueues.Length + 1]; + Array.Copy(oldQueues, 0, newQueues, 0, oldQueues.Length); + newQueues[newQueues.Length - 1] = queue; + if (Interlocked.CompareExchange(ref _queues, newQueues, oldQueues) == oldQueues) { - for (int i = 0; i < array.Length; i++) - { - if (array[i] == null) - { - Volatile.Write(ref array[i], e); - return i; - } - else if (i == array.Length - 1) - { - // Must resize. If we raced and lost, we start over again. 
- if (array != m_array) - continue; - - T[] newArray = new T[array.Length * 2]; - Array.Copy(array, newArray, i + 1); - newArray[i + 1] = e; - m_array = newArray; - return i + 1; - } - } + break; } } } - internal void Remove(T e) + public static void Remove(WorkStealingQueue queue) { - T[] array = m_array; - using (LockHolder.Hold(m_lock)) + Debug.Assert(queue != null); + while (true) { - for (int i = 0; i < m_array.Length; i++) + WorkStealingQueue[] oldQueues = _queues; + if (oldQueues.Length == 0) { - if (m_array[i] == e) - { - Volatile.Write(ref m_array[i], null); - break; - } + return; + } + + int pos = Array.IndexOf(oldQueues, queue); + if (pos == -1) + { + Debug.Fail("Should have found the queue"); + return; + } + + var newQueues = new WorkStealingQueue[oldQueues.Length - 1]; + if (pos == 0) + { + Array.Copy(oldQueues, 1, newQueues, 0, newQueues.Length); + } + else if (pos == oldQueues.Length - 1) + { + Array.Copy(oldQueues, 0, newQueues, 0, newQueues.Length); + } + else + { + Array.Copy(oldQueues, 0, newQueues, 0, pos); + Array.Copy(oldQueues, pos + 1, newQueues, pos, newQueues.Length - pos); + } + + if (Interlocked.CompareExchange(ref _queues, newQueues, oldQueues) == oldQueues) + { + break; } } } } - internal class WorkStealingQueue + internal sealed class WorkStealingQueue { private const int INITIAL_SIZE = 32; internal volatile IThreadPoolWorkItem[] m_array = new IThreadPoolWorkItem[INITIAL_SIZE]; @@ -130,7 +133,7 @@ internal class WorkStealingQueue private volatile int m_headIndex = START_INDEX; private volatile int m_tailIndex = START_INDEX; - private SpinLock m_foreignLock = new SpinLock(false); + private SpinLock m_foreignLock = new SpinLock(enableThreadOwnerTracking: false); public void LocalPush(IThreadPoolWorkItem obj) { @@ -164,7 +167,7 @@ public void LocalPush(IThreadPoolWorkItem obj) finally { if (lockTaken) - m_foreignLock.Exit(true); + m_foreignLock.Exit(useMemoryBarrier: true); } } @@ -189,7 +192,7 @@ public void 
LocalPush(IThreadPoolWorkItem obj) if (count >= m_mask) { // We're full; expand the queue by doubling its size. - IThreadPoolWorkItem[] newArray = new IThreadPoolWorkItem[m_array.Length << 1]; + var newArray = new IThreadPoolWorkItem[m_array.Length << 1]; for (int i = 0; i < m_array.Length; i++) newArray[i] = m_array[(i + head) & m_mask]; @@ -206,23 +209,20 @@ public void LocalPush(IThreadPoolWorkItem obj) finally { if (lockTaken) - m_foreignLock.Exit(false); + m_foreignLock.Exit(useMemoryBarrier: false); } } } + [SuppressMessage("Microsoft.Concurrency", "CA8001", Justification = "Reviewed for thread safety")] public bool LocalFindAndPop(IThreadPoolWorkItem obj) { // Fast path: check the tail. If equal, we can skip the lock. if (m_array[(m_tailIndex - 1) & m_mask] == obj) { - IThreadPoolWorkItem unused; - if (LocalPop(out unused)) - { - Debug.Assert(unused == obj); - return true; - } - return false; + IThreadPoolWorkItem unused = LocalPop(); + Debug.Assert(unused == null || unused == obj); + return unused != null; } // Else, do an O(N) search for the work item. The theory of work stealing and our @@ -238,13 +238,12 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj) if (m_array[i & m_mask] == obj) { // If we found the element, block out steals to avoid interference. - // @TODO: optimize away the lock? bool lockTaken = false; try { m_foreignLock.Enter(ref lockTaken); - // If we lost the race, bail. + // If we encountered a race condition, bail. if (m_array[i & m_mask] == null) return false; @@ -264,7 +263,7 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj) finally { if (lockTaken) - m_foreignLock.Exit(false); + m_foreignLock.Exit(useMemoryBarrier: false); } } } @@ -272,18 +271,20 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj) return false; } - public bool LocalPop(out IThreadPoolWorkItem obj) + public IThreadPoolWorkItem LocalPop() => m_headIndex < m_tailIndex ? 
LocalPopCore() : null; + + [SuppressMessage("Microsoft.Concurrency", "CA8001", Justification = "Reviewed for thread safety")] + private IThreadPoolWorkItem LocalPopCore() { while (true) { - // Decrement the tail using a fence to ensure subsequent read doesn't come before. int tail = m_tailIndex; if (m_headIndex >= tail) { - obj = null; - return false; + return null; } + // Decrement the tail using a fence to ensure subsequent read doesn't come before. tail -= 1; Interlocked.Exchange(ref m_tailIndex, tail); @@ -291,13 +292,13 @@ public bool LocalPop(out IThreadPoolWorkItem obj) if (m_headIndex <= tail) { int idx = tail & m_mask; - obj = Volatile.Read(ref m_array[idx]); + IThreadPoolWorkItem obj = Volatile.Read(ref m_array[idx]); // Check for nulls in the array. if (obj == null) continue; m_array[idx] = null; - return true; + return obj; } else { @@ -311,224 +312,81 @@ public bool LocalPop(out IThreadPoolWorkItem obj) { // Element still available. Take it. int idx = tail & m_mask; - obj = Volatile.Read(ref m_array[idx]); + IThreadPoolWorkItem obj = Volatile.Read(ref m_array[idx]); // Check for nulls in the array. if (obj == null) continue; m_array[idx] = null; - return true; + return obj; } else { - // We lost the race, element was stolen, restore the tail. + // If we encountered a race condition and element was stolen, restore the tail. m_tailIndex = tail + 1; - obj = null; - return false; + return null; } } finally { if (lockTaken) - m_foreignLock.Exit(false); + m_foreignLock.Exit(useMemoryBarrier: false); } } } } - public bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal) - { - return TrySteal(out obj, ref missedSteal, 0); // no blocking by default. 
- } + public bool CanSteal => m_headIndex < m_tailIndex; - private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int millisecondsTimeout) + public IThreadPoolWorkItem TrySteal(ref bool missedSteal) { - obj = null; - while (true) { - if (m_headIndex >= m_tailIndex) - return false; - - bool taken = false; - try + if (CanSteal) { - m_foreignLock.TryEnter(millisecondsTimeout, ref taken); - if (taken) + bool taken = false; + try { - // Increment head, and ensure read of tail doesn't move before it (fence). - int head = m_headIndex; - Interlocked.Exchange(ref m_headIndex, head + 1); - - if (head < m_tailIndex) - { - int idx = head & m_mask; - obj = Volatile.Read(ref m_array[idx]); - - // Check for nulls in the array. - if (obj == null) continue; - - m_array[idx] = null; - return true; - } - else + m_foreignLock.TryEnter(ref taken); + if (taken) { - // Failed, restore head. - m_headIndex = head; - obj = null; - missedSteal = true; + // Increment head, and ensure read of tail doesn't move before it (fence). + int head = m_headIndex; + Interlocked.Exchange(ref m_headIndex, head + 1); + + if (head < m_tailIndex) + { + int idx = head & m_mask; + IThreadPoolWorkItem obj = Volatile.Read(ref m_array[idx]); + + // Check for nulls in the array. + if (obj == null) continue; + + m_array[idx] = null; + return obj; + } + else + { + // Failed, restore head. + m_headIndex = head; + } } } - else + finally { - missedSteal = true; + if (taken) + m_foreignLock.Exit(useMemoryBarrier: false); } - } - finally - { - if (taken) - m_foreignLock.Exit(false); - } - - return false; - } - } - } - - internal class QueueSegment - { - // Holds a segment of the queue. Enqueues/Dequeues start at element 0, and work their way up. - internal readonly IThreadPoolWorkItem[] nodes; - private const int QueueSegmentLength = 256; - - // Holds the indexes of the lowest and highest valid elements of the nodes array. - // The low index is in the lower 16 bits, high index is in the upper 16 bits. 
- // Use GetIndexes and CompareExchangeIndexes to manipulate this. - private volatile int indexes; - - // The next segment in the queue. - public volatile QueueSegment Next; - - - private const int SixteenBits = 0xffff; - private void GetIndexes(out int upper, out int lower) - { - int i = indexes; - upper = (i >> 16) & SixteenBits; - lower = i & SixteenBits; - - Debug.Assert(upper >= lower); - Debug.Assert(upper <= nodes.Length); - Debug.Assert(lower <= nodes.Length); - Debug.Assert(upper >= 0); - Debug.Assert(lower >= 0); - } - - private bool CompareExchangeIndexes(ref int prevUpper, int newUpper, ref int prevLower, int newLower) - { - Debug.Assert(newUpper >= newLower); - Debug.Assert(newUpper <= nodes.Length); - Debug.Assert(newLower <= nodes.Length); - Debug.Assert(newUpper >= 0); - Debug.Assert(newLower >= 0); - Debug.Assert(newUpper >= prevUpper); - Debug.Assert(newLower >= prevLower); - Debug.Assert(newUpper == prevUpper ^ newLower == prevLower); - - int oldIndexes = (prevUpper << 16) | (prevLower & SixteenBits); - int newIndexes = (newUpper << 16) | (newLower & SixteenBits); - int prevIndexes = Interlocked.CompareExchange(ref indexes, newIndexes, oldIndexes); - prevUpper = (prevIndexes >> 16) & SixteenBits; - prevLower = prevIndexes & SixteenBits; - return prevIndexes == oldIndexes; - } - - public QueueSegment() - { - Debug.Assert(QueueSegmentLength <= SixteenBits); - nodes = new IThreadPoolWorkItem[QueueSegmentLength]; - } - - - public bool IsUsedUp() - { - int upper, lower; - GetIndexes(out upper, out lower); - return (upper == nodes.Length) && - (lower == nodes.Length); - } - - public bool TryEnqueue(IThreadPoolWorkItem node) - { - // - // If there's room in this segment, atomically increment the upper count (to reserve - // space for this node), then store the node. - // Note that this leaves a window where it will look like there is data in that - // array slot, but it hasn't been written yet. 
This is taken care of in TryDequeue - // with a busy-wait loop, waiting for the element to become non-null. This implies - // that we can never store null nodes in this data structure. - // - Debug.Assert(null != node); - - int upper, lower; - GetIndexes(out upper, out lower); - - while (true) - { - if (upper == nodes.Length) - return false; - - if (CompareExchangeIndexes(ref upper, upper + 1, ref lower, lower)) - { - Debug.Assert(Volatile.Read(ref nodes[upper]) == null); - Volatile.Write(ref nodes[upper], node); - return true; - } - } - } - - public bool TryDequeue(out IThreadPoolWorkItem node) - { - // - // If there are nodes in this segment, increment the lower count, then take the - // element we find there. - // - int upper, lower; - GetIndexes(out upper, out lower); - - while (true) - { - if (lower == upper) - { - node = null; - return false; - } - - if (CompareExchangeIndexes(ref upper, upper, ref lower, lower + 1)) - { - // It's possible that a concurrent call to Enqueue hasn't yet - // written the node reference to the array. We need to spin until - // it shows up. - SpinWait spinner = new SpinWait(); - while ((node = Volatile.Read(ref nodes[lower])) == null) - spinner.SpinOnce(); - - // Null-out the reference so the object can be GC'd earlier. - nodes[lower] = null; - - return true; + missedSteal = true; } + return null; } } } - // The head and tail of the queue. We enqueue to the head, and dequeue from the tail. 
- internal volatile QueueSegment queueHead; - internal volatile QueueSegment queueTail; - - internal static SparseArray allThreadQueues = new SparseArray(16); //TODO: base this on processor count, once the security restrictions are removed from Environment.ProcessorCount - + internal readonly LowLevelConcurrentQueue workItems = new LowLevelConcurrentQueue(); + private volatile int numOutstandingThreadRequests = 0; // The number of threads executing work items in the Dispatch method @@ -536,20 +394,16 @@ public bool TryDequeue(out IThreadPoolWorkItem node) public ThreadPoolWorkQueue() { - queueTail = queueHead = new QueueSegment(); } - public ThreadPoolWorkQueueThreadLocals EnsureCurrentThreadHasQueue() - { - if (null == ThreadPoolWorkQueueThreadLocals.Current) - ThreadPoolWorkQueueThreadLocals.Current = new ThreadPoolWorkQueueThreadLocals(this); - return ThreadPoolWorkQueueThreadLocals.Current; - } + public ThreadPoolWorkQueueThreadLocals EnsureCurrentThreadHasQueue() => + ThreadPoolWorkQueueThreadLocals.threadLocals ?? + (ThreadPoolWorkQueueThreadLocals.threadLocals = new ThreadPoolWorkQueueThreadLocals(this)); internal void EnsureThreadRequested() { // - // If we have not yet requested #procs threads from the VM, then request a new thread. + // If we have not yet requested #procs threads, then request a new thread. 
// int count = numOutstandingThreadRequests; while (count < ThreadPoolGlobals.processorCount) @@ -557,7 +411,7 @@ internal void EnsureThreadRequested() int prev = Interlocked.CompareExchange(ref numOutstandingThreadRequests, count + 1, count); if (prev == count) { - ThreadPool.QueueDispatch(); + ThreadPool.RequestWorkerThread(); break; } count = prev; @@ -586,7 +440,7 @@ public void Enqueue(IThreadPoolWorkItem callback, bool forceGlobal) { ThreadPoolWorkQueueThreadLocals tl = null; if (!forceGlobal) - tl = ThreadPoolWorkQueueThreadLocals.Current; + tl = ThreadPoolWorkQueueThreadLocals.threadLocals; if (null != tl) { @@ -594,18 +448,7 @@ public void Enqueue(IThreadPoolWorkItem callback, bool forceGlobal) } else { - QueueSegment head = queueHead; - - while (!head.TryEnqueue(callback)) - { - Interlocked.CompareExchange(ref head.Next, new QueueSegment(), null); - - while (head.Next != null) - { - Interlocked.CompareExchange(ref queueHead, head.Next, head); - head = queueHead; - } - } + workItems.Enqueue(callback); } EnsureThreadRequested(); @@ -613,64 +456,41 @@ public void Enqueue(IThreadPoolWorkItem callback, bool forceGlobal) internal bool LocalFindAndPop(IThreadPoolWorkItem callback) { - ThreadPoolWorkQueueThreadLocals tl = ThreadPoolWorkQueueThreadLocals.Current; - if (null == tl) - return false; - - return tl.workStealingQueue.LocalFindAndPop(callback); + ThreadPoolWorkQueueThreadLocals tl = ThreadPoolWorkQueueThreadLocals.threadLocals; + return tl != null && tl.workStealingQueue.LocalFindAndPop(callback); } - public void Dequeue(ThreadPoolWorkQueueThreadLocals tl, out IThreadPoolWorkItem callback, out bool missedSteal) + public IThreadPoolWorkItem Dequeue(ThreadPoolWorkQueueThreadLocals tl, ref bool missedSteal) { - callback = null; - missedSteal = false; - WorkStealingQueue wsq = tl.workStealingQueue; - - if (wsq.LocalPop(out callback)) - Debug.Assert(null != callback); - - if (null == callback) - { - QueueSegment tail = queueTail; - while (true) - { - if 
(tail.TryDequeue(out callback)) - { - Debug.Assert(null != callback); - break; - } - - if (null == tail.Next || !tail.IsUsedUp()) - { - break; - } - else - { - Interlocked.CompareExchange(ref queueTail, tail.Next, tail); - tail = queueTail; - } - } - } + WorkStealingQueue localWsq = tl.workStealingQueue; + IThreadPoolWorkItem callback; - if (null == callback) + if ((callback = localWsq.LocalPop()) == null && // first try the local queue + !workItems.TryDequeue(out callback)) // then try the global queue { - WorkStealingQueue[] otherQueues = allThreadQueues.Current; - int i = tl.random.Next(otherQueues.Length); - int c = otherQueues.Length; + // finally try to steal from another thread's local queue + WorkStealingQueue[] queues = WorkStealingQueueList.Queues; + int c = queues.Length; + Debug.Assert(c > 0, "There must at least be a queue for this thread."); + int maxIndex = c - 1; + int i = tl.random.Next(c); while (c > 0) { - WorkStealingQueue otherQueue = Volatile.Read(ref otherQueues[i % otherQueues.Length]); - if (otherQueue != null && - otherQueue != wsq && - otherQueue.TrySteal(out callback, ref missedSteal)) + i = (i < maxIndex) ? i + 1 : 0; + WorkStealingQueue otherQueue = queues[i]; + if (otherQueue != localWsq && otherQueue.CanSteal) { - Debug.Assert(null != callback); - break; + callback = otherQueue.TrySteal(ref missedSteal); + if (callback != null) + { + break; + } } - i++; c--; } } + + return callback; } /// @@ -703,6 +523,7 @@ internal static bool Dispatch() // false later, but only if we're absolutely certain that the queue is empty. 
// bool needAnotherThread = true; + IThreadPoolWorkItem workItem = null; try { // @@ -715,7 +536,8 @@ internal static bool Dispatch() // while (ThreadPool.KeepDispatching(startTickCount)) { - workQueue.Dequeue(tl, out IThreadPoolWorkItem workItem, out bool missedSteal); + bool missedSteal = false; + workItem = workQueue.Dequeue(tl, ref missedSteal); if (workItem == null) { @@ -727,6 +549,8 @@ // which will be more efficient than this thread doing it anyway. // needAnotherThread = missedSteal; + + // Tell the VM we're returning normally, not because Hill Climbing asked us to return. return true; } @@ -750,9 +574,7 @@ RuntimeThread.CurrentThread.ResetThreadPoolThread(); if (!ThreadPool.NotifyWorkItemComplete()) - { return false; - } } // If we get here, it's because our quantum expired. @@ -779,22 +601,49 @@ } } + + // Simple random number generator. We don't need great randomness, we just need a little and for it to be fast. + internal struct FastRandom // xorshift prng + { + private uint _w, _x, _y, _z; + + public FastRandom(int seed) + { + _x = (uint)seed; + _w = 88675123; + _y = 362436069; + _z = 521288629; + } + + public int Next(int maxValue) + { + Debug.Assert(maxValue > 0); + + uint t = _x ^ (_x << 11); + _x = _y; _y = _z; _z = _w; + _w = _w ^ (_w >> 19) ^ (t ^ (t >> 8)); + + return (int)(_w % (uint)maxValue); + } + } + + // Holds a WorkStealingQueue, and removes it from the list when this object is no longer referenced.
internal sealed class ThreadPoolWorkQueueThreadLocals { [ThreadStatic] - public static ThreadPoolWorkQueueThreadLocals Current; + public static ThreadPoolWorkQueueThreadLocals threadLocals; public readonly ThreadPoolWorkQueue workQueue; public readonly ThreadPoolWorkQueue.WorkStealingQueue workStealingQueue; - public readonly Random random = new Random(Environment.CurrentManagedThreadId); + public FastRandom random = new FastRandom(Environment.CurrentManagedThreadId); // mutable struct, do not copy or make readonly public ThreadPoolWorkQueueThreadLocals(ThreadPoolWorkQueue tpq) { workQueue = tpq; workStealingQueue = new ThreadPoolWorkQueue.WorkStealingQueue(); - ThreadPoolWorkQueue.allThreadQueues.Add(workStealingQueue); + ThreadPoolWorkQueue.WorkStealingQueueList.Add(workStealingQueue); } private void CleanUp() @@ -803,23 +652,15 @@ private void CleanUp() { if (null != workQueue) { - bool done = false; - while (!done) + IThreadPoolWorkItem cb; + while ((cb = workStealingQueue.LocalPop()) != null) { - IThreadPoolWorkItem cb = null; - if (workStealingQueue.LocalPop(out cb)) - { - Debug.Assert(null != cb); - workQueue.Enqueue(cb, true); - } - else - { - done = true; - } + Debug.Assert(null != cb); + workQueue.Enqueue(cb, forceGlobal: true); } } - ThreadPoolWorkQueue.allThreadQueues.Remove(workStealingQueue); + ThreadPoolWorkQueue.WorkStealingQueueList.Remove(workStealingQueue); } } @@ -856,8 +697,8 @@ internal interface IThreadPoolWorkItem internal sealed class QueueUserWorkItemCallback : IThreadPoolWorkItem { private WaitCallback callback; - private ExecutionContext context; - private Object state; + private readonly ExecutionContext context; + private readonly Object state; #if DEBUG private volatile int executed; @@ -894,7 +735,11 @@ void IThreadPoolWorkItem.ExecuteWorkItem() try { if (context == null) - callback(state); + { + WaitCallback cb = callback; + callback = null; + cb(state); + } else ExecutionContext.Run(context, ccb, this); } @@ -905,12 +750,12 @@ void 
IThreadPoolWorkItem.ExecuteWorkItem() } } - internal static ContextCallback ccb = new ContextCallback(WaitCallback_Context); + internal static readonly ContextCallback ccb = new ContextCallback(WaitCallback_Context); private static void WaitCallback_Context(Object state) { QueueUserWorkItemCallback obj = (QueueUserWorkItemCallback)state; - WaitCallback wc = obj.callback as WaitCallback; + WaitCallback wc = obj.callback; Debug.Assert(null != wc); wc(obj.state); } @@ -920,7 +765,7 @@ private static void WaitCallback_Context(Object state) internal sealed class QueueUserWorkItemCallbackDefaultContext : IThreadPoolWorkItem { private WaitCallback callback; - private Object state; + private readonly Object state; #if DEBUG private volatile int executed; @@ -964,26 +809,26 @@ void IThreadPoolWorkItem.ExecuteWorkItem() } } - internal static ContextCallback ccb = new ContextCallback(WaitCallback_Context); + internal static readonly ContextCallback ccb = new ContextCallback(WaitCallback_Context); private static void WaitCallback_Context(Object state) { QueueUserWorkItemCallbackDefaultContext obj = (QueueUserWorkItemCallbackDefaultContext)state; - WaitCallback wc = obj.callback as WaitCallback; + WaitCallback wc = obj.callback; Debug.Assert(null != wc); + obj.callback = null; wc(obj.state); } } internal class _ThreadPoolWaitOrTimerCallback { + private WaitOrTimerCallback _waitOrTimerCallback; + private ExecutionContext _executionContext; + private Object _state; private static readonly ContextCallback _ccbt = new ContextCallback(WaitOrTimerCallback_Context_t); private static readonly ContextCallback _ccbf = new ContextCallback(WaitOrTimerCallback_Context_f); - private readonly WaitOrTimerCallback _waitOrTimerCallback; - private readonly ExecutionContext _executionContext; - private readonly Object _state; - internal _ThreadPoolWaitOrTimerCallback(WaitOrTimerCallback waitOrTimerCallback, Object state, bool flowExecutionContext) { _waitOrTimerCallback = waitOrTimerCallback; @@ 
-991,6 +836,7 @@ internal _ThreadPoolWaitOrTimerCallback(WaitOrTimerCallback waitOrTimerCallback, if (flowExecutionContext) { + // capture the execution context _executionContext = ExecutionContext.Capture(); } } @@ -1167,48 +1013,32 @@ internal static void UnsafeQueueCustomWorkItem(IThreadPoolWorkItem workItem, boo // This method tries to take the target callback out of the current thread's queue. internal static bool TryPopCustomWorkItem(IThreadPoolWorkItem workItem) { + Debug.Assert(null != workItem); return ThreadPoolGlobals.workQueue.LocalFindAndPop(workItem); } // Get all workitems. Called by TaskScheduler in its debugger hooks. internal static IEnumerable GetQueuedWorkItems() { - return EnumerateQueuedWorkItems(ThreadPoolWorkQueue.allThreadQueues.Current, ThreadPoolGlobals.workQueue.queueTail); - } - - internal static IEnumerable EnumerateQueuedWorkItems(ThreadPoolWorkQueue.WorkStealingQueue[] wsQueues, ThreadPoolWorkQueue.QueueSegment globalQueueTail) - { - if (wsQueues != null) + // Enumerate the global queue + foreach (IThreadPoolWorkItem workItem in ThreadPoolGlobals.workQueue.workItems) { - // First, enumerate all workitems in thread-local queues.
- foreach (ThreadPoolWorkQueue.WorkStealingQueue wsq in wsQueues) - { - if (wsq != null && wsq.m_array != null) - { - IThreadPoolWorkItem[] items = wsq.m_array; - for (int i = 0; i < items.Length; i++) - { - IThreadPoolWorkItem item = items[i]; - if (item != null) - yield return item; - } - } - } + yield return workItem; } - if (globalQueueTail != null) + // Enumerate each local queue + foreach (ThreadPoolWorkQueue.WorkStealingQueue wsq in ThreadPoolWorkQueue.WorkStealingQueueList.Queues) { - // Now the global queue - for (ThreadPoolWorkQueue.QueueSegment segment = globalQueueTail; - segment != null; - segment = segment.Next) + if (wsq != null && wsq.m_array != null) { - IThreadPoolWorkItem[] items = segment.nodes; + IThreadPoolWorkItem[] items = wsq.m_array; for (int i = 0; i < items.Length; i++) { IThreadPoolWorkItem item = items[i]; if (item != null) + { yield return item; + } } } } @@ -1216,13 +1046,20 @@ internal static IEnumerable EnumerateQueuedWorkItems(Thread internal static IEnumerable GetLocallyQueuedWorkItems() { - return EnumerateQueuedWorkItems(new ThreadPoolWorkQueue.WorkStealingQueue[] { ThreadPoolWorkQueueThreadLocals.Current.workStealingQueue }, null); + ThreadPoolWorkQueue.WorkStealingQueue wsq = ThreadPoolWorkQueueThreadLocals.threadLocals.workStealingQueue; + if (wsq != null && wsq.m_array != null) + { + IThreadPoolWorkItem[] items = wsq.m_array; + for (int i = 0; i < items.Length; i++) + { + IThreadPoolWorkItem item = items[i]; + if (item != null) + yield return item; + } + } } - internal static IEnumerable GetGloballyQueuedWorkItems() - { - return EnumerateQueuedWorkItems(null, ThreadPoolGlobals.workQueue.queueTail); - } + internal static IEnumerable GetGloballyQueuedWorkItems() => ThreadPoolGlobals.workQueue.workItems; private static object[] ToObjectArray(IEnumerable workitems) { @@ -1281,6 +1118,6 @@ public static bool BindHandle(SafeHandle osHandle) throw new PlatformNotSupportedException(SR.Arg_PlatformNotSupported); // Replaced by 
ThreadPoolBoundHandle.BindHandle } - internal static bool IsThreadPoolThread { get { return ThreadPoolWorkQueueThreadLocals.Current != null; } } + internal static bool IsThreadPoolThread { get { return ThreadPoolWorkQueueThreadLocals.threadLocals != null; } } } }