-
-
Save chaelim/0c4b311fd20c964ac6a5b43aa1a1933b to your computer and use it in GitHub Desktop.
Inflatable SeqLock
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// InflatableSeqLock | |
// Copyright(C) 2016 Oracle and/or its affiliates | |
// Dave Dice : https://blogs.oracle.com/dave | |
// | |
// Remarks: | |
// * Implements composite writer mutex and seqlock in a single word-sized lock. | |
// Allows optimistic reading and pessimistic writing. | |
// Readers don't write to shared synchronization metadata, which helps avoid | |
// coherence traffic. Readers must tolerate observing inconsistent state, however. | |
// * Writer mutex is based on LIFO-CR lock from http://arxiv.org/abs/1511.06035. | |
// * If 2 words were available then we'd simply use a (SeqLock,WriterLock) tuple. | |
// The writerlock could be a classic LIFO-CR lock. | |
// We'd update the SeqLock least significant bit to reflect the WriterLock state. | |
// This can be accomplished with normal non-atomic accesses. | |
// Additional fencing is required for those non-atomic SeqLock updates. | |
// * Does not support reentrant locking -- nesting | |
// The identity of lock owner is anonymous | |
// * Optimistic readers that fail repeatedly on the same episode are | |
// expected to fall back to pessimistic write locking to ensure progress. | |
// * Because we're interested in footprint and density, a single-word lock will be | |
// more vulnerable to false sharing for contended or promiscuous use cases. | |
// * LIFO-CR properties : | |
// + single-word lock | |
// + succession by direct handoff | |
// + aggressive self-deflating locks -- prompt deflation | |
// + Uses Spin-then-park polite waiting ; no unbounded spinning | |
// + provides local spinning in spin-then-park waiting | |
// + Requires CAS in both lock and unlock fast paths | |
// + Explicit lock-free stack of waiters | |
// The stacks are immune to the ABA problem as access is single-consumer-multiple-producer. | |
// The SC safety property derives from lock itself. | |
// Only the lock owner can pop. | |
// + When writer threads are waiting, the upper bits of the SeqLock field point to | |
// the head (top) of a stack of waiting threads. | |
// The set of waiting threads are linked together via on-stack "PushNode" elements. | |
// + uses Bernoulli trials for long-term fairness | |
// Occasionally select the tail of the stack for succession. | |
// Unfair over the short-term but enforces long-term fairness. | |
// A classic seqlock that uses the LSB as a test-and-set lock can exhibit | |
// sustained long-term unfairness and starvation. | |
// + Through happenstance, the current implementation happens to be | |
// "thread oblivious" in the sense that thread T1 might acquire the lock | |
// and thread T2 could subsequently release the lock. | |
// * A viable alternative to the LIFO-CR lock is the following: | |
// Yoshihiro Oyama, Kenjiro Taura, Akinori Yonezawa | |
// Executing Parallel Programs with Synchronization Bottlenecks Efficiently | |
// PDSIA 1999 | |
// * If necessary, a writer can access the displaced inflatable seqlock word | |
// by traversing the lock stack. The displaced word is the terminal element. | |
// * Lock word encoding = (Variant : CONTENDED : LOCKED) | |
// Normal : V:0:0 V is Version | |
// Locked : V:0:1 Writer is active | |
// Contended : P:1:1 P is pointer to PushNode element | |
// Illegal : *:0:1 | |
// CONTENDED=1 implies LOCKED=1 | |
// LOCKED=0 implies CONTENDED=0 | |
// CONTENDED=1 implies remainder contains pointer to PushNode | |
// CONTENDED=0 implies remainder contains version | |
// * The implementation is not strictly complaint C++ given the casting | |
// of bit operations on pointers. | |
// We assume the low order 2 bits of pointers are available for our use, | |
// which in turn presumes alignment of the referents. | |
// * Invariant: | |
// The displaced version number resides at the tail of the stack -- (V:0:1) | |
// * We assume a 64-bit environment with 62 bits for the seqlock. | |
// Roll-over is not a practical concern. | |
// * The implementation may batch together groups of writes under one seqlock "increment" | |
// episode. | |
// | |
// Remarks specific to the implementation sketch: | |
// * For the purposes of explication, we've implemented waiting via unbounded spinning. | |
// It is trivial, however, to convert the code to use park-unpark facilities. | |
// * The implementation is conservatively over-fenced with std::memory_order_seq_cst. | |
// Using more efficient and relaxed fencing is left as an exercise for the reader. | |
// * Potential thrown exceptions in the lambdas are ignored. | |
// * Various bits of support infrastructure are assumed. | |
// * To make the code a bit shorter I've used gotos. | |
static const auto LCAS = [](auto ARef, auto Cmp, auto Set) { | |
std::atomic_compare_exchange_strong (ARef, &Cmp, Set) ; | |
return Cmp ; | |
} ; | |
class InflatableSeqLock { | |
private: | |
// ISeqLock tag and VERSION(1) encoding ... | |
enum { LOCKED=1, CONTENDED=2, TAGMASK=3, VERSION1=4, } ; | |
std::atomic<uintptr_t> ISeqLock {VERSION1} ; | |
// Policies for succession ... | |
static const enum {LIBERAL, STRICT} SuccessionMode {LIBERAL} ; | |
static const double TailProbability = 0.0001 ; | |
static_assert(sizeof(uintptr_t) == 8, "invariant") ; | |
struct PushNode { | |
std::atomic<uintptr_t> Next alignas(128) {0} ; | |
std::atomic<int> Admit {0} ; | |
} ; | |
// Pass lock ownership to thread referenced by "t" | |
// Unpark(t) as necessary | |
void Resume (PushNode * t) { | |
// FENCE LD-ST |ST | |
ASSERT (t->Admit == 0) ; | |
t->Admit = 1 ; | |
} | |
public: | |
// Standard RAII wrapper | |
class Guard { | |
private: | |
InflatableSeqLock & I ; | |
public: | |
Guard (InflatableSeqLock & s) : I(s) { I.Lock() ; } | |
Guard (InflatableSeqLock * s) : I(*s) { I.Lock() ; } | |
~Guard() { I.Unlock() ; } | |
Guard (const Guard &) = delete ; | |
Guard & operator=(const Guard &) = delete ; | |
} ; | |
uintptr_t ReadVersion() { return ISeqLock ;} | |
int IsLocked (uintptr_t v) { return (v & LOCKED) != 0; } | |
int Validate (uintptr_t v) { return ((v & LOCKED) | (v ^ ISeqLock)) == 0; } | |
void Lock() { | |
uintptr_t w = ISeqLock ; | |
Again : (0) ; | |
if ((w & LOCKED) == 0) { | |
// Uncontended lock fast path ... | |
// Locked == 0 implies Contended == 0 | |
ASSERT ((w & CONTENDED) == 0) ; | |
const uintptr_t v = LCAS (&ISeqLock, w, w|LOCKED) ; | |
if (v == w) { | |
return ; | |
} | |
w = v ; | |
// CAS failed via remote interference; we lost the race | |
// Inopportune interleaving | |
goto Again ; | |
} | |
// Contended arrival ... | |
// Push reference to TSelf onto arrival stack | |
PushNode TSelf ; | |
TSelf.Admit = 0 ; | |
TSelf.Next = w ; | |
ASSERT ((uintptr_t(&TSelf) & TAGMASK) == 0) ; | |
const uintptr_t v = LCAS (&ISeqLock, w, uintptr_t(&TSelf)|LOCKED|CONTENDED) ; | |
if (v != w) { | |
w = v ; | |
goto Again ; | |
} | |
// Contended waiting phase ... | |
// Should park() here | |
while (TSelf.Admit == 0) { | |
Pause() ; | |
} | |
// FENCE: LD | ST-LD | |
ASSERT (IsLocked(ISeqLock)) ; | |
} | |
void Unlock () { | |
// Consider: prefetch-for-write | |
uintptr_t w = ISeqLock ; | |
ASSERT (IsLocked(w)) ; | |
if ((w & CONTENDED) == 0) { | |
// Fast uncontended unlock path ... | |
// Version number remains installed in ISeqLock | |
// Transition V:0:1 --> V+1:0:0 | |
// Clear LOCKED and increment version field in ISeqLock | |
// Stream of unlocked version numbers should be monotone ascending. | |
uintptr_t v = LCAS (&ISeqLock, w, (w & ~TAGMASK)+VERSION1) ; | |
if (w == v) { | |
return ; | |
} | |
w = v ; | |
// New threads have arrived ... | |
} | |
ASSERT ((w & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; | |
// Impose long-term fairness - anti-starvation | |
// periodically select tail as successor | |
// Bernoulli trial determines if we pass to tail. | |
// All interior elements should be LOCKED|CONTENTED | |
// The ultimate element is the displaced version number and is LOCKED only. | |
// Runs in O(T) time where T is the depth of the stack | |
if (Bernoulli (TailProbability)) { | |
PushNode * Penult = nullptr ; | |
PushNode * Tail = reinterpret_cast<PushNode *>(w & ~TAGMASK) ; | |
// traverse list to find penultimate | |
for (;;) { | |
uintptr_t nw = Tail->Next ; | |
ASSERT (nw & LOCKED) ; | |
PushNode * nxt = reinterpret_cast<PushNode *>(nw & ~TAGMASK) ; | |
if ((nw & TAGMASK) == LOCKED) break ; | |
Penult = Tail ; | |
Tail = nxt ; | |
} | |
if (Penult != nullptr) { | |
// claim : At least two threads on the stack | |
ASSERT ((uintptr_t(ISeqLock) & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; | |
ASSERT (Tail != nullptr) ; | |
ASSERT ((uintptr_t(Penult->Next) & ~TAGMASK) == uintptr_t(Tail)) ; | |
ASSERT ((uintptr_t(Tail->Next) & (LOCKED|CONTENDED)) == LOCKED) ; | |
// remove tail from list and grant ownership to tail | |
// Truncate list | |
Penult->Next = Underlying(Tail->Next) ; | |
Resume(Tail) ; | |
return ; | |
} | |
} | |
// There is at least one thread on the stack | |
// While locked, the stack only grows : push-only | |
// While the lock remains held, code in unlock should never observe | |
// the stack shrink, or the top-of-stack change from LOCKED|CONTENDED to any other state. | |
// It _is legal to observe transitions from LOCKED to LOCKED|CONTENDED. | |
TryPop : (0) ; | |
auto Head = reinterpret_cast<PushNode *> (w & ~TAGMASK) ; | |
ASSERT (Head != nullptr) ; | |
uintptr_t T2 = Head->Next ; | |
const uintptr_t v = LCAS (&ISeqLock, w, T2) ; | |
if (v == w) { | |
// CAS was successful | |
Resume(Head) ; | |
return ; | |
} | |
// CAS failed; we raced and lost | |
// inopportune interleaving -- concurrent interference and race | |
// some other thread modified ISeqLock in the LD-CAS window above. | |
// new threads have arrived in interim window | |
ASSERT ((v & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; | |
if (SuccessionMode == STRICT) { | |
// Force strict pedantic LIFO ordering. | |
// Retry the "pop" operation. | |
// unlock() is NOT constant-time for STRICT mode. | |
w = v ; | |
goto TryPop ; | |
} | |
ASSERT (SuccessionMode == LIBERAL) ; | |
// CAS failed, so there must be 2 or more threads on the stack | |
// Wake the 2nd thread. | |
// unlock() runs in constant time with no loops but the admission | |
// order is not strict LIFO. | |
Head = reinterpret_cast<PushNode *> (v & ~TAGMASK) ; | |
ASSERT (Head != nullptr) ; | |
T2 = Head->Next ; | |
ASSERT ((T2 & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; | |
const auto nxt = reinterpret_cast<PushNode *> (T2 & ~TAGMASK) ; | |
ASSERT (nxt != nullptr) ; | |
Head->Next = nxt->Next.load() ; | |
Resume (nxt) ; | |
} | |
// Alternative concise usage: int v = SeqLock + [&]{....} ; | |
// Allow lambda body to return values thru seqlock. | |
auto operator+ (auto && fn) -> decltype(fn()) { | |
Guard G (this) ; | |
return fn() ; | |
} | |
void Write (std::function<void(void)> Activity) { | |
Guard G (this) ; | |
Activity() ; | |
} | |
// Encapsulated read operator that takes a reader-CS expressed as a lambda | |
// This allows us to hide the policy code that reverts from | |
// optimistic reads to pessimistic writes. | |
void Read (std::function<void(void)> Activity) { | |
// Optimistic-speculative phase | |
int OptimisticAttempts = 5 ; | |
for (;;) { | |
if (--OptimisticAttempts < 0) break ; | |
if (TryRead (Activity) == 0) return ; | |
} | |
// revert to pessimistic mutual exclusion - ensure eventual progress | |
Write(Activity) ; | |
} | |
// TryRead() return values | |
// 0 : success : observed values were consistent | |
// 1 : aborted because writer was present at start of attempt | |
// 2 : aborted because writer arrived during attempt | |
int TryRead (std::function<void(void)> Activity) { | |
const auto v = ReadVersion() ; | |
if (IsLocked(v)) return 1 ; | |
// speculative read attempt ... | |
// The reader can see inconsistent values and is expected to | |
// tolerate and behave gracefully. | |
// Be extremely careful of dependent loads and pointers! | |
// FENCE : LD | LD | |
Activity() ; | |
// FENCE : LD | LD | |
if (Validate(v)) return 0 ; | |
return 2 ; | |
} | |
} ; | |
static_assert(sizeof(InflatableSeqLock) == sizeof(intptr_t), "invariant") ; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment