chaelim · December 13, 2016 22:42
diff --git a/InflatableSeqLock.cpp b/InflatableSeqLock.cpp
 // InflatableSeqLock
 // Copyright(C) 2016 Oracle and/or its affiliates
 // Dave Dice : https://blogs.oracle.com/dave
 //
 // Remarks:
 // *  Implements composite writer mutex and seqlock in a single word-sized lock.   
 //    Allows optimistic reading and pessimistic writing.
 //    Readers don't write to shared synchronization metadata, which helps avoid
 //    coherence traffic.  Readers must tolerate observing inconsistent state, however.  
 // *  Writer mutex is based on LIFO-CR lock from http://arxiv.org/abs/1511.06035.  
 // *  If 2 words were available then we'd simply use a (SeqLock,WriterLock) tuple.
 //    The writerlock could be a classic LIFO-CR lock. 
 //    We'd update the SeqLock least significant bit to reflect the WriterLock state.
 //    This can be accomplished with normal non-atomic accesses. 
 //    Additional fencing is required for those non-atomic SeqLock updates. 
 // *  Does not support reentrant locking -- nesting
 //    The identity of lock owner is anonymous
 // *  Optimistic readers that fail repeatedly on the same episode are
 //    expected to fall back to pessimistic write locking to ensure progress.
 // *  Because we're interested in footprint and density, a single-word lock will be 
 //    more vulnerable to false sharing for contended or promiscuous use cases.  
 // *  LIFO-CR properties :
 //    +  single-word lock
 //    +  succession by direct handoff
 //    +  aggressive self-deflating locks -- prompt deflation
 //    +  Uses Spin-then-park polite waiting ; no unbounded spinning
 //    +  provides local spinning in spin-then-park waiting
 //    +  Requires CAS in both lock and unlock fast paths
 //    +  Explicit lock-free stack of waiters
 //       The stacks are immune to the ABA problem as access is single-consumer-multiple-producer.
 //       The SC safety property derives from lock itself.
 //       Only the lock owner can pop. 
 //    +  When writer threads are waiting, the upper bits of the SeqLock field point to
 //       the head (top) of a stack of waiting threads.   
 //       The set of waiting threads are linked together via on-stack "PushNode" elements.  
 //    +  uses Bernoulli trials for long-term fairness
 //       Occasionally select the tail of the stack for succession. 
 //       Unfair over the short-term but enforces long-term fairness.  
 //       A classic seqlock that uses the LSB as a test-and-set lock can exhibit 
 //       sustained long-term unfairness and starvation.  
 //    +  Through happenstance, the current implementation happens to be
 //       "thread oblivious" in the sense that thread T1 might acquire the lock
 //       and thread T2 could subsequently release the lock.  
 // *  A viable alternative to the LIFO-CR lock is the following:
 //    Yoshihiro Oyama, Kenjiro Taura, Akinori Yonezawa
 //    Executing Parallel Programs with Synchronization Bottlenecks Efficiently
 //    PDSIA 1999
 // *  If necessary, a writer can access the displaced inflatable seqlock word
 //    by traversing the lock stack.  The displaced word is the terminal element.  
 // *  Lock word encoding = (Variant : CONTENDED : LOCKED)
 //    Normal    : V:0:0     V is Version
 //    Locked    : V:0:1     Writer is active
 //    Contended : P:1:1     P is pointer to PushNode element
 //    Illegal   : *:1:0 
 //    CONTENDED=1 implies LOCKED=1 
 //    LOCKED=0 implies CONTENDED=0
 //    CONTENDED=1 implies remainder contains pointer to PushNode
 //    CONTENDED=0 implies remainder contains version
 // *  The implementation is not strictly compliant C++ given the casting
 //    of pointers to integers and the bit operations performed on them.  
 //    We assume the low order 2 bits of pointers are available for our use,
 //    which in turn presumes alignment of the referents.  
 // *  Invariant: 
 //    The displaced version number resides at the tail of the stack -- (V:0:1) 
 // *  We assume a 64-bit environment with 62 bits for the seqlock.
 //    Roll-over is not a practical concern.
 // *  The implementation may batch together groups of writes under one seqlock "increment"
 //    episode.  
 // 
 // Remarks specific to the implementation sketch:
 // *  For the purposes of explication, we've implemented waiting via unbounded spinning.
 //    It is trivial, however, to convert the code to use park-unpark facilities. 
 // *  The implementation is conservatively over-fenced with std::memory_order_seq_cst.
 //    Using more efficient and relaxed fencing is left as an exercise for the reader. 
 // *  Potential thrown exceptions in the lambdas are ignored.  
 // *  Various bits of support infrastructure are assumed.  
 // *  To make the code a bit shorter I've used gotos. 

 // Value-returning compare-and-swap.
 // Tries to change *ARef from Cmp to Set and yields the value that was actually
 // found in *ARef : the exchange took place if and only if the result equals Cmp.
 static const auto LCAS = [](auto ARef, auto Cmp, auto Set) { 
   auto Witness = Cmp ; 
   // On failure the library overwrites Witness with the current contents
   std::atomic_compare_exchange_strong (ARef, &Witness, Set) ; 
   return Witness ; 
 } ;

 // InflatableSeqLock : a writer mutex and a sequence lock sharing one atomic word.
 //
 // Lock word states (the low two bits are the tag, see the enum below):
 //    V:0:0   unlocked; V is the version that optimistic readers validate
 //    V:0:1   held by a writer, nobody waiting
 //    P:1:1   held by a writer; P addresses the top PushNode of the waiter stack
 // While contended, the displaced V:0:1 word is kept in the Next field of the
 // bottom-most PushNode.
 //
 // This sketch relies on infrastructure declared elsewhere : ASSERT(), Pause(),
 // Bernoulli() and the value-returning LCAS helper.
 // Waiting is by spinning and every atomic access uses the default
 // (sequentially consistent) ordering.
 class InflatableSeqLock { 
  private:
   // ISeqLock tag and VERSION(1) encoding ...
   // LOCKED and CONTENDED are the tag bits, TAGMASK covers both, and VERSION1
   // is one version step : the smallest increment that leaves the tag untouched.
   enum { LOCKED=1, CONTENDED=2, TAGMASK=3, VERSION1=4, } ; 

   // The only instance field; starts unlocked at version 1.
   std::atomic<uintptr_t> ISeqLock {VERSION1} ; 

   // Policies for succession ...
   // LIBERAL : unlock never retries its pop; admission order is not strict LIFO
   // STRICT  : unlock retries the pop until it succeeds; strict LIFO
   static const enum {LIBERAL, STRICT} SuccessionMode {LIBERAL} ; 

   // Argument of the Bernoulli trial that makes a contended unlock hand the
   // lock to the oldest waiter instead of the newest.
   // constexpr : an in-class initializer on a plain "static const double"
   // is ill-formed in standard C++.
   static constexpr double TailProbability = 0.0001 ; 

   static_assert(sizeof(uintptr_t) == 8, "invariant") ; 

   // One element of the waiter stack.  Lock() creates it as a local variable,
   // so it lives in the frame of the waiting thread.
   struct PushNode {
     // Next : the lock word this node displaced -- either the next-older
     //        waiter (P:1:1) or, for the bottom node, the version (V:0:1).
     std::atomic<uintptr_t> Next alignas(128) {0} ; 
     // Admit : set to 1 by Resume() to pass ownership to this waiter.
     std::atomic<int> Admit {0} ; 
   } ;  

   // Pass lock ownership to thread referenced by "t"
   // Unpark(t) as necessary
   void Resume (PushNode * t) {
     // FENCE LD-ST |ST 
     ASSERT (t->Admit == 0) ; 
     t->Admit = 1 ; 
   }

  public:

   // Standard RAII wrapper : Lock() on construction, Unlock() on destruction.
   // Not copyable.
   class Guard {
    private:
     InflatableSeqLock & I ; 
    public:
     Guard    (InflatableSeqLock & s) : I(s) { I.Lock() ; } 
     Guard    (InflatableSeqLock * s) : I(*s) { I.Lock() ; } 
     ~Guard() { I.Unlock() ; } 
     Guard    (const Guard &) = delete ;
     Guard &  operator=(const Guard &) = delete ;
   } ; 

   // Returns the current raw lock word (version or pointer, tag bits included).
   uintptr_t ReadVersion() { return ISeqLock ;} 

   // Returns non-zero if the LOCKED bit is set in the sampled word v.
   int IsLocked (uintptr_t v) { return (v & LOCKED) != 0; } 

   // Returns non-zero if v was sampled unlocked and ISeqLock still equals v.
   int Validate (uintptr_t v) { return ((v & LOCKED) | (v ^ ISeqLock)) == 0; } 

   // Acquire the writer lock.
   // Unlocked word  : CAS the LOCKED bit in and return.
   // Locked word    : push a PushNode that records the displaced word, then
   //                  spin until an unlocking thread sets Admit.
   // Not reentrant.
   void Lock() { 
     uintptr_t w = ISeqLock ; 
     Again : (0) ; 
     if ((w & LOCKED) == 0) {
       // Uncontended lock fast path ...
       // Locked == 0 implies Contended == 0
       ASSERT ((w & CONTENDED) == 0) ; 
       const uintptr_t v = LCAS (&ISeqLock, w, w|LOCKED) ; 
       if (v == w) {
         return ; 
       }
       w = v ; 
       // CAS failed via remote interference; we lost the race
       // Inopportune interleaving
       goto Again ;
     }

     // Contended arrival ... 
     // Push reference to TSelf onto arrival stack
     // TSelf.Next keeps the word we are about to displace.
     PushNode TSelf ; 
     TSelf.Admit = 0 ;
     TSelf.Next  = w ; 
     ASSERT ((uintptr_t(&TSelf) & TAGMASK) == 0) ; 
     const uintptr_t v = LCAS (&ISeqLock, w, uintptr_t(&TSelf)|LOCKED|CONTENDED) ; 
     if (v != w) { 
       // The word changed under us; re-evaluate from the top with the fresh value.
       w = v ; 
       goto Again ; 
     }

     // Contended waiting phase ...
     // Should park() here 
     while (TSelf.Admit == 0) {
       Pause() ; 
     }

     // Ownership was handed to us directly; the word is still marked LOCKED.
     // FENCE: LD | ST-LD
     ASSERT (IsLocked(ISeqLock)) ; 
   }

   // Release the writer lock.
   // No waiters : clear the tag bits and advance the version by one step.
   // Waiters    : hand ownership directly to one of them; the version is not
   //              advanced on a handoff.
   void Unlock () {
     // Consider: prefetch-for-write
     uintptr_t w = ISeqLock ; 
     ASSERT (IsLocked(w)) ; 
     if ((w & CONTENDED) == 0) {
       // Fast uncontended unlock path ...
       // Version number remains installed in ISeqLock
       // Transition V:0:1 --> V+1:0:0
       // Clear LOCKED and increment version field in ISeqLock
       // Stream of unlocked version numbers should be monotone ascending.
       uintptr_t v = LCAS (&ISeqLock, w, (w & ~TAGMASK)+VERSION1) ; 
       if (w == v) {
         return ; 
       }
       w = v ; 
       // New threads have arrived ...
     }
     ASSERT ((w & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; 

     // Impose long-term fairness - anti-starvation
     // periodically select tail as successor
     // Bernoulli trial determines if we pass to tail.  
     // (Bernoulli() is external; presumably true with the given probability.)
     // All interior elements should be LOCKED|CONTENDED
     // The ultimate element is the displaced version number and is LOCKED only.  
     // Runs in O(T) time where T is the depth of the stack
     if (Bernoulli (TailProbability)) { 
       PushNode * Penult = nullptr ; 
       PushNode * Tail   = reinterpret_cast<PushNode *>(w & ~TAGMASK) ; 
       // traverse list to find penultimate
       for (;;) { 
         uintptr_t nw = Tail->Next ; 
         ASSERT (nw & LOCKED) ;
         PushNode * nxt =  reinterpret_cast<PushNode *>(nw & ~TAGMASK) ; 
         // A LOCKED-only tag marks the displaced version : Tail is the last waiter.
         if ((nw & TAGMASK) == LOCKED) break ; 
         Penult = Tail ; 
         Tail = nxt ; 
       }
       if (Penult != nullptr) { 
         // claim : At least two threads on the stack 
         ASSERT ((uintptr_t(ISeqLock) & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ;  
         ASSERT (Tail != nullptr) ; 
         ASSERT ((uintptr_t(Penult->Next) & ~TAGMASK) == uintptr_t(Tail)) ; 
         ASSERT ((uintptr_t(Tail->Next) & (LOCKED|CONTENDED)) == LOCKED) ; 
         // remove tail from list and grant ownership to tail
         // Truncate list : Penult inherits the displaced version word.
         Penult->Next = Tail->Next.load() ; 
         Resume(Tail) ; 
         return ; 
       }
       // Only one waiter : fall through to the ordinary pop below.
     }

     // There is at least one thread on the stack
     // While locked, the stack only grows : push-only
     // While the lock remains held, code in unlock should never observe
     // the stack shrink, or the top-of-stack change from LOCKED|CONTENDED to any other state. 
     // It _is legal to observe transitions from LOCKED to LOCKED|CONTENDED.   
     TryPop : (0) ; 
     auto Head = reinterpret_cast<PushNode *> (w & ~TAGMASK) ; 
     ASSERT (Head != nullptr) ; 
     uintptr_t T2 = Head->Next ; 
     // Pop : reinstall whatever Head displaced, then admit Head.
     const uintptr_t v = LCAS (&ISeqLock, w, T2) ; 
     if (v == w) { 
       // CAS was successful 
       Resume(Head) ; 
       return ; 
     }

     // CAS failed; we raced and lost
     // inopportune interleaving -- concurrent interference and race
     // some other thread modified ISeqLock in the LD-CAS window above. 
     // new threads have arrived in interim window 
     ASSERT ((v & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; 

     if (SuccessionMode == STRICT) { 
       // Force strict pedantic LIFO ordering. 
       // Retry the "pop" operation.  
       // unlock() is NOT constant-time for STRICT mode.
       w = v ; 
       goto TryPop ; 
     }
     
     ASSERT (SuccessionMode == LIBERAL) ; 
     // CAS failed, so there must be 2 or more threads on the stack
     // Wake the 2nd thread. 
     // unlock() runs in constant time with no loops but the admission
     // order is not strict LIFO.  
     Head = reinterpret_cast<PushNode *> (v & ~TAGMASK) ; 
     ASSERT (Head != nullptr) ; 
     T2   = Head->Next ; 
     ASSERT ((T2 & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ; 
     const auto nxt = reinterpret_cast<PushNode *> (T2 & ~TAGMASK) ; 
     ASSERT (nxt != nullptr) ; 
     // Unlink the 2nd node : the new top now points past it.
     Head->Next = nxt->Next.load() ; 
     Resume (nxt) ; 
   }

   // Alternative concise usage: int v = SeqLock + [&]{....} ; 
   // Allow lambda body to return values thru seqlock.  
   // Runs fn while holding the writer lock and returns whatever fn returns.
   // Written as a member template : "auto" in the parameter list of an
   // ordinary function is only standard from C++20 on.
   template <typename Fn>
   auto operator+ (Fn && fn) -> decltype(fn()) { 
     Guard G (this) ; 
     return fn() ; 
   }

   // Runs Activity under the writer lock.
   void Write (std::function<void(void)> Activity) { 
     Guard G (this) ; 
     Activity() ; 
   }

   // Encapsulated read operator that takes a reader-CS expressed as a lambda
   // This allows us to hide the policy code that reverts from 
   // optimistic reads to pessimistic writes. 
   // Activity may run several times (up to 5 optimistic attempts plus one
   // run under the lock), so it must be safe to repeat.
   void Read (std::function<void(void)> Activity) { 
     // Optimistic-speculative phase
     int OptimisticAttempts = 5 ; 
     for (;;) {
       if (--OptimisticAttempts < 0) break ; 
       if (TryRead (Activity) == 0) return ; 
     }

     // revert to pessimistic mutual exclusion - ensure eventual progress
     Write(Activity) ; 
   }

   // One optimistic read attempt.
   // TryRead() return values
   // 0 : success : observed values were consistent
   // 1 : aborted because writer was present at start of attempt
   //     (Activity is not run in this case)
   // 2 : aborted because writer arrived during attempt
   int TryRead (std::function<void(void)> Activity) { 
     const auto v = ReadVersion() ; 
     if (IsLocked(v)) return 1 ; 
     // speculative read attempt ...
     // The reader can see inconsistent values and is expected to 
     // tolerate and behave gracefully.
     // Be extremely careful of dependent loads and pointers!
     // FENCE : LD | LD
     Activity() ; 
     // FENCE : LD | LD
     if (Validate(v)) return 0 ; 
     return 2 ; 
   }
 } ;
 
 // The lock must stay exactly one pointer-sized word : compilation fails if
 // InflatableSeqLock ever grows beyond sizeof(intptr_t).
 static_assert(sizeof(InflatableSeqLock) == sizeof(intptr_t), "invariant") ;
	// InflatableSeqLock
	// Copyright(C) 2016 Oracle and/or its affiliates
	// Dave Dice : https://blogs.oracle.com/dave
	//
	// Remarks:
	// * Implements composite writer mutex and seqlock in a single word-sized lock.
	// Allows optimistic reading and pessimistic writing.
	// Readers don't write to shared synchronization metadata, which helps avoid
	// coherence traffic. Readers must tolerate observing inconsistent state, however.
	// * Writer mutex is based on LIFO-CR lock from http://arxiv.org/abs/1511.06035.
	// * If 2 words were available then we'd simply use a (SeqLock,WriterLock) tuple.
	// The writerlock could be a classic LIFO-CR lock.
	// We'd update the SeqLock least significant bit to reflect the WriterLock state.
	// This can be accomplished with normal non-atomic accesses.
	// Additional fencing is required for those non-atomic SeqLock updates.
	// * Does not support reentrant locking -- nesting
	// The identity of lock owner is anonymous
	// * Optimistic readers that fail repeatedly on the same episode are
	// expected to fall back to pessimistic write locking to ensure progress.
	// * Because we're interested in footprint and density, a single-word lock will be
	// more vulnerable to false sharing for contended or promiscuous use cases.
	// * LIFO-CR properties :
	// + single-word lock
	// + succession by direct handoff
	// + aggressive self-deflating locks -- prompt deflation
	// + Uses Spin-then-park polite waiting ; no unbounded spinning
	// + provides local spinning in spin-then-park waiting
	// + Requires CAS in both lock and unlock fast paths
	// + Explicit lock-free stack of waiters
	// The stacks are immune to the ABA problem as access is single-consumer-multiple-producer.
	// The SC safety property derives from lock itself.
	// Only the lock owner can pop.
	// + When writer threads are waiting, the upper bits of the SeqLock field point to
	// the head (top) of a stack of waiting threads.
	// The set of waiting threads are linked together via on-stack "PushNode" elements.
	// + uses Bernoulli trials for long-term fairness
	// Occasionally select the tail of the stack for succession.
	// Unfair over the short-term but enforces long-term fairness.
	// A classic seqlock that uses the LSB as a test-and-set lock can exhibit
	// sustained long-term unfairness and starvation.
	// + Through happenstance, the current implementation happens to be
	// "thread oblivious" in the sense that thread T1 might acquire the lock
	// and thread T2 could subsequently release the lock.
	// * A viable alternative to the LIFO-CR lock is the following:
	// Yoshihiro Oyama, Kenjiro Taura, Akinori Yonezawa
	// Executing Parallel Programs with Synchronization Bottlenecks Efficiently
	// PDSIA 1999
	// * If necessary, a writer can access the displaced inflatable seqlock word
	// by traversing the lock stack. The displaced word is the terminal element.
	// * Lock word encoding = (Variant : CONTENDED : LOCKED)
	// Normal : V:0:0 V is Version
	// Locked : V:0:1 Writer is active
	// Contended : P:1:1 P is pointer to PushNode element
	// Illegal : *:1:0
	// CONTENDED=1 implies LOCKED=1
	// LOCKED=0 implies CONTENDED=0
	// CONTENDED=1 implies remainder contains pointer to PushNode
	// CONTENDED=0 implies remainder contains version
	// * The implementation is not strictly compliant C++ given the casting
	// of pointers to integers and the bit operations performed on them.
	// We assume the low order 2 bits of pointers are available for our use,
	// which in turn presumes alignment of the referents.
	// * Invariant:
	// The displaced version number resides at the tail of the stack -- (V:0:1)
	// * We assume a 64-bit environment with 62 bits for the seqlock.
	// Roll-over is not a practical concern.
	// * The implementation may batch together groups of writes under one seqlock "increment"
	// episode.
	//
	// Remarks specific to the implementation sketch:
	// * For the purposes of explication, we've implemented waiting via unbounded spinning.
	// It is trivial, however, to convert the code to use park-unpark facilities.
	// * The implementation is conservatively over-fenced with std::memory_order_seq_cst.
	// Using more efficient and relaxed fencing is left as an exercise for the reader.
	// * Potential thrown exceptions in the lambdas are ignored.
	// * Various bits of support infrastructure are assumed.
	// * To make the code a bit shorter I've used gotos.

	// Value-returning compare-and-swap.
	// Tries to change *ARef from Cmp to Set and yields the value that was actually
	// found in *ARef : the exchange took place if and only if the result equals Cmp.
	static const auto LCAS = [](auto ARef, auto Cmp, auto Set) {
	  auto Witness = Cmp ;
	  // On failure the library overwrites Witness with the current contents
	  std::atomic_compare_exchange_strong (ARef, &Witness, Set) ;
	  return Witness ;
	} ;

	// InflatableSeqLock : a writer mutex and a sequence lock sharing one atomic word.
	//
	// Lock word states (the low two bits are the tag, see the enum below):
	//    V:0:0   unlocked; V is the version that optimistic readers validate
	//    V:0:1   held by a writer, nobody waiting
	//    P:1:1   held by a writer; P addresses the top PushNode of the waiter stack
	// While contended, the displaced V:0:1 word is kept in the Next field of the
	// bottom-most PushNode.
	//
	// This sketch relies on infrastructure declared elsewhere : ASSERT(), Pause(),
	// Bernoulli() and the value-returning LCAS helper.
	// Waiting is by spinning and every atomic access uses the default
	// (sequentially consistent) ordering.
	class InflatableSeqLock {
	 private:
	  // ISeqLock tag and VERSION(1) encoding ...
	  // LOCKED and CONTENDED are the tag bits, TAGMASK covers both, and VERSION1
	  // is one version step : the smallest increment that leaves the tag untouched.
	  enum { LOCKED=1, CONTENDED=2, TAGMASK=3, VERSION1=4, } ;

	  // The only instance field; starts unlocked at version 1.
	  std::atomic<uintptr_t> ISeqLock {VERSION1} ;

	  // Policies for succession ...
	  // LIBERAL : unlock never retries its pop; admission order is not strict LIFO
	  // STRICT  : unlock retries the pop until it succeeds; strict LIFO
	  static const enum {LIBERAL, STRICT} SuccessionMode {LIBERAL} ;

	  // Argument of the Bernoulli trial that makes a contended unlock hand the
	  // lock to the oldest waiter instead of the newest.
	  // constexpr : an in-class initializer on a plain "static const double"
	  // is ill-formed in standard C++.
	  static constexpr double TailProbability = 0.0001 ;

	  static_assert(sizeof(uintptr_t) == 8, "invariant") ;

	  // One element of the waiter stack.  Lock() creates it as a local variable,
	  // so it lives in the frame of the waiting thread.
	  struct PushNode {
	    // Next : the lock word this node displaced -- either the next-older
	    //        waiter (P:1:1) or, for the bottom node, the version (V:0:1).
	    std::atomic<uintptr_t> Next alignas(128) {0} ;
	    // Admit : set to 1 by Resume() to pass ownership to this waiter.
	    std::atomic<int> Admit {0} ;
	  } ;

	  // Pass lock ownership to thread referenced by "t"
	  // Unpark(t) as necessary
	  void Resume (PushNode * t) {
	    // FENCE LD-ST |ST
	    ASSERT (t->Admit == 0) ;
	    t->Admit = 1 ;
	  }

	 public:

	  // Standard RAII wrapper : Lock() on construction, Unlock() on destruction.
	  // Not copyable.
	  class Guard {
	   private:
	    InflatableSeqLock & I ;
	   public:
	    Guard (InflatableSeqLock & s) : I(s) { I.Lock() ; }
	    Guard (InflatableSeqLock * s) : I(*s) { I.Lock() ; }
	    ~Guard() { I.Unlock() ; }
	    Guard (const Guard &) = delete ;
	    Guard & operator=(const Guard &) = delete ;
	  } ;

	  // Returns the current raw lock word (version or pointer, tag bits included).
	  uintptr_t ReadVersion() { return ISeqLock ;}

	  // Returns non-zero if the LOCKED bit is set in the sampled word v.
	  int IsLocked (uintptr_t v) { return (v & LOCKED) != 0; }

	  // Returns non-zero if v was sampled unlocked and ISeqLock still equals v.
	  int Validate (uintptr_t v) { return ((v & LOCKED) | (v ^ ISeqLock)) == 0; }

	  // Acquire the writer lock.
	  // Unlocked word  : CAS the LOCKED bit in and return.
	  // Locked word    : push a PushNode that records the displaced word, then
	  //                  spin until an unlocking thread sets Admit.
	  // Not reentrant.
	  void Lock() {
	    uintptr_t w = ISeqLock ;
	    Again : (0) ;
	    if ((w & LOCKED) == 0) {
	      // Uncontended lock fast path ...
	      // Locked == 0 implies Contended == 0
	      ASSERT ((w & CONTENDED) == 0) ;
	      const uintptr_t v = LCAS (&ISeqLock, w, w|LOCKED) ;
	      if (v == w) {
	        return ;
	      }
	      w = v ;
	      // CAS failed via remote interference; we lost the race
	      // Inopportune interleaving
	      goto Again ;
	    }

	    // Contended arrival ...
	    // Push reference to TSelf onto arrival stack
	    // TSelf.Next keeps the word we are about to displace.
	    PushNode TSelf ;
	    TSelf.Admit = 0 ;
	    TSelf.Next = w ;
	    ASSERT ((uintptr_t(&TSelf) & TAGMASK) == 0) ;
	    const uintptr_t v = LCAS (&ISeqLock, w, uintptr_t(&TSelf)|LOCKED|CONTENDED) ;
	    if (v != w) {
	      // The word changed under us; re-evaluate from the top with the fresh value.
	      w = v ;
	      goto Again ;
	    }

	    // Contended waiting phase ...
	    // Should park() here
	    while (TSelf.Admit == 0) {
	      Pause() ;
	    }

	    // Ownership was handed to us directly; the word is still marked LOCKED.
	    // FENCE: LD | ST-LD
	    ASSERT (IsLocked(ISeqLock)) ;
	  }

	  // Release the writer lock.
	  // No waiters : clear the tag bits and advance the version by one step.
	  // Waiters    : hand ownership directly to one of them; the version is not
	  //              advanced on a handoff.
	  void Unlock () {
	    // Consider: prefetch-for-write
	    uintptr_t w = ISeqLock ;
	    ASSERT (IsLocked(w)) ;
	    if ((w & CONTENDED) == 0) {
	      // Fast uncontended unlock path ...
	      // Version number remains installed in ISeqLock
	      // Transition V:0:1 --> V+1:0:0
	      // Clear LOCKED and increment version field in ISeqLock
	      // Stream of unlocked version numbers should be monotone ascending.
	      uintptr_t v = LCAS (&ISeqLock, w, (w & ~TAGMASK)+VERSION1) ;
	      if (w == v) {
	        return ;
	      }
	      w = v ;
	      // New threads have arrived ...
	    }
	    ASSERT ((w & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ;

	    // Impose long-term fairness - anti-starvation
	    // periodically select tail as successor
	    // Bernoulli trial determines if we pass to tail.
	    // (Bernoulli() is external; presumably true with the given probability.)
	    // All interior elements should be LOCKED|CONTENDED
	    // The ultimate element is the displaced version number and is LOCKED only.
	    // Runs in O(T) time where T is the depth of the stack
	    if (Bernoulli (TailProbability)) {
	      PushNode * Penult = nullptr ;
	      PushNode * Tail = reinterpret_cast<PushNode *>(w & ~TAGMASK) ;
	      // traverse list to find penultimate
	      for (;;) {
	        uintptr_t nw = Tail->Next ;
	        ASSERT (nw & LOCKED) ;
	        PushNode * nxt = reinterpret_cast<PushNode *>(nw & ~TAGMASK) ;
	        // A LOCKED-only tag marks the displaced version : Tail is the last waiter.
	        if ((nw & TAGMASK) == LOCKED) break ;
	        Penult = Tail ;
	        Tail = nxt ;
	      }
	      if (Penult != nullptr) {
	        // claim : At least two threads on the stack
	        ASSERT ((uintptr_t(ISeqLock) & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ;
	        ASSERT (Tail != nullptr) ;
	        ASSERT ((uintptr_t(Penult->Next) & ~TAGMASK) == uintptr_t(Tail)) ;
	        ASSERT ((uintptr_t(Tail->Next) & (LOCKED|CONTENDED)) == LOCKED) ;
	        // remove tail from list and grant ownership to tail
	        // Truncate list : Penult inherits the displaced version word.
	        Penult->Next = Tail->Next.load() ;
	        Resume(Tail) ;
	        return ;
	      }
	      // Only one waiter : fall through to the ordinary pop below.
	    }

	    // There is at least one thread on the stack
	    // While locked, the stack only grows : push-only
	    // While the lock remains held, code in unlock should never observe
	    // the stack shrink, or the top-of-stack change from LOCKED|CONTENDED to any other state.
	    // It _is legal to observe transitions from LOCKED to LOCKED|CONTENDED.
	    TryPop : (0) ;
	    auto Head = reinterpret_cast<PushNode *> (w & ~TAGMASK) ;
	    ASSERT (Head != nullptr) ;
	    uintptr_t T2 = Head->Next ;
	    // Pop : reinstall whatever Head displaced, then admit Head.
	    const uintptr_t v = LCAS (&ISeqLock, w, T2) ;
	    if (v == w) {
	      // CAS was successful
	      Resume(Head) ;
	      return ;
	    }

	    // CAS failed; we raced and lost
	    // inopportune interleaving -- concurrent interference and race
	    // some other thread modified ISeqLock in the LD-CAS window above.
	    // new threads have arrived in interim window
	    ASSERT ((v & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ;

	    if (SuccessionMode == STRICT) {
	      // Force strict pedantic LIFO ordering.
	      // Retry the "pop" operation.
	      // unlock() is NOT constant-time for STRICT mode.
	      w = v ;
	      goto TryPop ;
	    }

	    ASSERT (SuccessionMode == LIBERAL) ;
	    // CAS failed, so there must be 2 or more threads on the stack
	    // Wake the 2nd thread.
	    // unlock() runs in constant time with no loops but the admission
	    // order is not strict LIFO.
	    Head = reinterpret_cast<PushNode *> (v & ~TAGMASK) ;
	    ASSERT (Head != nullptr) ;
	    T2 = Head->Next ;
	    ASSERT ((T2 & (LOCKED|CONTENDED)) == (LOCKED|CONTENDED)) ;
	    const auto nxt = reinterpret_cast<PushNode *> (T2 & ~TAGMASK) ;
	    ASSERT (nxt != nullptr) ;
	    // Unlink the 2nd node : the new top now points past it.
	    Head->Next = nxt->Next.load() ;
	    Resume (nxt) ;
	  }

	  // Alternative concise usage: int v = SeqLock + [&]{....} ;
	  // Allow lambda body to return values thru seqlock.
	  // Runs fn while holding the writer lock and returns whatever fn returns.
	  // Written as a member template : "auto" in the parameter list of an
	  // ordinary function is only standard from C++20 on.
	  template <typename Fn>
	  auto operator+ (Fn && fn) -> decltype(fn()) {
	    Guard G (this) ;
	    return fn() ;
	  }

	  // Runs Activity under the writer lock.
	  void Write (std::function<void(void)> Activity) {
	    Guard G (this) ;
	    Activity() ;
	  }

	  // Encapsulated read operator that takes a reader-CS expressed as a lambda
	  // This allows us to hide the policy code that reverts from
	  // optimistic reads to pessimistic writes.
	  // Activity may run several times (up to 5 optimistic attempts plus one
	  // run under the lock), so it must be safe to repeat.
	  void Read (std::function<void(void)> Activity) {
	    // Optimistic-speculative phase
	    int OptimisticAttempts = 5 ;
	    for (;;) {
	      if (--OptimisticAttempts < 0) break ;
	      if (TryRead (Activity) == 0) return ;
	    }

	    // revert to pessimistic mutual exclusion - ensure eventual progress
	    Write(Activity) ;
	  }

	  // One optimistic read attempt.
	  // TryRead() return values
	  // 0 : success : observed values were consistent
	  // 1 : aborted because writer was present at start of attempt
	  //     (Activity is not run in this case)
	  // 2 : aborted because writer arrived during attempt
	  int TryRead (std::function<void(void)> Activity) {
	    const auto v = ReadVersion() ;
	    if (IsLocked(v)) return 1 ;
	    // speculative read attempt ...
	    // The reader can see inconsistent values and is expected to
	    // tolerate and behave gracefully.
	    // Be extremely careful of dependent loads and pointers!
	    // FENCE : LD | LD
	    Activity() ;
	    // FENCE : LD | LD
	    if (Validate(v)) return 0 ;
	    return 2 ;
	  }
	} ;

	// The lock must stay exactly one pointer-sized word : compilation fails if
	// InflatableSeqLock ever grows beyond sizeof(intptr_t).
	static_assert(sizeof(InflatableSeqLock) == sizeof(intptr_t), "invariant") ;
No results found