#pragma once
/* Single-file header atomic library with class support.
64-bit only. MSVC/GCC/Clang supported.
TODO:
- 16-bit atomics?
- x86 (32-bit) support
- check godbolt / macro godbolt
- ExchangePtr vs Exchange64?
- check alignment of Atomic::m_oAtom
- separate fail/success memory orders for compare-exchange
*/
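/* Usage sketch (illustrative only, not compiled as part of the header):

       volatile int32_t g_value;
       AtomicStore32(&g_value, 42);          // sequentially consistent store
       int32_t v = AtomicLoad32(&g_value);   // sequentially consistent load
       AtomicFetchAdd32(&g_value, 1);        // returns the value before the add

       Atomic<uint64_t> counter(0);          // class wrapper, available when ATOMIC_CLASS is defined
       ++counter;                            // atomic increment
*/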
#ifndef ATOMIC_INLINE
#define ATOMIC_INLINE inline
#endif
#define ATOMIC_CLASS
// MSVC forward declaration
#if defined(_MSC_VER)
#ifndef _M_X64
#error "x64 (64-bit) targets only"
#endif
typedef signed char int8_t;
typedef int int32_t;
typedef long long int int64_t;
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef uint64_t size_t;
static_assert(sizeof(char) == 1, "size type error");
static_assert(sizeof(int) == 4, "size type error");
static_assert(sizeof(long long) == 8, "size type error");
#define _Compiler_barrier() _ReadWriteBarrier()
#if defined(__cplusplus)
extern "C" {
#endif
void * memcpy ( void * destination, const void * source, size_t num );
extern void _mm_pause(void);
#pragma intrinsic(_mm_pause)
extern void _ReadWriteBarrier(void);
#pragma intrinsic(_ReadWriteBarrier)
char _InterlockedExchange8(char volatile *, char);
long _InterlockedExchange(long volatile *, long);
long long _InterlockedExchange64(long long volatile *, long long);
char _InterlockedExchangeAdd8(char volatile *, char);
long _InterlockedExchangeAdd(long volatile *, long);
long long _InterlockedExchangeAdd64(long long volatile *, long long);
char _InterlockedCompareExchange8(char volatile *, char, char);
long _InterlockedCompareExchange(long volatile *, long, long);
long long _InterlockedCompareExchange64(long long volatile *, long long, long long);
long long _InterlockedOr64(volatile long long *, long long);
#define InterlockedExchange8 _InterlockedExchange8
#define InterlockedExchange _InterlockedExchange
#define InterlockedExchange64 _InterlockedExchange64
#define InterlockedExchangeAdd8 _InterlockedExchangeAdd8
#define InterlockedExchangeAdd _InterlockedExchangeAdd
#define InterlockedExchangeAdd64 _InterlockedExchangeAdd64
#define InterlockedCompareExchange8 _InterlockedCompareExchange8
#define InterlockedCompareExchange _InterlockedCompareExchange
#define InterlockedCompareExchange64 _InterlockedCompareExchange64
#define InterlockedOr64 _InterlockedOr64
#if defined(__cplusplus)
}
#endif
#else // _MSC_VER
#if defined(__clang__) && __has_builtin(__atomic_load_n) \
&& __has_builtin(__atomic_store_n) \
&& __has_builtin(__atomic_add_fetch) \
&& __has_builtin(__atomic_exchange_n) \
&& __has_builtin(__atomic_compare_exchange_n) \
&& defined(__ATOMIC_SEQ_CST)
# define _ATOMIC_HAS_ATOMIC_BUILTINS
#elif !defined(__clang__) && defined(__GNUC__)
# define _ATOMIC_HAS_ATOMIC_BUILTINS
#endif
typedef signed char int8_t;
typedef int int32_t;
typedef long int64_t;
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long uint64_t;
typedef uint64_t size_t;
#if defined(__cplusplus)
extern "C" {
#endif
extern void _mm_pause(void);
void * memcpy(void * destination, const void * source, size_t num);
#if defined(__cplusplus)
}
#endif
#endif // !_MSC_VER
#define PA_IS_TRIVIALLY_COPYABLE(T) __is_trivially_copyable(T)
#define PA_PROCESSOR_YELD _mm_pause()
static_assert(sizeof(int8_t) == 1, "size type error");
static_assert(sizeof(int32_t) == 4, "size type error");
static_assert(sizeof(int64_t) == 8, "size type error");
// STORES
ATOMIC_INLINE void AtomicStore8(volatile int8_t* dest, int8_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
__atomic_store_n(dest, value, __ATOMIC_SEQ_CST);
#else
InterlockedExchange8((char volatile*)dest, (char)value);
#endif
}
ATOMIC_INLINE void AtomicStore32(volatile int32_t* dest, int32_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
__atomic_store_n(dest, value, __ATOMIC_SEQ_CST);
#else
InterlockedExchange((long volatile*)dest, (long)value);
#endif
}
ATOMIC_INLINE void AtomicStore64(volatile int64_t* dest, int64_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
__atomic_store_n(dest, value, __ATOMIC_SEQ_CST);
#else
InterlockedExchange64(dest, value);
#endif
}
// LOADS
ATOMIC_INLINE int8_t AtomicLoad8(volatile int8_t* source) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_load_n(source, __ATOMIC_SEQ_CST);
#else
int8_t value;
value = *source;
_Compiler_barrier();
return value;
#endif
}
ATOMIC_INLINE int32_t AtomicLoad32(volatile int32_t* source) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_load_n(source, __ATOMIC_SEQ_CST);
#else
int32_t value;
value = *source;
_Compiler_barrier();
return value;
#endif
}
ATOMIC_INLINE int64_t AtomicLoad64(volatile int64_t *source)
{
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_load_n(source, __ATOMIC_SEQ_CST);
#else
int64_t value;
#if defined(_M_X64)
value = *source;
_Compiler_barrier();
#else
value = static_cast<int64_t>(_InterlockedOr64((volatile long long *)source, 0));
#endif
return value;
#endif
}
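/* Sketch: publishing a value between two threads with the store/load pair above
   (illustrative only; the variable names are assumptions):

       static int32_t          g_payload;
       static volatile int32_t g_ready;            // 0 = not published, 1 = published

       // producer thread
       g_payload = 42;
       AtomicStore32(&g_ready, 1);                 // seq-cst store publishes g_payload

       // consumer thread
       while (AtomicLoad32(&g_ready) == 0) { }     // spin until published
       int32_t v = g_payload;                      // safe: the seq-cst flag accesses order the payload
*/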
// FETCH ADD
ATOMIC_INLINE int8_t AtomicFetchAdd8(volatile int8_t* dest, int8_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_fetch_add(dest, value, __ATOMIC_SEQ_CST);
#else
return InterlockedExchangeAdd8((volatile char*)dest, value);
#endif
}
ATOMIC_INLINE int32_t AtomicFetchAdd32(volatile int32_t* dest, int32_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_fetch_add(dest, value, __ATOMIC_SEQ_CST);
#else
return InterlockedExchangeAdd((volatile long*)dest, value);
#endif
}
ATOMIC_INLINE int64_t AtomicFetchAdd64(volatile int64_t* dest, int64_t value) {
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_fetch_add(dest, value, __ATOMIC_SEQ_CST);
#else
return InterlockedExchangeAdd64((volatile long long*)dest, value);
#endif
}
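/* Sketch: a completion counter built on the fetch-add primitives above (illustrative
   only, not part of the library):

       static volatile int64_t g_nJobsDone;

       bool OnJobDone(int64_t nTotalJobs) {
           int64_t previous = AtomicFetchAdd64(&g_nJobsDone, 1);  // returns the value before the add
           return (previous + 1) == nTotalJobs;                   // true exactly once, for the last job
       }
*/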
// COMPARE EXCHANGE
// note: the comparand/newValue parameter order is reversed in the Win32 intrinsics
ATOMIC_INLINE bool AtomicCompareExchange8(volatile int8_t* dest, int8_t comparand, int8_t newValue)
{
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_compare_exchange_n(dest, &comparand, newValue, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
#else
int8_t result = InterlockedCompareExchange8((volatile char*)dest, newValue, comparand);
return result == comparand;
#endif
}
ATOMIC_INLINE bool AtomicCompareExchange32(volatile int32_t* dest, int32_t comparand, int32_t newValue)
{
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_compare_exchange_n(dest, &comparand, newValue, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
#else
int32_t result = InterlockedCompareExchange((volatile long*)dest, newValue, comparand);
return result == comparand;
#endif
}
ATOMIC_INLINE bool AtomicCompareExchange64(volatile int64_t* dest, int64_t comparand, int64_t newValue)
{
#ifdef _ATOMIC_HAS_ATOMIC_BUILTINS
return __atomic_compare_exchange_n(dest, &comparand, newValue, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
#else
int64_t result = InterlockedCompareExchange64(dest, newValue, comparand);
return result == comparand;
#endif
}
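/* Sketch: the usual compare-exchange retry loop, here an "atomic max" (illustrative
   helper, not provided by this header):

       ATOMIC_INLINE void AtomicMax32(volatile int32_t* dest, int32_t value) {
           for (;;) {
               int32_t observed = AtomicLoad32(dest);
               if (observed >= value)
                   return;                                          // already large enough
               if (AtomicCompareExchange32(dest, observed, value))  // succeeds only if *dest still equals observed
                   return;
               // another thread changed *dest in the meantime: reload and retry
           }
       }
*/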
// SPIN LOCK
typedef int32_t AtomicFlag;
const AtomicFlag AtomicFlagFree = 0;
const AtomicFlag AtomicFlagTaken = 1;
ATOMIC_INLINE void AtomicSpinInit( volatile AtomicFlag* flag )
{
AtomicStore32( flag, AtomicFlagFree );
}
ATOMIC_INLINE void AtomicSpinLock( volatile AtomicFlag* flag )
{
while( AtomicCompareExchange32( flag, AtomicFlagFree, AtomicFlagTaken ) == false )
{
PA_PROCESSOR_YELD;
}
}
ATOMIC_INLINE void AtomicSpinUnLock( volatile AtomicFlag* flag )
{
AtomicStore32( flag, AtomicFlagFree );
}
ATOMIC_INLINE void AtomicCopy( void* dest, const void* source, size_t size, volatile AtomicFlag* flag )
{
AtomicSpinLock( flag );
memcpy( dest, source, size );
AtomicSpinUnLock( flag );
}
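/* Sketch: guarding a multi-step update with the spin lock (illustrative; the Node
   type and the list are assumptions, not part of this header):

       struct Node { Node* next; };
       static AtomicFlag g_listLock;       // call AtomicSpinInit(&g_listLock) once before use
       static Node*      g_listHead;

       void PushNode(Node* node) {
           AtomicSpinLock(&g_listLock);    // busy-waits (with _mm_pause) until the flag is acquired
           node->next = g_listHead;
           g_listHead = node;
           AtomicSpinUnLock(&g_listLock);  // releases by storing AtomicFlagFree
       }
*/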
#ifdef ATOMIC_CLASS
template<size_t _Size >
struct AtomicImpl
{
AtomicImpl() { AtomicSpinInit(&m_iFlag); }
bool IsLockFree() const { return false; }
protected:
void _Store(void* pDest, const void *pSource) { AtomicCopy(pDest, pSource, _Size, &m_iFlag); }
void _Load(void* pDest, const void *pSource) const { AtomicCopy(pDest, pSource, _Size, &m_iFlag); }
mutable AtomicFlag m_iFlag; //todo: alignas(64) ??
};
template<> struct AtomicImpl<1>
{
bool IsLockFree() const { return true; }
protected:
void _Store(void* pDest, const void *pSource) { AtomicStore8((int8_t*)pDest, *(int8_t*)pSource); }
void _Load(void* pDest, const void *pSource) const { *(int8_t*)pDest = AtomicLoad8((int8_t*)pSource); }
};
template<> struct AtomicImpl<4>
{
bool IsLockFree() const { return true; }
protected:
void _Store(void* pDest, const void *pSource) { AtomicStore32((int32_t*)pDest, *(int32_t*)pSource); }
void _Load(void* pDest, const void *pSource) const { *(int32_t*)pDest = AtomicLoad32((int32_t*)pSource); }
};
template<> struct AtomicImpl<8>
{
bool IsLockFree() const { return true; }
protected:
void _Store(void* pDest, const void *pSource) { AtomicStore64((int64_t*)pDest, *(int64_t*)pSource); }
void _Load(void* pDest, const void *pSource) const { *(int64_t*)pDest = AtomicLoad64((int64_t*)pSource); }
};
template<class T, size_t N>
struct AtomicImplArithmetic : public AtomicImpl<N>
{
protected:
T m_oAtom;
};
// Pointer arithmetic support
template<class T, size_t N>
struct AtomicImplArithmetic<T*, N> : public AtomicImpl<N>
{
T* operator++(int) { return FetchAdd(1); }
T* operator++() { return FetchAdd(1) + 1; }
T* operator--(int) { return FetchAdd(-1); }
T* operator--() { return FetchAdd(-1) - 1; }
// advance by oValue elements (like std::atomic<T*>) and return the value prior to the change
#ifdef _WIN64
static_assert(sizeof(T*) == 8, "unexpected ptr size");
T* FetchAdd(int64_t oValue) { return (T*)AtomicFetchAdd64((int64_t*)&m_oAtom, oValue * (int64_t)sizeof(T)); }
#else
static_assert(sizeof(T*) == 4, "unexpected ptr size");
T* FetchAdd(int32_t oValue) { return (T*)AtomicFetchAdd32((int32_t*)&m_oAtom, oValue * (int32_t)sizeof(T)); }
#endif
protected:
T* m_oAtom;
};
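/* Sketch: the pointer specialization advances in elements, like std::atomic<T*>
   (illustrative; the buffer and cursor names are assumptions):

       static int   g_buffer[256];
       Atomic<int*> g_cursor(g_buffer);

       int* ClaimSlot() { return g_cursor.FetchAdd(1); }  // each caller receives a distinct slot
*/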
#define ATOMIC_DECL_ARITHMETIC_TYPE( _type, _bits ) template<> struct AtomicImplArithmetic<_type,sizeof(_type)>: public AtomicImpl<sizeof(_type)> { \
_type operator++( int ) { return FetchAdd( 1 ); } \
_type operator++() { return FetchAdd( 1 ) + 1; } \
_type operator--( int ) { return FetchSub( 1 ); } \
_type operator--() { return FetchSub( 1 ) - 1; } \
_type operator+=( const _type& oValue ) { return FetchAdd( oValue ) + oValue; } \
_type operator-=( const _type& oValue ) { return FetchSub( oValue ) - oValue; } \
_type FetchAdd( const _type& oValue ) { return AtomicFetchAdd##_bits( (int##_bits##_t*)&m_oAtom, oValue ); } \
_type FetchSub( const _type& oValue ) { return AtomicFetchAdd##_bits( (int##_bits##_t*)&m_oAtom, 0-oValue ); } \
protected: \
_type m_oAtom; \
};
// Arithmetic is only supported on integer types
ATOMIC_DECL_ARITHMETIC_TYPE(int32_t, 32);
ATOMIC_DECL_ARITHMETIC_TYPE(uint32_t, 32);
ATOMIC_DECL_ARITHMETIC_TYPE(int64_t, 64);
ATOMIC_DECL_ARITHMETIC_TYPE(uint64_t, 64);
template<class T>
class Atomic : public AtomicImplArithmetic<T, sizeof(T)>
{
using AtomicImplArithmetic<T, sizeof(T)>::m_oAtom;
typedef AtomicImplArithmetic<T, sizeof(T)> _Impl;
static_assert(PA_IS_TRIVIALLY_COPYABLE(T) == true, "Atomic only supports trivially copyable types");
public:
Atomic() {}
Atomic(const T& oValue) { Store(oValue); }
Atomic& operator=(const T& oValue) { Store(oValue); return *this; }
Atomic& operator=(const Atomic<T>&) = delete;
operator T() const { return Load(); }
void Store(const T& oValue) { _Impl::_Store(&m_oAtom, &oValue); }
T Load() const {
T oResult;
_Impl::_Load( &oResult, &m_oAtom );
return oResult;
}
};
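/* Sketch of the class API in use (illustrative only; Vec2 is an assumed user type):

       struct Vec2 { float x, y; };          // 8 bytes: uses AtomicImpl<8>, lock-free
       Atomic<Vec2>     position;            // Store()/Load() go through AtomicStore64/AtomicLoad64
       Atomic<uint32_t> counter(0);          // integer specialization: ++, --, +=, -= available

       ++counter;                            // atomic increment, returns the new value
       position = Vec2{ 1.0f, 2.0f };        // operator= forwards to Store()
       Vec2 p = position;                    // operator T() forwards to Load()
*/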
#endif // ATOMIC_CLASS