Last active
August 12, 2020 05:44
-
-
Save DBJDBJ/adcd593a07372194a4be936314ad041f to your computer and use it in GitHub Desktop.
Yet another allocation/deallocation comparison? Yes, but this time clang shows an insanely fast time for plain malloc/free ... probably correctly optimized code?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://docs.microsoft.com/en-us/cpp/parallel/concrt/how-to-use-alloc-and-free-to-improve-memory-performance?view=vs-2019 | |
// allocators.cpp | |
// compile with: /EHsc | |
#include <windows.h>
#include <ppl.h>
#include <crtdbg.h>
#include <cstdlib>
#include <iostream>
#include <new>
#include <vector>
#include "dbj_alloc.h"
// DBJ added anonymous namespace | |
namespace { | |
using namespace concurrency; | |
using namespace std; | |
// Benchmark subject whose class-level allocation functions simply forward
// to the global operator new and operator delete.
// Only the single-object forms are declared here; the array forms are not.
struct new_delete final
{
    // Payload carried by every instance.
    int _data;

    // Single-object allocation: delegate to the global operator new.
    static void* operator new(size_t bytes)
    {
        void* const block = ::operator new(bytes);
        return block;
    }

    // Single-object deallocation: delegate to the global operator delete.
    static void operator delete(void* block)
    {
        ::operator delete(block);
    }
};
// A type that defines the new and delete operators. These operators
// call the C Runtime malloc and free functions, respectively.
//
// malloc signals failure by returning a null pointer, but an allocation
// function that is not declared noexcept must never return null: it has to
// throw std::bad_alloc instead. operator new therefore turns a failed malloc
// into that exception rather than handing null back to the new-expression.
struct malloc_free final
{
    // Allocates `size` bytes with malloc.
    // Throws std::bad_alloc when malloc cannot satisfy the request.
    static void* operator new(std::size_t size)
    {
        void* const p = std::malloc(size);
        if (p == nullptr)
        {
            throw std::bad_alloc();
        }
        return p;
    }
    // Releases a block obtained from operator new above.
    // free accepts a null pointer and does nothing in that case.
    static void operator delete(void* p)
    {
        std::free(p);
    }
    int _data;
};
// A type that defines the new and delete operators. These operators | |
// call the Concurrency Runtime Alloc and Free functions, respectively. | |
struct Alloc_Free final | |
{ | |
public: | |
static void* operator new(size_t size) | |
{ | |
return Alloc(size); | |
} | |
static void operator delete(void* p) | |
{ | |
Free(p); | |
} | |
int _data; | |
}; | |
// These operators | |
// call the WIN32 Heap Alloc and Heap Free functions | |
struct dbj_alloc_free final | |
{ | |
public: | |
static void* operator new(size_t size) | |
{ | |
return DBJ_NANO_MALLOC(size); | |
} | |
static void operator delete(void* p) | |
{ | |
DBJ_NANO_FREE(p); | |
} | |
int _data; | |
}; | |
// Calls the provided work function and returns the number of milliseconds | |
// that it takes to call that function. | |
template <class Function> | |
constexpr __int64 time_call(Function&& f) | |
{ | |
__int64 begin = GetTickCount(); | |
f(); | |
return GetTickCount() - begin; | |
} | |
// Swaps the elements stored at positions index1 and index2 of the array a.
template<class T>
void swap(T* a, int index1, int index2)
{
    T& first = a[index1];
    T& second = a[index2];

    // For illustration the scratch object lives on the heap, so every call
    // performs exactly one allocation and one deallocation of a T.
    T* const scratch = new T;
    *scratch = first;
    first = second;
    second = *scratch;
    delete scratch;
}
// Benchmark dimensions.
// size_      : number of elements in the array that gets reversed.
// size_half_ : half of size_, i.e. the number of element pairs to exchange.
// repeat     : how many times the reversal is timed per element type.
constexpr int size_ = 5 * 1000 * 1000;
constexpr int size_half_ = size_ / 2;
constexpr int repeat = 11;
// Computes the time that it takes to reverse the elements of a | |
// large array of the specified type. | |
template <typename T> | |
constexpr __int64 reverse_array() | |
{ | |
T* a = new T[size_]; | |
__int64 time = 0; | |
// Repeat the operation several times to amplify the time difference. | |
for (int i = 0; i < repeat; ++i) | |
{ | |
time += time_call([&] { | |
parallel_for(0, size_half_, | |
[&](int index) { | |
swap(a, index, size_ - index - 1); | |
}); | |
}); | |
} | |
delete[] a; | |
return time; | |
} | |
} // anon ns | |
// ----------------------------------------------------------------------------- | |
int wmain() | |
{ | |
#ifdef __clang__ | |
wcout << "\n" << __VERSION__ << "\n\n" ; | |
#else | |
wcout << "\nMSVC " << _MSC_VER << "\n\n"; | |
#endif | |
// Compute the time that it takes to reverse large arrays of | |
// different types. | |
// new_delete | |
wcout << L"Took " << reverse_array<new_delete>() | |
<< " ms with new/delete." << endl; | |
// malloc_free | |
wcout << L"Took " << reverse_array<malloc_free>() | |
<< " ms with malloc/free." << endl; | |
// Alloc_Free | |
wcout << L"Took " << reverse_array<Alloc_Free>() | |
<< " ms with Alloc/Free." << endl; | |
// dbj_alloc_free | |
wcout << L"Took " << reverse_array<dbj_alloc_free>() | |
<< " ms with win32 heap alloc free" << endl; | |
system("@echo ."); | |
system("@echo ."); | |
system("@pause"); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef DBJ_HEAP_ALLOC_INCLUDE
#define DBJ_HEAP_ALLOC_INCLUDE
/*
(c) 2020 by [email protected] CC BY SA 4.0

Minimal allocation macros.

  DBJ_NANO_CALLOC(T_, S_)  S_ zero-initialized objects of type T_, as T_*
  DBJ_NANO_MALLOC(...)     an uninitialized block of memory
  DBJ_NANO_FREE(P_)        releases a block obtained from the macros above

On Windows the macros use the process heap (HeapAlloc / HeapFree); elsewhere
they use the C runtime (calloc / malloc / free).

Also here are the forward declarations to avoid
including windows.h

NOTE(review): DBJ_NANO_MALLOC takes one argument (byte count) on Windows but
two arguments (type, byte count) elsewhere. Both signatures are kept as they
were so existing callers keep compiling; confirm which one is intended.
*/
#include <stddef.h> /* size_t */

#ifdef _WIN32

/* Same value as HEAP_ZERO_MEMORY in the Windows SDK; spelled out here so that
   windows.h is not required. */
#define DBJ_NANO_HEAP_ZERO_MEMORY 0x00000008

/* Zeroed memory, mirroring calloc. Unlike calloc, the size multiplication is
   not checked for overflow. */
#define DBJ_NANO_CALLOC(T_, S_) ((T_*)HeapAlloc(GetProcessHeap(), DBJ_NANO_HEAP_ZERO_MEMORY, (size_t)(S_) * sizeof(T_)))
/* Returns NULL on failure. */
#define DBJ_NANO_MALLOC(S_) HeapAlloc(GetProcessHeap(), 0, (S_))
#define DBJ_NANO_FREE(P_) HeapFree(GetProcessHeap(), 0, (void*)(P_))

#ifndef _INC_WINDOWS
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* NOTE(review): the Windows SDK declares the flags parameters as DWORD
   (unsigned long); these declarations use int. Verify this is acceptable if
   windows.h can be included after this header. */
__declspec(dllimport) void* __stdcall GetProcessHeap(void);
__declspec(allocator) void* __stdcall HeapAlloc(void* /*hHeap*/, int /* flags */, size_t /*dwBytes*/);
int __stdcall HeapFree(void* /*hHeap*/, int /*dwFlags*/, void* /*lpMem*/);
#ifdef __cplusplus
} // extern "C"
#endif /* __cplusplus */
#endif // _INC_WINDOWS

#else // not WIN32
/// no WIN32 -- standard allocation
#include <assert.h>
#include <stdlib.h>

#define DBJ_NANO_CALLOC(T_,S_) ((T_*)calloc( (S_) , sizeof(T_)))
#define DBJ_NANO_MALLOC(T_,S_) ((T_*)malloc( (S_) ))
/* Asserts the pointer is not NULL, frees it and resets it to NULL, so P_ must
   be a modifiable lvalue. */
#define DBJ_NANO_FREE(P_) do { assert((P_) != NULL ); if((P_) != NULL) free(P_); (P_) = NULL; } while(0)

#endif // not WIN32
#endif // DBJ_HEAP_ALLOC_INCLUDE
It seems line #101 (in the cpp file) is the problem for MSVC malloc/free ... It seems clang optimizes that away at compile time, even when no optimization is requested.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Machine is 8 GB, i5, WIN10 PRO
Visual Studio 2019 all up to date
Release builds ... These are the results when using MSVC
These are the results when using the clang toolset
Other than that, no other project settings are touched ... Vanilla build