Skip to content

Instantly share code, notes, and snippets.

@DBJDBJ
Last active August 12, 2020 05:44
Show Gist options
  • Save DBJDBJ/adcd593a07372194a4be936314ad041f to your computer and use it in GitHub Desktop.
Save DBJDBJ/adcd593a07372194a4be936314ad041f to your computer and use it in GitHub Desktop.
Yet another allocation/deallocation comparison? Yes, but this time clang shows insanely fast times for plain malloc/free ... probably correctly optimized code?
// https://docs.microsoft.com/en-us/cpp/parallel/concrt/how-to-use-alloc-and-free-to-improve-memory-performance?view=vs-2019
// allocators.cpp
// compile with: /EHsc
#include <windows.h>
#include <ppl.h>
#include <crtdbg.h>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <new>
#include <vector>
#include "dbj_alloc.h"
// DBJ added anonymous namespace
namespace {
using namespace concurrency;
using namespace std;
// Baseline type for the comparison: its class-specific allocation functions
// simply forward to the global operator new and operator delete.
struct new_delete final
{
    int _data;

    // Obtains storage for one object from the global operator new.
    static void* operator new(std::size_t byte_count)
    {
        void* const block = ::operator new(byte_count);
        return block;
    }

    // Hands the storage back to the global operator delete.
    static void operator delete(void* block)
    {
        ::operator delete(block);
    }
};
// A type that defines the new and delete operators. These operators
// call the C Runtime malloc and free functions, respectively.
struct malloc_free final
{
    // Allocates `size` bytes with malloc.
    // Throws std::bad_alloc when malloc fails: an allocation function that is
    // not declared noexcept must never return a null pointer, because the
    // new-expression would go on to use the storage without checking it.
    static void* operator new(size_t size)
    {
        void* const block = malloc(size);
        if (block == nullptr)
        {
            throw std::bad_alloc();
        }
        return block;
    }

    // Releases storage obtained from operator new above.
    // free() accepts a null pointer, so no extra check is needed.
    static void operator delete(void* p)
    {
        free(p);
    }

    int _data;
};
// A type that defines the new and delete operators. These operators
// call the Concurrency Runtime Alloc and Free functions, respectively.
struct Alloc_Free final
{
public:
static void* operator new(size_t size)
{
return Alloc(size);
}
static void operator delete(void* p)
{
Free(p);
}
int _data;
};
// A type that defines the new and delete operators. These operators call
// DBJ_NANO_MALLOC and DBJ_NANO_FREE from dbj_alloc.h, which map to the WIN32
// HeapAlloc and HeapFree functions on the process heap when _WIN32 is defined.
struct dbj_alloc_free final
{
public:
    // Allocates `size` bytes through DBJ_NANO_MALLOC.
    // Throws std::bad_alloc when the allocation fails: HeapAlloc called
    // without HEAP_GENERATE_EXCEPTIONS reports failure by returning NULL, and
    // an operator new that is not noexcept must never return a null pointer.
    static void* operator new(size_t size)
    {
        void* const block = DBJ_NANO_MALLOC(size);
        if (block == nullptr)
        {
            throw std::bad_alloc();
        }
        return block;
    }

    // Releases storage obtained from operator new above.
    // A deallocation function has to accept a null pointer, so that case is
    // handled here instead of relying on what the macro does with it.
    static void operator delete(void* p)
    {
        if (p == nullptr)
        {
            return;
        }
        DBJ_NANO_FREE(p);
    }

    int _data;
};
// Calls the provided work function once and returns the number of whole
// milliseconds that the call took.
//
// The time is taken from std::chrono::steady_clock, which is monotonic, so
// the result cannot go negative. The previous GetTickCount-based version had
// a 10-16 ms granularity and a 32-bit counter that wraps after 49.7 days,
// which produced a negative difference once widened to __int64.
//
// The function is intentionally not constexpr: it always reads a clock, so it
// can never be evaluated in a constant expression.
template <class Function>
__int64 time_call(Function&& f)
{
    const auto begin = std::chrono::steady_clock::now();
    f();
    const auto elapsed = std::chrono::steady_clock::now() - begin;
    return static_cast<__int64>(
        std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
}
// Exchanges the contents of a[index1] with a[index2].
//
// a      - pointer to the first element of the array
// index1 - index of the first element to exchange
// index2 - index of the second element to exchange
template<class T>
void swap(T* a, int index1, int index2)
{
    // For illustration, allocate the temporary from the heap.
    // This is useful when sizeof(T) is large.
    // The unique_ptr still goes through T's own operator new / operator
    // delete (default_delete performs `delete ptr`), but it also releases the
    // temporary if one of the assignments below throws.
    std::unique_ptr<T> temp(new T);
    *temp = a[index1];
    a[index1] = a[index2];
    a[index2] = *temp;
}
// Benchmark parameters.
// Number of elements in the array that gets reversed.
constexpr int size_{5000000};
// Number of element pairs exchanged by one reversal pass.
constexpr int size_half_{size_ / 2};
// Number of reversal passes that are timed and summed.
constexpr int repeat{11};
// Computes the time that it takes to reverse the elements of a
// large array of the specified type.
//
// Returns the sum, over `repeat` passes, of the milliseconds reported by
// time_call for one parallel reversal of `size_` elements.
//
// The function is intentionally not constexpr: it allocates and runs a
// parallel loop, so it can never be evaluated in a constant expression.
template <typename T>
__int64 reverse_array()
{
    // The vector owns the array, so it is released on every exit path, and
    // its elements are value-initialized, so the swaps below never read
    // indeterminate values.
    std::vector<T> storage(size_);
    T* const a = storage.data();

    __int64 total = 0;
    // Repeat the operation several times to amplify the time difference.
    for (int i = 0; i < repeat; ++i)
    {
        total += time_call([&] {
            // Each index touches only elements `index` and its mirror, so
            // the iterations work on disjoint pairs.
            parallel_for(0, size_half_,
                [&](int index) {
                    swap(a, index, size_ - index - 1);
                });
        });
    }
    return total;
}
} // anon ns
// -----------------------------------------------------------------------------
// Program entry point. Prints which compiler built the binary, then the
// measured time for each of the four allocation strategies, and finally runs
// the shell `pause` command so the console output stays visible.
int wmain()
{
    // Compiler identification banner.
#ifdef __clang__
    std::wcout << "\n" << __VERSION__ << "\n\n";
#else
    std::wcout << "\nMSVC " << _MSC_VER << "\n\n";
#endif

    // Each measurement writes its prefix, runs the benchmark, then writes
    // the label, in that order.

    // Global operator new / operator delete.
    std::wcout << L"Took ";
    std::wcout << reverse_array<new_delete>();
    std::wcout << " ms with new/delete." << std::endl;

    // C runtime malloc / free.
    std::wcout << L"Took ";
    std::wcout << reverse_array<malloc_free>();
    std::wcout << " ms with malloc/free." << std::endl;

    // Concurrency Runtime Alloc / Free.
    std::wcout << L"Took ";
    std::wcout << reverse_array<Alloc_Free>();
    std::wcout << " ms with Alloc/Free." << std::endl;

    // DBJ_NANO_MALLOC / DBJ_NANO_FREE.
    std::wcout << L"Took ";
    std::wcout << reverse_array<dbj_alloc_free>();
    std::wcout << " ms with win32 heap alloc free" << std::endl;

    system("@echo .");
    system("@echo .");
    system("@pause");
    return 0;
}
#ifndef DBJ_HEAP_ALLOC_INCLUDE
#define DBJ_HEAP_ALLOC_INCLUDE
/*
(c) 2020 by [email protected] CC BY SA 4.0

Minimal allocation macros.

  DBJ_NANO_CALLOC(T_, N_)  zero-initialised storage for N_ objects of type T_,
                           yields a T_*
  DBJ_NANO_MALLOC(...)     uninitialised storage
  DBJ_NANO_FREE(P_)        releases storage obtained from the macros above

With _WIN32 defined the macros use HeapAlloc / HeapFree on the process heap;
otherwise they use the C standard library.

Also here are the forward declarations to avoid
including windows.h

NOTE(review): DBJ_NANO_MALLOC takes one argument (the byte count) in the
_WIN32 branch but two (type, byte count) in the other branch. Both forms are
kept as they were so existing callers keep compiling -- confirm which
signature is the intended one.
*/
#include <stddef.h> /* size_t */

#ifdef _WIN32

/* Same value as HEAP_ZERO_MEMORY in the Windows SDK; spelled out here so the
   macros work without windows.h. */
#define DBJ_NANO_HEAP_ZERO_MEMORY 0x00000008

/* Arguments are parenthesised so expressions such as `n + 1` expand
   correctly. The byte count (N_) * sizeof(T_) is not checked for overflow. */
#define DBJ_NANO_CALLOC(T_, N_) ((T_*)HeapAlloc(GetProcessHeap(), DBJ_NANO_HEAP_ZERO_MEMORY, (N_) * sizeof(T_)))
#define DBJ_NANO_MALLOC(S_) HeapAlloc(GetProcessHeap(), 0, (S_))
#define DBJ_NANO_FREE(P_) HeapFree(GetProcessHeap(), 0, (void*)(P_))

#ifndef _INC_WINDOWS
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/* The flags parameters are unsigned long because that is what DWORD is. */
__declspec(dllimport) void* __stdcall GetProcessHeap(void);
__declspec(dllimport) __declspec(allocator) void* __stdcall HeapAlloc(void* /*hHeap*/, unsigned long /*dwFlags*/, size_t /*dwBytes*/);
__declspec(dllimport) int __stdcall HeapFree(void* /*hHeap*/, unsigned long /*dwFlags*/, void* /*lpMem*/);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // _INC_WINDOWS

#else // not WIN32
/// no WIN32 -- standard allocation
#include <assert.h> /* assert */
#include <stdlib.h> /* calloc, malloc, free */

#define DBJ_NANO_CALLOC(T_,S_) ((T_*)calloc( (S_) , sizeof(T_)))
#define DBJ_NANO_MALLOC(T_,S_) ((T_*)malloc( (S_) ))
/* P_ must be a modifiable lvalue: it is set to NULL after the free. */
#define DBJ_NANO_FREE(P_) do { assert((P_) != NULL ); if((P_) != NULL) free(P_); (P_) = NULL; } while(0)
#endif // not WIN32
#endif // DBJ_HEAP_ALLOC_INCLUDE
@DBJDBJ
Copy link
Author

DBJDBJ commented Aug 12, 2020

It seems line #101 (in the cpp file) is the problem for MSVC malloc/free ... It seems clang optimizes that away at compile time, even when no optimization is requested.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment