Last active
December 20, 2017 23:17
-
-
Save whizzter/6c9ae04f9f3b7c815aca56a471d44ab9 to your computer and use it in GitHub Desktop.
Comparing performance of polymorphic member accessors to plain struct accessing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cl /Ox /O2 /EHsc /Fetpp.exe /DPLAINPROP t.cpp | |
cl /Ox /O2 /EHsc /Fetvirt.exe /DVIRT t.cpp | |
cl /Ox /O2 /EHsc /Fetvoff.exe /DVOFF t.cpp |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tpp 1000 1000000 | |
tpp 1000000 1000 | |
tvirt 1000 1000000 | |
tvirt 1000000 1000 | |
tvoff 1000 1000000 | |
tvoff 1000000 1000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compile with a define like PLAINPROP VIRT or VOFF to enable different runtime behaviours. | |
// then run the benchmark like ./a.out 1000000 1000 | |
// this runs 1000 iterations over a million elements and times it. | |
#include <stdio.h> | |
#include <time.h> | |
#include <stdint.h> | |
#include <stddef.h> | |
#include <stdlib.h> | |
#include <vector> | |
#ifdef PLAINPROP | |
// This is the baseline case, just the same plain struct to get the baseline time spent in the code. | |
struct C { | |
int a; | |
int b; | |
inline int& ra() { return a; } | |
inline int& rb() { return b; } | |
}; | |
typedef C A; | |
typedef C B; | |
#define RA(x) ((x)->ra()) | |
#define RB(x) ((x)->rb()) | |
#endif | |
#ifdef VIRT | |
// This is using the compiler built in VTables for dispatching to measure the overhead of property accesses through them. | |
struct C { | |
virtual int& ra() =0; | |
virtual int& rb() =0; | |
}; | |
struct A : C { | |
int a; | |
int b; | |
inline int& ra() { return a; } | |
inline int& rb() { return b; } | |
}; | |
struct B : C { | |
int b; | |
int a; | |
inline int& ra() { return a; } | |
inline int& rb() { return b; } | |
}; | |
#define RA(x) ((x)->ra()) | |
#define RB(x) ((x)->rb()) | |
#endif | |
#ifdef VOFF | |
// Using a virutal offset table going directly to the properties, no compiler assisted vtables but manual offset calcs. | |
struct C { | |
size_t *voff; | |
int data[2]; | |
}; | |
size_t aoff[2]={offsetof(C,data[0]),offsetof(C,data[1])}; | |
size_t boff[2]={offsetof(C,data[1]),offsetof(C,data[0])}; | |
struct A : C { | |
A() { | |
voff=aoff; | |
} | |
}; | |
struct B : C { | |
B() { | |
voff=boff; | |
} | |
}; | |
#define RA(x) (*(int*)( ((char*)(x)) + (x->voff[0]) )) | |
#define RB(x) (*(int*)( ((char*)(x)) + (x->voff[1]) )) | |
#endif | |
// This function builds an output array of pointers that are interleaved from input vectors al and bl | |
std::vector<C*> init(int count,std::vector<A> &al,std::vector<B> &bl) { | |
std::vector<C*> out; | |
// the input data for calculations, the pairs is a multiplication followed by a downshit to cancel eachother out. | |
int tab[]={2,1,16,4}; | |
// initialize the array | |
for (int i=0;i<count;i++) { | |
// do interleaved adding of pointers to elements in each of the vectors. | |
if (i&1) { | |
out.push_back(&bl[i>>1]); | |
} else { | |
out.push_back(&al[i>>1]); | |
} | |
// now set the A and B properties of each element via the macro accessor. | |
RA(out[i])=tab[(i&2)+0]; | |
RB(out[i])=tab[(i&2)+1]; | |
} | |
return std::move(out); | |
} | |
int main(int argc,char **argv) { | |
// we require an element count and a number of times to run the algo as input arguments. | |
if (argc<3) | |
return -1; | |
int count=atoi(argv[1]); | |
int times=atoi(argv[2]); | |
if (count<=0 || times<=0) | |
return -1; | |
// storage of arrays of each element type. | |
std::vector<A> al(count); | |
std::vector<B> bl(count); | |
// produce an output array with pointers to both arrays | |
std::vector<C*> cl=init(count,al,bl); | |
// just a value to mutate through the calculations (it should come out unchanged) | |
int val=100; | |
// time us. | |
time_t be=clock(); | |
for (int i=0;i<times;i++) { | |
// the inner loop goes through the list | |
for (int j=0;j<count;j++) { | |
// gets a pointer | |
auto ra=cl[j]; | |
// multiplie the value by A and then downshifts by B (the 2 should cancel out) | |
val= (val*RA(ra)) >> RB(ra); | |
} | |
} | |
time_t en=clock(); | |
printf("Time taken:%f res:%d\n",(en-be)/(double)CLOCKS_PER_SEC,val); | |
// debug prints to get the A and B offsets of the 3 first elements. | |
// for a plain array they are the same but should be mixed for the polymorphic cases. | |
for (int i=0;i<3;i++) { | |
ptrdiff_t base=(ptrdiff_t)(cl[i]); | |
ptrdiff_t ao=(ptrdiff_t)&RA(cl[i]); | |
ptrdiff_t bo=(ptrdiff_t)&RB(cl[i]); | |
printf("[%d.A: %d] [%d.A %d]\n",i,(int)(ao-base),i,(int)(bo-base)); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment