Created
November 10, 2013 01:11
-
-
Save munificent/7392306 to your computer and use it in GitHub Desktop.
An artificial benchmark to get a feel for how much memory layout and data cache misses affect the performance of a typical game loop. I found the numbers pretty astonishing, but I'd definitely appreciate feedback on if I did a decent job on the benchmark here. Try it, tweak it, and tell me what you think. Don't forget to run it in release mode!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <time.h> | |
// This tests a simulated game loop with a bunch of different memory | |
// configurations. You have a bunch of actors. Each actor has a few components. | |
// Each frame every component for every actor is updated. Except for "wrong | |
// order", all components of the same "type" are update for each actor. | |
// In other words, it updates component #1 for every actor, then component #2, | |
// etc. | |
// | |
// Each component has a few fields of state. "Updating" a component just does a | |
// minimal amount of work to read each state. | |
// | |
// The tests cover different ways the actors and components can be organized | |
// in memory to show cache effects. On my machine, output is something like: | |
// | |
// just components 9.4320ms | |
// wrong order 15.5460ms 1.65x | |
// inline actors 15.7990ms 1.68x | |
// order by actors 22.4630ms 2.38x | |
// order by components 13.0540ms 1.38x | |
// random components 89.5480ms 9.49x | |
// random actors 149.9980ms 15.90x | |
// | |
// This is with 4 components and 4 words of state for each component. Varying | |
// those numbers affects the timing. With 8 components and 1 word of state for | |
// each (a pathologically bad setup), I get numbers like: | |
// | |
// wrong order 7.8380ms 1.91x | |
// inline actors 17.7670ms 4.33x | |
// order by actors 44.5870ms 10.87x | |
// order by components 37.3180ms 9.10x | |
// random components 112.7880ms 27.51x | |
// random actors 221.1880ms 53.95x <-- !!! | |
static const int NUM_ACTORS = 1000000; | |
static const int NUM_COMPONENTS = 4; | |
static const int NUM_FIELDS = 4; | |
class Component | |
{ | |
public: | |
Component() | |
{ | |
for (int i = 0; i < NUM_FIELDS; i++) data[i] = i; | |
} | |
int data[NUM_FIELDS]; | |
int update() | |
{ | |
int sum = 0; | |
for (int i = 0; i < NUM_FIELDS; i++) sum += data[i]; | |
return sum; | |
} | |
}; | |
class InlineActor | |
{ | |
public: | |
Component components[NUM_COMPONENTS]; | |
}; | |
class Actor | |
{ | |
public: | |
Component* components[NUM_COMPONENTS]; | |
}; | |
clock_t startTime; | |
void startProfile() | |
{ | |
startTime = clock(); | |
} | |
float endProfile(const char* message) | |
{ | |
float elapsed = (float)(clock() - startTime) * 1000.0f / CLOCKS_PER_SEC; | |
printf("%s%10.4fms\n", message, elapsed); | |
return elapsed; | |
} | |
float endProfile(const char* message, float comparison) | |
{ | |
float elapsed = (float)(clock() - startTime) * 1000.0f / CLOCKS_PER_SEC; | |
printf("%s%10.4fms %6.2fx\n", message, elapsed, elapsed / comparison); | |
return elapsed; | |
} | |
// Get random value in the given range (half-inclusive). | |
int randRange(int m, int n) | |
{ | |
return rand() % (n - m) + m; | |
} | |
// Make sure the code leading to the argument to this call isn't compiled out. | |
void use(long sum) | |
{ | |
if (sum == 123) printf("!"); | |
} | |
int* shuffledArray(int length) | |
{ | |
int* array = new int[length]; | |
for (int i = 0; i < length; i++) array[i] = i; | |
for (int i = 0; i < length; i++) | |
{ | |
int j = randRange(i, length); | |
int t = array[i]; | |
array[i] = array[j]; | |
array[j] = t; | |
} | |
return array; | |
} | |
// Iterate through the components in memory directly instead of going through | |
// actors. This is the best case scenario. | |
float testComponents() | |
{ | |
Component* components = new Component[NUM_ACTORS * NUM_COMPONENTS]; | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_ACTORS * NUM_COMPONENTS; i++) | |
{ | |
sum += components[i].update(); | |
} | |
float elapsed = endProfile(" just components "); | |
use(sum); | |
delete [] components; | |
return elapsed; | |
} | |
// For each actor, update all of its components. Not representative of real | |
// game since games update all components of one type. But gives us a point of | |
// comparison for testInline(). | |
void testWrongOrder(float best) | |
{ | |
InlineActor* actors = new InlineActor[NUM_ACTORS]; | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_ACTORS; i++) | |
{ | |
for (int j = 0; j < NUM_COMPONENTS; j++) | |
{ | |
sum += actors[i].components[j].update(); | |
} | |
} | |
endProfile(" wrong order ", best); | |
use(sum); | |
delete [] actors; | |
} | |
// For each component, update each actor, with components stored inline in the | |
// actor. | |
void testInline(float best) | |
{ | |
InlineActor* actors = new InlineActor[NUM_ACTORS]; | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
sum += actors[j].components[i].update(); | |
} | |
} | |
endProfile(" inline actors ", best); | |
use(sum); | |
delete [] actors; | |
} | |
// For each component, update each actor. The actor stores pointers to the | |
// components, and the components are ordered in memory by actor then component. | |
void testOrderedByActor(float best) | |
{ | |
Actor* actors = new Actor[NUM_ACTORS]; | |
Component* components = new Component[NUM_ACTORS * NUM_COMPONENTS]; | |
// Wire up the components to the actors. | |
for (int i = 0; i < NUM_ACTORS; i++) | |
{ | |
for (int j = 0; j < NUM_COMPONENTS; j++) | |
{ | |
actors[i].components[j] = &components[i * NUM_COMPONENTS + j]; | |
} | |
} | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
sum += actors[j].components[i]->update(); | |
} | |
} | |
endProfile(" order by actors ", best); | |
use(sum); | |
delete [] actors; | |
delete [] components; | |
} | |
// For each component, update each actor. The actor stores pointers to the | |
// components, and the components are ordered in memory by component then actor. | |
void testOrderedByComponent(float best) | |
{ | |
Actor* actors = new Actor[NUM_ACTORS]; | |
Component* components = new Component[NUM_ACTORS * NUM_COMPONENTS]; | |
// Wire up the components to the actors. | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
actors[j].components[i] = &components[i * NUM_ACTORS + j]; | |
} | |
} | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
sum += actors[j].components[i]->update(); | |
} | |
} | |
endProfile("order by components ", best); | |
use(sum); | |
delete [] actors; | |
delete [] components; | |
} | |
// For each component, update each actor. The actor stores pointers to the | |
// components, and the components are in random order. | |
void testRandomComponents(float best) | |
{ | |
Actor* actors = new Actor[NUM_ACTORS]; | |
Component* components = new Component[NUM_ACTORS * NUM_COMPONENTS]; | |
int* indexes = shuffledArray(NUM_ACTORS * NUM_COMPONENTS); | |
// Wire up the components to the actors. | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
actors[j].components[i] = &components[indexes[i * NUM_ACTORS + j]]; | |
} | |
} | |
delete [] indexes; | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
sum += actors[j].components[i]->update(); | |
} | |
} | |
endProfile(" random components ", best); | |
use(sum); | |
delete [] actors; | |
delete [] components; | |
} | |
// For each component, update each actor. The list of actors is pointers to | |
// actors that are in random order in memory. Each actor points to components | |
// that are in random order in memory. | |
// | |
// This is a realistic and also worst case. | |
void testRandomActors(float best) | |
{ | |
Actor* actors = new Actor[NUM_ACTORS]; | |
Component* components = new Component[NUM_ACTORS * NUM_COMPONENTS]; | |
int* componentIndexes = shuffledArray(NUM_ACTORS * NUM_COMPONENTS); | |
// Wire up the components to the actors. | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
actors[j].components[i] = &components[componentIndexes[i * NUM_ACTORS + j]]; | |
} | |
} | |
delete [] componentIndexes; | |
// Fill the list of actors. | |
Actor** actorList = new Actor*[NUM_ACTORS]; | |
int* actorIndexes = shuffledArray(NUM_ACTORS); | |
for (int i = 0; i < NUM_ACTORS; i++) | |
{ | |
actorList[i] = &actors[actorIndexes[i]]; | |
} | |
delete [] actorIndexes; | |
startProfile(); | |
long sum = 0; | |
for (int i = 0; i < NUM_COMPONENTS; i++) | |
{ | |
for (int j = 0; j < NUM_ACTORS; j++) | |
{ | |
sum += actorList[j]->components[i]->update(); | |
} | |
} | |
endProfile(" random actors ", best); | |
use(sum); | |
delete [] actors; | |
delete [] components; | |
delete [] actorList; | |
} | |
int main(int argc, const char * argv[]) | |
{ | |
for (int i = 0; i < 6; i++) | |
{ | |
float components = testComponents(); | |
testWrongOrder(components); | |
testInline(components); | |
testOrderedByActor(components); | |
testOrderedByComponent(components); | |
testRandomComponents(components); | |
testRandomActors(components); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment