Last active
July 20, 2018 21:32
-
-
Save charmoniumQ/618d3cd18f8bdcfc6cdc910faf362849 to your computer and use it in GitHub Desktop.
Malloc performance test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6 | |
import numpy as np | |
import re | |
data = {} | |
run_envs = ['linux', 'nautk'] | |
series = ['mem', 'chunks', 'time'] | |
files = {'linux': 'data/linux_run_mem2.out', 'nautk': 'data/nautk_run_mem2.out'} | |
data_pattern = re.compile(r'(\d*),(\d*),(\d*)') | |
data = {} | |
for run_env in run_envs: | |
data[run_env] = {} | |
data[run_env]['mem'] = [] | |
data[run_env]['chunks'] = [] | |
data[run_env]['time'] = [] | |
with open(files[run_env]) as fil: | |
for line in fil: | |
m = data_pattern.match(line) | |
if m: | |
mem, chunks, time = map(int, m.groups()) | |
data[run_env]['mem'].append(mem) | |
data[run_env]['chunks'].append(chunks) | |
data[run_env]['time'].append(time) | |
data[run_env]['xs'] = np.vstack((data[run_env]['chunks'], | |
data[run_env]['mem' ])).T | |
data[run_env]['time'] = np.array(data[run_env]['time']) | |
def aggregate(xs, ys, fn): | |
unique_x = sorted(list(frozenset(map(tuple, xs)))) | |
result = [] | |
for x in unique_x: | |
matching = (xs == x).all(axis=1) | |
result.append(fn(ys[matching])) | |
unique_x = np.array(unique_x) | |
return unique_x, np.array(result) | |
from scipy.interpolate import interp2d | |
xy_grid, linux_ts = aggregate(data['linux']['xs'], data['linux']['time'], np.mean) | |
linux_ts = np.log2(linux_ts) | |
xy_grid = np.log2(xy_grid) | |
chunks = xy_grid[:, 0] | |
mem = xy_grid[:, 1] | |
linux_f = interp2d(chunks, mem, linux_ts, kind='linear') | |
xy_grid_, nautk_ts = aggregate(data['nautk']['xs'], data['nautk']['time'], np.mean) | |
nautk_ts = np.log2(nautk_ts) | |
nautk_f = interp2d(chunks, mem, nautk_ts, kind='linear') | |
v_chunks = np.linspace(np.min(chunks), np.max(chunks), 100) | |
v_mem = np.linspace(np.min(mem ), np.max(mem ), 100) | |
tmax = max(linux_ts.max(), nautk_ts.max()) | |
tmin = min(linux_ts.min(), nautk_ts.min()) | |
from bokeh.models import ColumnDataSource, Legend, ColorBar, LinearColorMapper, Plot | |
from bokeh.plotting import figure, curdoc | |
from bokeh.layouts import widgetbox, column, row, gridplot | |
from bokeh.models.widgets import Slider, RadioButtonGroup, Div, Paragraph, PreText | |
from bokeh.document import Document | |
width = 350 | |
height = 220 | |
chunks_fig = figure(plot_width=width, plot_height=height, x_range=(v_chunks.min(), v_chunks.max()), y_range=(tmin, tmax)) | |
chunks_linux_line = chunks_fig.line(x=[], y=[], color='red' , legend='linux') | |
chunks_nautk_line = chunks_fig.line(x=[], y=[], color='blue', legend='nautk') | |
chunks_line = chunks_fig.line(x=[], y=[], color='gray', alpha=0.5) | |
chunks_fig.xaxis.axis_label = 'log num_chunks' | |
chunks_fig.yaxis.axis_label = 'log time' | |
chunks_fig.legend.location = 'top_left' | |
mem_fig = figure(plot_width=width, plot_height=height, y_range=(v_mem.min(), v_mem.max()), x_range=(tmin, tmax)) | |
mem_linux_line = mem_fig.line(x=[], y=[], color='red' ) | |
mem_nautk_line = mem_fig.line(x=[], y=[], color='blue') | |
mem_line = mem_fig.line(x=[], y=[], color='gray', alpha=0.5) | |
mem_fig.yaxis.axis_label = 'log memory' | |
mem_fig.xaxis.axis_label = 'log time' | |
center_fig = figure(plot_width=width, plot_height=height, x_range=(v_chunks.min(), v_chunks.max()), y_range=(v_mem.min(), v_mem.max())) | |
color_mapper = LinearColorMapper(palette='Viridis256', low=tmin, high=tmax) | |
image = center_fig.image([], | |
x=v_chunks.min(), | |
y=v_mem.min(), | |
dw=v_chunks.max() - v_chunks.min(), | |
dh=v_mem.max() - v_mem.min(), | |
color_mapper=color_mapper, | |
) | |
center_fig.xaxis.axis_label = 'log num_chunks' | |
center_fig.yaxis.axis_label = 'log mem' | |
mem_line2 = center_fig.line(x=[], y=[], color='gray') | |
chunks_line2 = center_fig.line(x=[], y=[], color='gray') | |
center_fig.circle( | |
x=xy_grid[:, 0], | |
y=xy_grid[:, 1], | |
color='red', | |
size=1 | |
) | |
color_bar = ColorBar(color_mapper=color_mapper) | |
color_bar.orientation = 'horizontal' | |
color_bar.width = width | |
cb_height = 80 | |
color_bar.width = int(0.75 * width) | |
color_bar.height = 20 | |
color_bar.title = 'log time' | |
import bokeh.palettes | |
def change_image(_, __, button_i): | |
if buttons[button_i] == 'linux': | |
data = linux_f(v_chunks, v_mem) | |
color_mapper.palette = bokeh.palettes.Viridis[256] | |
color_mapper.low = tmin | |
color_mapper.high = tmax | |
image.data_source.data = { | |
'image': [data], | |
} | |
elif buttons[button_i] == 'nautk': | |
data = nautk_f(v_chunks, v_mem) | |
color_mapper.palette = bokeh.palettes.Viridis[256] | |
color_mapper.low = tmin | |
color_mapper.high = tmax | |
image.data_source.data = { | |
'image': [data], | |
} | |
elif buttons[button_i] == 'linux / nautk': | |
data = linux_f(v_chunks, v_mem) - nautk_f(v_chunks, v_mem) | |
color_mapper.palette = bokeh.palettes.RdBu[11] | |
color_mapper.low = min( data.min(), -data.max()) | |
color_mapper.high = max(-data.min(), data.max()) | |
image.data_source.data = { | |
'image': [data], | |
} | |
def change_mem(_, __, c_mem): | |
if c_mem: | |
chunks_linux_line.data_source.data = { | |
'x': v_chunks, | |
'y': linux_f(v_chunks, c_mem), | |
} | |
chunks_nautk_line.data_source.data = { | |
'x': v_chunks, | |
'y': nautk_f(v_chunks, c_mem), | |
} | |
mem_line.data_source.data = { | |
'x': [0, tmax], | |
'y': [c_mem, c_mem], | |
} | |
mem_line2.data_source.data = { | |
'x': [v_chunks.min(), v_chunks.max()], | |
'y': [c_mem, c_mem], | |
} | |
def change_chunks(_, __, c_chunks): | |
if c_chunks: | |
mem_linux_line.data_source.data = { | |
'x': linux_f(c_chunks, v_mem), | |
'y': v_mem, | |
} | |
mem_nautk_line.data_source.data = { | |
'x': nautk_f(c_chunks, v_mem), | |
'y': v_mem, | |
} | |
chunks_line.data_source.data = { | |
'x': [c_chunks, c_chunks], | |
'y': [0, tmax], | |
} | |
chunks_line2.data_source.data = { | |
'x': [c_chunks, c_chunks], | |
'y': [v_mem.min(), v_mem.max()], | |
} | |
chunks_slider = Slider(start=v_chunks.min(), end=v_chunks.max(), value=0, step=1) | |
chunks_slider.on_change('value', change_chunks) | |
chunks_slider.value = v_chunks.mean() | |
top_cell = column(chunks_fig, widgetbox(chunks_slider)) | |
mem_slider = Slider(start=v_mem.min(), end=v_mem.max(), value=0, step=1) | |
mem_slider.on_change('value', change_mem) | |
mem_slider.value = v_mem.mean() | |
mem_slider.orientation = 'vertical' | |
mem_slider.direction = 'rtl' | |
mem_slider.height = int(height * 0.6) | |
right_cell = row(widgetbox(mem_slider, width=90, height=mem_slider.height), mem_fig) | |
buttons = ["linux", "nautk", "linux / nautk"] | |
center_buttons = RadioButtonGroup( | |
labels=buttons, | |
) | |
center_buttons.on_change('active', change_image) | |
center_buttons.active = 0 | |
control_fig = figure(plot_width=width, height=cb_height) | |
# color_bar.margin = 0 | |
# color_bar.padding = 0 | |
# control_fig.min_border = 0 | |
color_bar.border_line_alpha = 0 | |
color_bar.location = (0,0) | |
control_fig.add_layout(color_bar) | |
control_cell = column(control_fig, center_buttons) | |
description = Div(text=""" | |
<h2>How to read this graph</h2> | |
<p>This shows the <i>time</i> (in CPU cycles) to allocate <i>num_chunks</i> equal chunks whose combined size is <i>memory</i> (in bytes). All axes are in log2 space, so a value of 23 means 8Mi.</p> | |
<p><b>The center</b> shows both parameter sampled simultaneously on a 2d grid. The red dots is where the actual samples are. Use the radio-buttons to select which value is plotted on that chart. <i>linux / nautk</i> shows the difference in log-space, i.e. <code>log2(linux_time) - log2(nautk_time)</code>.</p> | |
<p><b>The side-plots</b> show a 1d slice of the 2d plot, where one parameter is fixed and the other moves. Move the sliders with your mouse or the left/right cursor keys to changed the fixed-value. The right side-plot is 'sideways' with <i>memory</i> on the vertical axis and <i>time</i> on the horizontal one.</p> | |
<h2>The experiment</h2> | |
<ul> | |
<li>I use <code>rdtsc</code> to count cycles.</li> | |
<li>I use <code>-O3</code> and <code>GCC 6.3.1</code> to compile both versions.</li> | |
<li>I average over 10 repetitions.</li> | |
</ul> | |
<pre><code> | |
uint64_t start = 0; rdtsc(start); | |
for(size_t i = 0; i < num_chunks; ++i) { | |
try_nnull(allocs[i] = malloc(chunk_size)); | |
} | |
for(size_t i = 0; i < num_chunks; ++i) { | |
free(allocs[i]); | |
} | |
uint64_t stop = 0; rdtsc(stop); | |
</code></pre> | |
<h2><a href="https://gist.github.com/charmoniumQ/618d3cd18f8bdcfc6cdc910faf362849">full code</a></h2> | |
""") | |
#layout = gridplot([description], [top_cell, None], [center_fig, right_cell], [control_cell, None], sizing_mode='scale_width', plot_width=int(width * 2.2)) | |
layout = gridplot([top_cell, None], [center_fig, right_cell], [control_cell, None]) | |
curdoc().add_root(gridplot([layout], [description], sizing_mode='scale_width')) | |
from pathlib import Path | |
title = Path(__file__).name[:-3].replace('_',' ').capitalize() | |
curdoc().title = title |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef NDEBUG | |
#warning Compiling with diagnostics | |
#define try(call) \ | |
int ret = (call); \ | |
if(!ret) { \ | |
printf("%s:%d: %s returned %d\n", \ | |
__FILE__, __LINE__, #call, ret); \ | |
return -1; \ | |
} | |
#define try_nnull(call) try((call) != NULL) | |
#define try_zero(call) try(!(call)) | |
#else | |
#define try(call) (call) | |
#define try_nnull(call) (call) | |
#define try_zero(call) (call) | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef MAIN_H | |
#define MAIN_H | |
#ifdef __NAUTILUS__ | |
void app_main(); | |
#endif | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define rdtsc(val) \ | |
{ \ | |
uint32_t hi, lo; \ | |
__asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); \ | |
*(uint64_t*) &val = (((uint64_t) hi) << 32) | lo; \ | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifdef __NAUTILUS__ | |
#include <nautilus/libccompat.h> | |
#else | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#endif | |
#include "app/main.h" | |
#include "app/error.h" | |
#include "app/rdtsc.h" | |
int malloc_time_test(size_t chunk_size, size_t num_chunks, uint64_t* result) { | |
void** allocs = malloc(num_chunks * sizeof(void*)); | |
uint64_t start = 0; rdtsc(start); | |
for(size_t i = 0; i < num_chunks; ++i) { | |
try_nnull(allocs[i] = malloc(chunk_size)); | |
} | |
for(size_t i = 0; i < num_chunks; ++i) { | |
free(allocs[i]); | |
} | |
uint64_t stop = 0; rdtsc(stop); | |
free(allocs); | |
*result = (uint64_t) (stop - start); | |
return 0; | |
} | |
int mem_test_main() { | |
const size_t kilo = 1024 * 1 ; | |
const size_t mega = 1024 * kilo; | |
const size_t giga = 1024 * mega; | |
const size_t mem_step = 2; | |
const size_t mem_min = 1 * mega; | |
const size_t mem_max = 8 * giga; | |
const size_t num_chunks_step = 2; | |
const size_t num_chunks_min = 1; | |
const size_t num_chunks_max = 128 * mega; | |
const size_t num_reps = 10; | |
printf("total mem (bytes),chunks,time (cycles)\n"); | |
for(size_t rep = 0; rep < num_reps; ++rep) { | |
for(size_t mem = mem_min; mem <= mem_max; mem *= mem_step) { | |
for(size_t num_chunks = num_chunks_min; | |
num_chunks <= num_chunks_max && num_chunks <= mem / 4; | |
num_chunks *= num_chunks_step) { | |
size_t chunk_size = mem / num_chunks; | |
uint64_t time = 0; | |
try_zero(malloc_time_test(chunk_size, num_chunks, &time)); | |
printf("%ld,%lu,%lu\n", chunk_size * num_chunks, num_chunks, time); | |
fflush(stdout); | |
} | |
} | |
} | |
return 0; | |
} | |
#ifdef __NAUTILUS__ | |
void app_main() { | |
mem_test_main(); | |
} | |
#else | |
int main() { | |
try_zero(mem_test_main()); | |
return 0; | |
} | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#CC = clang | |
CFLAGS += -Wall -Wextra -I./include/ -std=gnu99 | |
CFLAGS_DBG = -Og -g | |
CFLAGS_OPT = -O3 -DNDEBUG | |
main: main.out | |
cp main.out main | |
main.out: main.c | |
$(CC) $(CFLAGS) $(CFLAGS_OPT) -o $@ $< | |
main.out_dbg: main.c | |
$(CC) $(CFLAGS) $(CFLAGS_DBG) -o $@ $< | |
run: main.out | |
./main.out | |
.PHONY: out | |
debug: main.out_dbg | |
gdb -quiet ./main.out_dbg -ex r | |
.PHONY: debug | |
memtest: main.out_dbg | |
valgrind ./main.out_dbg | |
.PHONY: memtest | |
clean: | |
$(RM) main.out main.out_dbg |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CFLAGS += -Wall -Wextra -O3 -DNDEBUG | |
obj-y += main.o |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment