-
-
Save Veedrac/d25148faf20669589993 to your computer and use it in GitHub Desktop.
#include <cstdint>
#include <iostream>
#include <iterator>
#include <numeric>
#include <unordered_set>
#include <vector>
// Counts the zero-sum triples in a generated sequence.
//
// The sequence has `number` elements; element i is the running sum
// 0 + 1 + ... + i, negated when i is odd (0, -1, 3, -6, 10, -15, 21, ...).
// For every pair of positions (x before y) the value -(x + y) is looked up
// in a hash set of the sequence; the pair counts when that value is present
// and differs from both x and y. Each qualifying triple is found once per
// pair it contains, hence the final division by 3.
//
// Returns 0 when `number` is zero or negative, which matches what an empty
// sequence produces. All sums are computed in int32_t, so a very large
// `number` would overflow; the O(number^2) pair loop makes such inputs
// impractical anyway.
int32_t some_calculations(int32_t number) {
    // A negative count would be converted to a huge unsigned size by
    // reserve() and throw; treat it like an empty sequence instead.
    if (number <= 0) {
        return 0;
    }

    std::vector<int32_t> a;
    std::unordered_set<int32_t> s;

    // This is the fraction that PyPy uses:
    // http://stackoverflow.com/q/25968487/1763356
    //
    // You can go faster by making this even smaller,
    // but I'm already letting C++ use 32 bit integers
    // and calling reserve on the vector.
    s.max_load_factor(2./3);
    a.reserve(number);

    int32_t x = 0;
    for (int32_t i = 0; i < number; ++i) {
        x += i;
        const int32_t item = i % 2 ? -x : x;
        s.insert(item);
        a.emplace_back(item);
    }

    int32_t tot = 0;
    for (auto first = std::begin(a); first != std::end(a); ++first) {
        for (auto second = std::next(first); second != std::end(a); ++second) {
            // The value that would complete a zero-sum triple with this pair.
            const int32_t target = -(*first + *second);
            if (target != *first && target != *second && s.find(target) != std::end(s)) {
                ++tot;
            }
        }
    }

    return tot / 3;
}
int main(int, char **) { | |
int32_t tot = 0; | |
for (int i=0; i<500; ++i) { | |
tot += some_calculations(i); | |
} | |
std::cout << tot << std::endl; | |
} |
def some_calculations(number):
    """Count the zero-sum triples in a generated sequence.

    The sequence has ``number`` elements; element ``i`` is the running sum
    ``0 + 1 + ... + i``, negated when ``i`` is odd
    (0, -1, 3, -6, 10, -15, 21, ...).

    For every pair of positions (``x`` before ``y``) the value ``-(x + y)``
    is looked up in a set of the sequence; the pair counts when that value
    is present and differs from both ``x`` and ``y``. Each qualifying triple
    is found once per pair it contains, hence the final floor division by 3.

    Returns 0 when ``number`` is zero or negative (the sequence is empty).
    """
    a = []
    x = 0
    for i in range(number):
        x += i
        a.append(-x if i % 2 else x)

    s = set(a)

    tot = 0
    for i, x in enumerate(a):
        for y in a[i+1:]:
            # The value that would complete a zero-sum triple with this pair.
            target = -(x + y)
            if target not in (x, y) and target in s:
                tot += 1

    return tot // 3
# Benchmark driver: total of some_calculations(i) for every i in range(500).
print(sum(map(some_calculations, range(500))))
"""
Graph the time to run the input commands.

Usage:
    plot_times.py <n> <command>...
"""

import docopt
import numpy
import resource
import seaborn
import shlex
import subprocess
import sys

from matplotlib import pyplot

# The module docstring doubles as the docopt usage specification, so the
# indentation of the usage line above is significant.
options = docopt.docopt(__doc__)

try:
    repeats = int(options["<n>"])
except ValueError:
    # Exit with a non-zero status and the message on stderr, so callers can
    # tell that the invocation was rejected.
    raise SystemExit("<n> has to be an integer.")

# One list per command: cumulative child user-CPU time, sampled once before
# the first run and once after every run.
datas = []

# Time
for raw_command in options["<command>"]:
    command = shlex.split(raw_command)

    data = [resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime]
    for i in range(repeats):
        # "\r" rewrites the same terminal line to show progress.
        print("\r{}: {} of {}".format(raw_command, i+1, repeats), end="")
        sys.stdout.flush()

        # The command's output is captured and discarded; a non-zero exit
        # status raises CalledProcessError and aborts the measurement.
        subprocess.check_output(command)
        data.append(resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime)

    print()
    datas.append(data)

# Plot
figure = pyplot.figure(figsize=(24, 6))
seaborn.set(style="white", font_scale=2)

for command, data in zip(options["<command>"], datas):
    # Differences between consecutive cumulative samples are per-run times.
    times = numpy.diff(data)

    # NOTE(review): seaborn.distplot is deprecated in newer seaborn
    # releases - confirm the installed version still provides it.
    seaborn.distplot(
        times,
        label=command,
        # One bin per distinct time after rounding to two decimals.
        bins=len(set(times.round(2))),
        norm_hist=True,
        # Keep the density estimate within the observed range.
        kde_kws={"clip": (min(times), max(times))},
        hist_kws={"histtype": "stepfilled", "alpha": 0.2}
    )

seaborn.despine()

pyplot.title("Time to run")
pyplot.legend()

figure.savefig("time_to_run.png")
The -Ofast flag should be used for the best speed instead of -O3: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html Also, there are dozens of those flags, so we should compare either an unmodified STL version against Python with no imports and all options left at their defaults, or versions that are as heavily optimized as possible. We've chosen the second way, so we should try that variety of compiler flags for both C++ and Python, and try nonstandard class implementations where classes are used - the STL is notorious for its bad implementations. There are probably some extra flags for optimizing floating-point calculations and division that are not included in -Ofast, though I'm not sure.
Looking at the stats makes me suppose that the bottleneck is somewhere in wrong or unoptimized instructions chosen for the platform by the compiler/STL: all the stats are much better for C++, but the cycle count is worse, which means some instructions took extra cycles. But those theories should be checked by profiling, of course.
P.S. I'm not saying Python is bad; I love how you get approximately the same speed with much, much less effort.
I know that this gist is old, but why didn't you use optimized C++ compilation?