Created
July 1, 2021 20:36
-
-
Save jakobkogler/6e92105c56317f87bd51e9224f4e2787 to your computer and use it in GitHub Desktop.
Benchmark prime sieve speed between vector<char> and vector<bool>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>> g++ -std=c++20 -Ofast a.cpp && ./a.out | |
N = 30000000: | |
norm: bool: 157 ms 1857859 | |
norm: char: 257 ms 1857859 | |
norm: bool is 1.637 times FASTER | |
seg: bool: 177 ms 1857859 | |
seg: char: 57 ms 1857859 | |
seg: bool is 3.105 times SLOWER | |
seg opt: char: 19 ms 1857859 | |
N = 60000000: | |
norm: bool: 352 ms 3562115 | |
norm: char: 545 ms 3562115 | |
norm: bool is 1.548 times FASTER | |
seg: bool: 370 ms 3562115 | |
seg: char: 150 ms 3562115 | |
seg: bool is 2.467 times SLOWER | |
seg opt: char: 57 ms 3562115 | |
N = 90000000: | |
norm: bool: 645 ms 5216954 | |
norm: char: 812 ms 5216954 | |
norm: bool is 1.259 times FASTER | |
seg: bool: 645 ms 5216954 | |
seg: char: 198 ms 5216954 | |
seg: bool is 3.258 times SLOWER | |
seg opt: char: 68 ms 5216954 | |
N = 200000000: | |
norm: bool: 1576 ms 11078937 | |
norm: char: 2099 ms 11078937 | |
norm: bool is 1.332 times FASTER | |
seg: bool: 1378 ms 11078937 | |
seg: char: 507 ms 11078937 | |
seg: bool is 2.718 times SLOWER | |
seg opt: char: 148 ms 11078937 | |
N = 500000000: | |
norm: bool: 4388 ms 26355867 | |
norm: char: 6034 ms 26355867 | |
norm: bool is 1.375 times FASTER | |
seg: bool: 4020 ms 26355867 | |
seg: char: 1617 ms 26355867 | |
seg: bool is 2.486 times SLOWER | |
seg opt: char: 431 ms 26355867 | |
N = 1000000000: | |
norm: bool: 10243 ms 50847534 | |
norm: char: 11573 ms 50847534 | |
norm: bool is 1.130 times FASTER | |
seg: bool: 8229 ms 50847534 | |
seg: char: 3628 ms 50847534 | |
seg: bool is 2.268 times SLOWER | |
seg opt: char: 810 ms 50847534 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma GCC target("avx,avx2,fma") | |
#pragma GCC optimization("unroll-loops") | |
#include "bits/stdc++.h" | |
using namespace std; | |
template <typename T> | |
int count_primes_seg_opt(int n) { | |
const int S = 30000; | |
int nsqrt = std::round(std::sqrt(n)); | |
std::vector<char> is_prime(nsqrt + 1, true); | |
std::vector<int> sieve_primes, start_indices; | |
for (int i = 3; i <= nsqrt; i += 2) { | |
if (is_prime[i]) { | |
sieve_primes.push_back(i); | |
start_indices.push_back((i * i - 1) / 2); | |
for (int j = i * i; j <= nsqrt; j += 2 * i) | |
is_prime[j] = false; | |
} | |
} | |
int cnt = 1; | |
std::vector<char> block(S); | |
int high = (n - 1) / 2; | |
for (int low = 0; low <= high; low += S) { | |
fill(block.begin(), block.end(), true); | |
for (auto i = 0u; i < sieve_primes.size(); i++) { | |
int p = sieve_primes[i]; | |
int idx = start_indices[i]; | |
for (; idx < S; idx += p) | |
block[idx] = false; | |
start_indices[i] = idx - S; | |
} | |
if (low == 0) | |
block[0] = false; | |
for (int i = 0; i < S && low + i <= high; i++) { | |
if (block[i]) | |
cnt++; | |
} | |
}; | |
return cnt; | |
} | |
template <typename T> | |
int count_primes_seg(int n) { | |
const int S = 10000; | |
vector<int> primes; | |
int nsqrt = sqrt(n); | |
vector<T> is_prime(nsqrt + 1, true); | |
for (int i = 2; i <= nsqrt; i++) { | |
if (is_prime[i]) { | |
primes.push_back(i); | |
for (int j = i * i; j <= nsqrt; j += i) | |
is_prime[j] = false; | |
} | |
} | |
int result = 0; | |
vector<T> block(S); | |
for (int k = 0; k * S <= n; k++) { | |
fill(block.begin(), block.end(), true); | |
int start = k * S; | |
for (int p : primes) { | |
int start_idx = (start + p - 1) / p; | |
int j = max(start_idx, p) * p - start; | |
for (; j < S; j += p) | |
block[j] = false; | |
} | |
if (k == 0) | |
block[0] = block[1] = false; | |
for (int i = 0; i < S && start + i <= n; i++) { | |
if (block[i]) | |
result++; | |
} | |
} | |
return result; | |
} | |
template <typename T> | |
int count_primes(int n) { | |
vector<T> is_prime(n+1, true); | |
is_prime[0] = is_prime[1] = false; | |
for (int i = 2; i * i <= n; i++) { | |
if (is_prime[i]) { | |
for (int j = i * i; j <= n; j += i) | |
is_prime[j] = false; | |
} | |
} | |
int cnt = 0; | |
for (int i = 2; i <= n; i++) { | |
if (is_prime[i]) | |
cnt++; | |
} | |
return cnt; | |
} | |
template <typename T> | |
long long benchmark(int n, string s) | |
{ | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
int res = count_primes<T>(n); | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
cout << " norm: " << s << ": " << duration.count() << " ms " << res << endl; | |
return duration.count(); | |
} | |
template <typename T> | |
long long benchmark_seg(int n, string s) | |
{ | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
int res = count_primes_seg<T>(n); | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
cout << " seg: " << s << ": " << duration.count() << " ms " << res << endl; | |
return duration.count(); | |
} | |
template <typename T> | |
long long benchmark_seg_opt(int n, string s) | |
{ | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
int res = count_primes_seg_opt<T>(n); | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1); | |
cout << " seg opt: " << s << ": " << duration.count() << " ms " << res << endl; | |
return duration.count(); | |
} | |
int main() { | |
ios_base::sync_with_stdio(false); | |
cin.tie(nullptr); | |
std::cout << std::fixed << std::setprecision(3); | |
for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) { | |
cout << "N = " << n << ": " << endl; | |
{ | |
long long a = benchmark<bool>(n, "bool"); | |
long long b = benchmark<char>(n, "char"); | |
cout << " norm: bool is "; | |
if (a < b) { | |
cout << ((double)b / a) << " times FASTER" << endl; | |
} else { | |
cout << ((double)a / b) << " times SLOWER" << endl; | |
} | |
} | |
{ | |
long long a = benchmark_seg<bool>(n, "bool"); | |
long long b = benchmark_seg<char>(n, "char"); | |
cout << " seg: bool is "; | |
if (a < b) { | |
cout << ((double)b / a) << " times FASTER" << endl; | |
} else { | |
cout << ((double)a / b) << " times SLOWER" << endl; | |
} | |
} | |
long long b = benchmark_seg_opt<char>(n, "char"); | |
cout << endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment