jakobkogler · July 1, 2021 19:59 · jxu · Oct 16, 2021 · jakobkogler · Oct 17, 2021
diff --git a/benchmark_output.txt b/benchmark_output.txt
 >> g++ -std=c++20 -Ofast benchmark_prime_sieve.cpp && ./a.out
 N = 30000000: 
    norm: bool: 106 ms
    norm: char: 235 ms
    norm: bool is 2.217 times FASTER
    seg:  bool: 147 ms
    seg:  char: 38 ms
    seg:  bool is 3.868 times SLOWER

 N = 60000000: 
    norm: bool: 300 ms
    norm: char: 527 ms
    norm: bool is 1.757 times FASTER
    seg:  bool: 311 ms
    seg:  char: 87 ms
    seg:  bool is 3.575 times SLOWER

 N = 90000000: 
    norm: bool: 498 ms
    norm: char: 811 ms
    norm: bool is 1.629 times FASTER
    seg:  bool: 488 ms
    seg:  char: 143 ms
    seg:  bool is 3.413 times SLOWER

 N = 200000000: 
    norm: bool: 1305 ms
    norm: char: 1987 ms
    norm: bool is 1.523 times FASTER
    seg:  bool: 1164 ms
    seg:  char: 380 ms
    seg:  bool is 3.063 times SLOWER

 N = 500000000: 
    norm: bool: 3731 ms
    norm: char: 5210 ms
    norm: bool is 1.396 times FASTER
    seg:  bool: 3268 ms
    seg:  char: 1214 ms
    seg:  bool is 2.692 times SLOWER

 N = 1000000000: 
    norm: bool: 7868 ms
    norm: char: 11066 ms
    norm: bool is 1.406 times FASTER
    seg:  bool: 7273 ms
    seg:  char: 3039 ms
    seg:  bool is 2.393 times SLOWER
diff --git a/benchmark_prime_sieve.cpp b/benchmark_prime_sieve.cpp
 #include "bits/stdc++.h"
 using namespace std;

 template <typename T>
 int count_primes_seg(int n) {
    const int S = 10'000;

    vector<int> primes;
    int nsqrt = sqrt(n);
    vector<T> is_prime(nsqrt + 1, true);
    for (int i = 2; i <= nsqrt; i++) {
        if (is_prime[i]) {
            primes.push_back(i);
            for (int j = i * i; j <= nsqrt; j += i)
                is_prime[j] = false;
        }
    }

    int result = 0;
    vector<T> block(S);
    for (int k = 0; k * S <= n; k++) {
        fill(block.begin(), block.end(), true);
        int start = k * S;
        for (int p : primes) {
            int start_idx = (start + p - 1) / p;
            int j = max(start_idx, p) * p - start;
            for (; j < S; j += p)
                block[j] = false;
        }
        if (k == 0)
            block[0] = block[1] = false;
        for (int i = 0; i < S && start + i <= n; i++) {
            if (block[i])
                result++;
        }
    }
    return result;
 }


 template <typename T>
 int count_primes(int n) {
    vector<T> is_prime(n+1, true);
    is_prime[0] = is_prime[1] = false;
    for (int i = 2; i * i <= n; i++) {
        if (is_prime[i]) {
            for (int j = i * i; j <= n; j += i)
                is_prime[j] = false;
        }
    }
    int cnt = 0;
    for (int i = 2; i <= n; i++) {
        if (is_prime[i])
            cnt++;
    }
    return cnt;
    /* return is_prime[n]; */
 }


 template <typename T>
 long long benchmark(int n, string s)
 {
    auto t1 = std::chrono::high_resolution_clock::now();
    int res = count_primes<T>(n);
    auto t2 = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
    cout << "    norm: " << s << ": " << duration.count() << " ms" << endl;
    return duration.count();
 }


 template <typename T>
 long long benchmark_seg(int n, string s)
 {
    auto t1 = std::chrono::high_resolution_clock::now();
    int res = count_primes_seg<T>(n);
    auto t2 = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
    cout << "    seg:  " << s << ": " << duration.count() << " ms" << endl;
    return duration.count();
 }

 int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(nullptr);

    std::cout << std::fixed << std::setprecision(3);
    for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) {
        cout << "N = " << n << ": " << endl;
        {
            long long a = benchmark<bool>(n, "bool");
            long long b = benchmark<char>(n, "char");
            cout << "    norm: bool is ";
            if (a < b) {
                cout << ((double)b / a) << " times FASTER" << endl;
            } else {
                cout << ((double)a / b) << " times SLOWER" << endl;
            }
        }

        {
            long long a = benchmark_seg<bool>(n, "bool");
            long long b = benchmark_seg<char>(n, "char");
            cout << "    seg:  bool is ";
            if (a < b) {
                cout << ((double)b / a) << " times FASTER" << endl;
            } else {
                cout << ((double)a / b) << " times SLOWER" << endl;
            }
        }
        cout << endl;
    }
 }
	>> g++ -std=c++20 -Ofast benchmark_prime_sieve.cpp && ./a.out
	N = 30000000:
	norm: bool: 106 ms
	norm: char: 235 ms
	norm: bool is 2.217 times FASTER
	seg: bool: 147 ms
	seg: char: 38 ms
	seg: bool is 3.868 times SLOWER

	N = 60000000:
	norm: bool: 300 ms
	norm: char: 527 ms
	norm: bool is 1.757 times FASTER
	seg: bool: 311 ms
	seg: char: 87 ms
	seg: bool is 3.575 times SLOWER

	N = 90000000:
	norm: bool: 498 ms
	norm: char: 811 ms
	norm: bool is 1.629 times FASTER
	seg: bool: 488 ms
	seg: char: 143 ms
	seg: bool is 3.413 times SLOWER

	N = 200000000:
	norm: bool: 1305 ms
	norm: char: 1987 ms
	norm: bool is 1.523 times FASTER
	seg: bool: 1164 ms
	seg: char: 380 ms
	seg: bool is 3.063 times SLOWER

	N = 500000000:
	norm: bool: 3731 ms
	norm: char: 5210 ms
	norm: bool is 1.396 times FASTER
	seg: bool: 3268 ms
	seg: char: 1214 ms
	seg: bool is 2.692 times SLOWER

	N = 1000000000:
	norm: bool: 7868 ms
	norm: char: 11066 ms
	norm: bool is 1.406 times FASTER
	seg: bool: 7273 ms
	seg: char: 3039 ms
	seg: bool is 2.393 times SLOWER
	#include "bits/stdc++.h"
	using namespace std;

	template <typename T>
	int count_primes_seg(int n) {
	const int S = 10'000;

	vector<int> primes;
	int nsqrt = sqrt(n);
	vector<T> is_prime(nsqrt + 1, true);
	for (int i = 2; i <= nsqrt; i++) {
	if (is_prime[i]) {
	primes.push_back(i);
	for (int j = i * i; j <= nsqrt; j += i)
	is_prime[j] = false;
	}
	}

	int result = 0;
	vector<T> block(S);
	for (int k = 0; k * S <= n; k++) {
	fill(block.begin(), block.end(), true);
	int start = k * S;
	for (int p : primes) {
	int start_idx = (start + p - 1) / p;
	int j = max(start_idx, p) * p - start;
	for (; j < S; j += p)
	block[j] = false;
	}
	if (k == 0)
	block[0] = block[1] = false;
	for (int i = 0; i < S && start + i <= n; i++) {
	if (block[i])
	result++;
	}
	}
	return result;
	}


	template <typename T>
	int count_primes(int n) {
	vector<T> is_prime(n+1, true);
	is_prime[0] = is_prime[1] = false;
	for (int i = 2; i * i <= n; i++) {
	if (is_prime[i]) {
	for (int j = i * i; j <= n; j += i)
	is_prime[j] = false;
	}
	}
	int cnt = 0;
	for (int i = 2; i <= n; i++) {
	if (is_prime[i])
	cnt++;
	}
	return cnt;
	/* return is_prime[n]; */
	}


	template <typename T>
	long long benchmark(int n, string s)
	{
	auto t1 = std::chrono::high_resolution_clock::now();
	int res = count_primes<T>(n);
	auto t2 = std::chrono::high_resolution_clock::now();
	auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
	cout << " norm: " << s << ": " << duration.count() << " ms" << endl;
	return duration.count();
	}


	template <typename T>
	long long benchmark_seg(int n, string s)
	{
	auto t1 = std::chrono::high_resolution_clock::now();
	int res = count_primes_seg<T>(n);
	auto t2 = std::chrono::high_resolution_clock::now();
	auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1);
	cout << " seg: " << s << ": " << duration.count() << " ms" << endl;
	return duration.count();
	}

	int main() {
	ios_base::sync_with_stdio(false);
	cin.tie(nullptr);

	std::cout << std::fixed << std::setprecision(3);
	for (int n : {3e7, 6e7, 9e7, 2e8, 5e8, 1e9}) {
	cout << "N = " << n << ": " << endl;
	{
	long long a = benchmark<bool>(n, "bool");
	long long b = benchmark<char>(n, "char");
	cout << " norm: bool is ";
	if (a < b) {
	cout << ((double)b / a) << " times FASTER" << endl;
	} else {
	cout << ((double)a / b) << " times SLOWER" << endl;
	}
	}

	{
	long long a = benchmark_seg<bool>(n, "bool");
	long long b = benchmark_seg<char>(n, "char");
	cout << " seg: bool is ";
	if (a < b) {
	cout << ((double)b / a) << " times FASTER" << endl;
	} else {
	cout << ((double)a / b) << " times SLOWER" << endl;
	}
	}
	cout << endl;
	}
	}