Created
December 27, 2022 13:42
-
-
Save wolfv/5a9a10e06afb6420a0b274a74ff76353 to your computer and use it in GitHub Desktop.
Benchmark anaconda repodata downloads with cURL and compression algorithms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <bzlib.h>
#include <curl/curl.h>
#include <zstd.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <filesystem>
#include <fstream>
#include <ios>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace fs = std::filesystem;

/// Size in bytes of the scratch buffer each decompressor fills per call.
constexpr size_t BUFFER_SIZE = 256000;

/// Value passed to CURLOPT_VERBOSE (0 = quiet, 1 = verbose).
/// Declared as `long` because curl_easy_setopt() is variadic and reads a
/// `long` for this option; a `bool` would be promoted to `int` instead.
constexpr long CURL_VERBOSE = 0L;
struct ZstdStream | |
{ | |
ZstdStream(const fs::path &out_path) : stream(ZSTD_createDCtx()), | |
out(std::make_unique<std::ofstream>(out_path)) | |
{ | |
ZSTD_initDStream(stream); | |
} | |
ZSTD_DCtx *stream; | |
char buffer[BUFFER_SIZE]; | |
std::unique_ptr<std::ofstream> out; | |
~ZstdStream() | |
{ | |
ZSTD_freeDCtx(stream); | |
} | |
}; | |
int64_t stream_decompress_zstd(void *ptr, int64_t size, int64_t nmemb, ZstdStream *stream) | |
{ | |
ZSTD_inBuffer in = {ptr, size_t(size * nmemb), 0}; | |
while (in.pos != in.size) | |
{ | |
ZSTD_outBuffer out = {stream->buffer, BUFFER_SIZE, 0}; | |
const size_t rc = ZSTD_decompressStream(stream->stream, &out, &in); | |
if (ZSTD_isError(rc)) | |
{ | |
throw std::runtime_error(ZSTD_getErrorName(rc)); | |
} | |
stream->out->write((const char *)out.dst, out.pos); | |
} | |
return size * nmemb; | |
} | |
struct Bzip2Stream | |
{ | |
Bzip2Stream(const fs::path &out_path) : stream{.bzalloc = nullptr, .bzfree = nullptr, .opaque = nullptr}, | |
out(std::make_unique<std::ofstream>(out_path)) | |
{ | |
error = BZ2_bzDecompressInit(&stream, 0, false); | |
if (error != BZ_OK) | |
{ | |
throw std::runtime_error("BZ2_bzDecompressInit failed"); | |
} | |
} | |
bz_stream stream; | |
char buffer[BUFFER_SIZE]; | |
std::unique_ptr<std::ofstream> out; | |
int error; | |
~Bzip2Stream() | |
{ | |
BZ2_bzDecompressEnd(&stream); | |
} | |
}; | |
int64_t stream_decompress_bzip2(void *ptr, int64_t size, int64_t nmemb, Bzip2Stream *stream) | |
{ | |
stream->stream.next_in = (char *)ptr; | |
stream->stream.avail_in = size * nmemb; | |
while (stream->stream.avail_in && stream->error == BZ_OK) | |
{ | |
stream->stream.next_out = &stream->buffer[0]; | |
stream->stream.avail_out = BUFFER_SIZE; | |
stream->error = BZ2_bzDecompress(&stream->stream); | |
stream->out->write((const char *)stream->buffer, BUFFER_SIZE - stream->stream.avail_out); | |
} | |
if (stream->error != BZ_OK && stream->error != BZ_STREAM_END) | |
{ | |
throw std::runtime_error("BZ2_bzDecompress failed " + std::to_string(stream->error)); | |
} | |
return size * nmemb; | |
} | |
void curl_zstd() | |
{ | |
std::cout << "curl_zstd" << std::endl; | |
CURL *curl; | |
CURLcode res; | |
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.zst"; | |
const char outfilename[FILENAME_MAX] = "./out_zst.json"; | |
curl = curl_easy_init(); | |
ZstdStream stream(outfilename); | |
if (curl) | |
{ | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_zstd); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream); | |
res = curl_easy_perform(curl); | |
/* always cleanup */ | |
curl_easy_cleanup(curl); | |
} | |
} | |
void curl_bz2() | |
{ | |
std::cout << "curl_bz2" << std::endl; | |
CURL *curl; | |
CURLcode res; | |
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.bz2"; | |
const char outfilename[FILENAME_MAX] = "./out_bz2.json"; | |
curl = curl_easy_init(); | |
Bzip2Stream stream(outfilename); | |
if (curl) | |
{ | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_bzip2); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream); | |
res = curl_easy_perform(curl); | |
/* always cleanup */ | |
curl_easy_cleanup(curl); | |
} | |
} | |
/// libcurl write callback that appends the received chunk to an output stream.
///
/// @tparam T     Any type providing `write(const char *, std::streamsize)`.
/// @param buffer Start of the received chunk.
/// @param size   Element size reported by the caller.
/// @param nitems Element count; the chunk is `size * nitems` bytes long.
/// @param stream Destination stream.
/// @return `size * nitems` on success. When T is a standard stream
///         (derives from std::ios_base) and the stream is in a failed state
///         after the write, 0 is returned so the caller sees a short count
///         and can abort the transfer instead of silently losing data.
template <class T>
std::size_t ostream_callback(char* buffer, std::size_t size, std::size_t nitems, T* stream)
{
    const std::size_t total = size * nitems;
    stream->write(buffer, static_cast<std::streamsize>(total));

    // Only standard streams are known to expose fail(); other sink types
    // keep the original unconditional-success behaviour.
    if constexpr (std::is_base_of_v<std::ios_base, T>)
    {
        if (stream->fail())
        {
            return 0;
        }
    }
    return total;
}
void curl_gzip() | |
{ | |
std::cout << "curl_bz2" << std::endl; | |
CURL *curl; | |
CURLcode res; | |
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json"; | |
const char outfilename[FILENAME_MAX] = "./out_gzip.json"; | |
curl = curl_easy_init(); | |
std::ofstream stream(outfilename); | |
if (curl) | |
{ | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE); | |
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, ""); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ostream_callback<std::ofstream>); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream); | |
res = curl_easy_perform(curl); | |
/* always cleanup */ | |
curl_easy_cleanup(curl); | |
} | |
} | |
int main() | |
{ | |
std::chrono::system_clock::time_point start, end; | |
start = std::chrono::system_clock::now(); | |
curl_zstd(); | |
end = std::chrono::system_clock::now(); | |
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
std::cout << "curl_zstd: " << elapsed << "ms" << std::endl; | |
start = std::chrono::system_clock::now(); | |
curl_bz2(); | |
end = std::chrono::system_clock::now(); | |
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
std::cout << "curl_bz2: " << elapsed << "ms" << std::endl; | |
start = std::chrono::system_clock::now(); | |
curl_gzip(); | |
end = std::chrono::system_clock::now(); | |
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
std::cout << "curl_gzip: " << elapsed << "ms" << std::endl; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl_zstd: 3861ms | |
curl_bz2: 3946ms | |
curl_gzip: 5708ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment