Last active
December 27, 2022 13:27
-
-
Save wolfv/f322bd9fb0df48e9daafb7453cfc7379 to your computer and use it in GitHub Desktop.
Stream-decompress a simple `zstd`- or `bzip2`-compressed file with cURL / libcurl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#include <bzlib.h>
#include <curl/curl.h>
#include <zstd.h>
// Short alias for the standard filesystem library.
namespace fs = std::filesystem;

// Capacity, in bytes, of the scratch `buffer` array held by each stream struct.
// Alternative value kept for reference:
// constexpr size_t BUFFER_SIZE = 131072;
constexpr size_t BUFFER_SIZE{1024};
struct ZstdStream | |
{ | |
ZstdStream(const fs::path &out_path) : stream(ZSTD_createDCtx()), | |
out(std::make_unique<std::ofstream>(out_path)) | |
{ | |
ZSTD_initDStream(stream); | |
} | |
ZSTD_DCtx *stream; | |
char buffer[BUFFER_SIZE]; | |
std::unique_ptr<std::ofstream> out; | |
~ZstdStream() | |
{ | |
ZSTD_freeDCtx(stream); | |
} | |
}; | |
int64_t stream_decompress_zstd(void *ptr, int64_t size, int64_t nmemb, ZstdStream *stream) | |
{ | |
ZSTD_inBuffer in = {ptr, size_t(size * nmemb), 0}; | |
while (in.pos != in.size) | |
{ | |
ZSTD_outBuffer out = {stream->buffer, BUFFER_SIZE, 0}; | |
const size_t rc = ZSTD_decompressStream(stream->stream, &out, &in); | |
if (ZSTD_isError(rc)) | |
{ | |
throw std::runtime_error(ZSTD_getErrorName(rc)); | |
} | |
stream->out->write((const char *)out.dst, out.pos); | |
} | |
return size * nmemb; | |
} | |
struct Bzip2Stream | |
{ | |
Bzip2Stream(const fs::path &out_path) : stream{.bzalloc = nullptr, .bzfree = nullptr, .opaque = nullptr}, | |
out(std::make_unique<std::ofstream>(out_path)) | |
{ | |
error = BZ2_bzDecompressInit(&stream, 0, false); | |
if (error != BZ_OK) | |
{ | |
throw std::runtime_error("BZ2_bzDecompressInit failed"); | |
} | |
} | |
bz_stream stream; | |
char buffer[BUFFER_SIZE]; | |
std::unique_ptr<std::ofstream> out; | |
int error; | |
~Bzip2Stream() | |
{ | |
BZ2_bzDecompressEnd(&stream); | |
} | |
}; | |
int64_t stream_decompress_bzip2(void *ptr, int64_t size, int64_t nmemb, Bzip2Stream *stream) | |
{ | |
stream->stream.next_in = (char *)ptr; | |
stream->stream.avail_in = size * nmemb; | |
while (stream->stream.avail_in && stream->error == BZ_OK) | |
{ | |
stream->stream.next_out = &stream->buffer[0]; | |
stream->stream.avail_out = BUFFER_SIZE; | |
stream->error = BZ2_bzDecompress(&stream->stream); | |
stream->out->write((const char *)stream->buffer, BUFFER_SIZE - stream->stream.avail_out); | |
} | |
if (stream->error != BZ_OK && stream->error != BZ_STREAM_END) | |
{ | |
throw std::runtime_error("BZ2_bzDecompress failed " + std::to_string(stream->error)); | |
} | |
return size * nmemb; | |
} | |
void curl_zstd() | |
{ | |
std::cout << "curl_zstd" << std::endl; | |
CURL *curl; | |
CURLcode res; | |
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.zst"; | |
const char outfilename[FILENAME_MAX] = "./out_zst.json"; | |
curl = curl_easy_init(); | |
ZstdStream stream(outfilename); | |
if (curl) | |
{ | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_zstd); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream); | |
res = curl_easy_perform(curl); | |
/* always cleanup */ | |
curl_easy_cleanup(curl); | |
} | |
} | |
void curl_bz2() | |
{ | |
std::cout << "curl_bz2" << std::endl; | |
CURL *curl; | |
CURLcode res; | |
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.bz2"; | |
const char outfilename[FILENAME_MAX] = "./out_bz2.json"; | |
curl = curl_easy_init(); | |
Bzip2Stream stream(outfilename); | |
if (curl) | |
{ | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_bzip2); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream); | |
res = curl_easy_perform(curl); | |
/* always cleanup */ | |
curl_easy_cleanup(curl); | |
} | |
} | |
int main() | |
{ | |
curl_zstd(); | |
curl_bz2(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment