Skip to content

Instantly share code, notes, and snippets.

@jay
Last active April 21, 2019 05:19
Show Gist options
  • Save jay/27f5f8c2cc1c5510dd32 to your computer and use it in GitHub Desktop.
Save jay/27f5f8c2cc1c5510dd32 to your computer and use it in GitHub Desktop.
Use libcurl to retrieve a file, using a proxy if specified, and write that file to disk with a filename based on the filename in the URL.
/* Use libcurl to retrieve a file, using a proxy if specified, and write that
file to disk with a filename based on the filename in the URL.
Usage: ProxyTest <proxy|""> <url>
"My goal is simply to download a requested file and have it land on my desktop
with that exact same name. "
curl-library mailing list thread:
'Compiling libcurl for https'
http://curl.haxx.se/mail/lib-2015-10/0117.html
If you need a SOCKS server for testing use ssocks.
http://sourceforge.net/projects/ssocks/
~/ssocks-0.0.14/src/ssocksd
Copyright (C) 2015 Jay Satiro <[email protected]>
http://curl.haxx.se/docs/copyright.html
https://gist.github.com/jay/27f5f8c2cc1c5510dd32
*/
#define _CRT_NONSTDC_NO_DEPRECATE
#include <fcntl.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
/* http://curl.haxx.se/download.html */
#include <curl/curl.h>
/* Force FALSE and TRUE to plain 0 and 1, discarding any definition that a
   previously included header may have supplied. */
#undef FALSE
#define FALSE 0
#undef TRUE
#define TRUE 1
/* O_BINARY is OR'ed into the open() flags in this file. Where the platform
   does not define it fall back to _O_BINARY if that exists, otherwise to 0 so
   that it has no effect on the flags. */
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0
#endif
#endif
/* On Windows route strncasecmp calls to strnicmp. */
#ifdef _WIN32
#define strncasecmp strnicmp
#endif
/* The kind of progress meter that progress_callback drew most recently. */
enum progress_type {
PROGRESS_NONE = 0, /* NONE must stay at 0 */
PROGRESS_BARS, /* percentage and bars meter */
PROGRESS_ROLLER /* back-and-forth roller, drawn when content length is unknown */
};
/* State of the bars meter as saved by progress_callback. */
struct progress_bars {
int percent; /* percentage shown by the last bars meter that was drawn */
};
/* State of the roller meter as saved by progress_callback. */
struct progress_roller {
int pos; /* number of spaces printed before the roller when last drawn */
int backward; /* TRUE if the roller is moving backward, FALSE if forward */
time_t time; /* time(NULL) when the roller was last drawn */
curl_off_t dl; /* dlnow value most recently recorded by the callback */
};
/* Data handed to progress_callback through its clientp argument.
To initialize zero out the struct then set session. */
struct progress_data {
CURL *session; /* curl easy_handle to the calling session */
long redirect_count; /* number of redirects for the session */
int redirect_silent; /* TRUE: Overwrite last progress meter on redirect.
FALSE: Start a new progress meter on redirect. */
enum progress_type type; /* kind of meter drawn last, PROGRESS_NONE if none */
struct progress_bars bars; /* last bars if type PROGRESS_BARS */
struct progress_roller roller; /* last roller if type PROGRESS_ROLLER */
};
/* sanitize
 *
 * Build a copy of file_name that is safe to use as a local filename.
 *
 * - At most 255 characters of file_name are used.
 * - Control characters (1-31) and the characters |<>/\":?* become
 *   underscores (foo:bar => foo_bar).
 * - Trailing spaces and periods are dropped.
 * - Reserved DOS device names are renamed (con => _con, con.air => con_air).
 *
 * Most of this code was adapted from my code in the curl tool.
 * https://github.com/curl/curl/blob/master/src/tool_doswin.c
 *
 * Success: (CURLE_OK) *sanitized points to a sanitized copy of file_name.
 *          The copy is allocated by strdup.
 * Failure: (!= CURLE_OK) *sanitized is NULL.
 */
CURLcode sanitize(char **const sanitized, const char *file_name)
{
  static const char forbidden[] = "|<>/\\\":?*";
  static const char *const short_devices[] = { "CON", "PRN", "AUX", "NUL" };
  char buf[255 + 1] = { 0, };
  size_t n = 0;
  size_t i = 0;
  size_t dev = 0; /* length of the matched device name prefix, 0 if none */

  *sanitized = NULL;

  /* work on a bounded, nul terminated copy */
  n = strlen(file_name);
  if(n >= sizeof(buf))
    n = sizeof(buf) - 1;
  memcpy(buf, file_name, n);
  buf[n] = '\0';

  /* control characters and forbidden characters turn into underscores.
     buf[i] is never nul here so strchr can't match the terminator. */
  for(i = 0; i < n; ++i) {
    const char c = buf[i];
    if((1 <= c && c <= 31) || strchr(forbidden, c))
      buf[i] = '_';
  }

  /* cut off the run of spaces and periods at the end, if any */
  while(n && (buf[n - 1] == ' ' || buf[n - 1] == '.'))
    --n;
  buf[n] = '\0';

  /* does the name start with a reserved dos device name? */
  for(i = 0; i < sizeof(short_devices) / sizeof(short_devices[0]); ++i) {
    if(!strncasecmp(buf, short_devices[i], 3)) {
      dev = 3;
      break;
    }
  }
  if(!dev) {
    if(!strncasecmp(buf, "CLOCK$", 6))
      dev = 6;
    else if(!strncasecmp(buf, "COM", 3) || !strncasecmp(buf, "LPT", 3))
      dev = ('1' <= buf[3] && buf[3] <= '9') ? 4 : 3;
  }

  if(dev) {
    /* the devices may be accessible with an extension or ADS, for
       example CON.AIR and 'CON . AIR' and CON:AIR access console */
    size_t k = dev;
    while(buf[k] == ' ')
      ++k;

    if(buf[k] == '\0') {
      /* the whole name is the device ('CON' or 'CON ', etc): prepend a '_',
         dropping the last character first if the buffer is already full */
      if(n == sizeof(buf) - 1) {
        --n;
        buf[n] = '\0';
      }
      memmove(buf + 1, buf, n + 1);
      buf[0] = '_';
    }
    else if(buf[k] == '.' || buf[k] == ':') {
      /* 'CON.' or 'CON . AIR', etc: break the link to the device */
      buf[k] = '_';
    }
  }

  *sanitized = strdup(buf);
  if(!*sanitized)
    return CURLE_OUT_OF_MEMORY;
  return CURLE_OK;
}
/* get_url_file_name
 *
 * Extracts the name portion of the URL.
 *
 * The name is whatever follows the last '/' or '\\' that comes after the
 * "://" of the scheme (or after the start of url if there is no scheme).
 *
 * Modified function from curl src/tool_operhlp.c.
 * - Sanitize the filename by replacing banned characters with underscores.
 * - Return "unknown" instead of empty string if filename can't be determined.
 *   That includes the case where sanitizing leaves nothing, for example a
 *   URL that ends in "/..".
 *
 * Success: (CURLE_OK) *filename points to a sanitized copy of URL's filename.
 *          The caller owns the string and releases it with free.
 * Failure: (!= CURLE_OK) *filename is NULL.
 */
CURLcode get_url_file_name(char **const filename, const char *url)
{
  const char *pc = NULL, *pc2 = NULL;

  *filename = NULL;

  /* Find and get the remote file name */
  pc = strstr(url, "://");
  if(pc)
    pc += 3;
  else
    pc = url;

  /* the name starts after the last slash or backslash, whichever is later */
  pc2 = strrchr(pc, '\\');
  pc = strrchr(pc, '/');
  if(pc2 && (!pc || pc < pc2))
    pc = pc2;

  if(pc && pc[1]) {
    /* sanitize the string beyond the slash */
    char *sanitized = NULL;
    CURLcode res = sanitize(&sanitized, pc + 1);
    if(res)
      return res;
    if(*sanitized) {
      *filename = sanitized;
      return CURLE_OK;
    }
    /* Nothing usable is left after sanitizing (the name consisted only of
       spaces and periods). An empty name can't be created or renamed to, so
       fall through and use "unknown". */
    free(sanitized);
  }

  /* no slash or empty => unknown */
  *filename = strdup("unknown");
  return (*filename ? CURLE_OK : CURLE_OUT_OF_MEMORY);
}
/* fopen_excl_genfile
Generate a filename from prefix + .(00-99) + .tmp and using that name create
and open a new FILE in mode w+b while safely ensuring the filename that is
created does not exist.
The generation, creation and opening is tried 100 times until successful,
incrementing the count each time (foo.00.tmp, foo.01.tmp, ...). If after 100
tries it is not successful it returns failure.
prefix may be NULL, which is treated the same as an empty string.
Success: (!= NULL) FILE * to the opened file.
*opened_filename points to the name of the opened file. The caller owns that
string and releases it with free.
Failure: (NULL) *opened_filename is NULL. Nothing is left allocated and a
file that was created but could not be wrapped in a FILE is removed again.
*/
FILE *fopen_excl_genfile(char **opened_filename, const char *prefix)
{
  static const char suffix[] = ".00.tmp";
  const size_t prefix_len = prefix ? strlen(prefix) : 0;
  char *name = NULL;
  char *digits = NULL;
  int i = 0;

  *opened_filename = NULL;

  /* <prefix> + .00.tmp + <nul> (sizeof suffix counts the nul) */
  name = (char *)malloc(prefix_len + sizeof(suffix));
  if(!name)
    return NULL;
  if(prefix_len)
    memcpy(name, prefix, prefix_len);
  memcpy(name + prefix_len, suffix, sizeof(suffix));

  /* the two digits sit right after the first '.' of the suffix */
  digits = name + prefix_len + 1;

  for(i = 0; i < 100; ++i) {
    int fd = -1;
    FILE *fp = NULL;

    digits[0] = (char)((i / 10) + '0');
    digits[1] = (char)((i % 10) + '0');

    /* O_CREAT | O_EXCL makes the open fail if the name already exists */
    fd = open(name, O_BINARY | O_CREAT | O_EXCL | O_RDWR,
#ifdef _WIN32
              S_IREAD | S_IWRITE);
#else
              S_IRUSR | S_IWUSR);
#endif
    if(fd == -1)
      continue; /* try the next number */

    fp = fdopen(fd, "w+b" /* must be analogous to open flags */);
    if(fp) {
      *opened_filename = name;
      return fp;
    }

    /* We created the file but can't use it: don't leave it behind. */
    close(fd);
    unlink(name);
    break;
  }

  free(name);
  return NULL;
}
/* progress_callback
Transfer progress callback that draws a one-line progress meter on stderr.
clientp must point to a struct progress_data, which holds the state of the
meter between calls. Only the download counters are used; ultotal and ulnow
are ignored. A percentage with bars is drawn when the content length is known
and a roller that moves back and forth is drawn when it is not. The meter is
only redrawn when what it shows would change. This function always returns 0.
We cannot tell for certain that there is no more progress to report for a
transfer until that transfer has ended. The content length may be unknown
or there may be redirects. For these reasons there is no linefeed for the final
progress meter. After curl_easy_perform returns send a linefeed to stderr:
if(progress_data.type != PROGRESS_NONE)
fprintf(stderr, "\n");
*/
int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow,
curl_off_t ultotal, curl_off_t ulnow)
{
struct progress_data *d = (struct progress_data *)clientp;
/* bars length must be <= 100 */
const char bars[] = "==================================================";
const int bars_len = (int)(sizeof(bars) - 1);
(void)ultotal;
(void)ulnow;
/* disabled debug output of the raw callback arguments */
#if 0
fprintf(stderr, "\ndltotal: %" CURL_FORMAT_CURL_OFF_T
", dlnow: %" CURL_FORMAT_CURL_OFF_T
", ultotal: %" CURL_FORMAT_CURL_OFF_T
", ulnow: %" CURL_FORMAT_CURL_OFF_T
", time: %" CURL_FORMAT_CURL_OFF_T "\n",
dltotal, dlnow, ultotal, ulnow, (curl_off_t)time(NULL));
#endif
/* Redirect detection.
WARNING: What we can do with session safely while we may be in the middle
of a transfer is extremely limited. */
{
long count = 0;
if(!curl_easy_getinfo(d->session, CURLINFO_REDIRECT_COUNT, &count)) {
if(count != d->redirect_count) {
d->redirect_count = count;
if(!d->redirect_silent) /* Start a new progress meter */
fprintf(stderr, "\nRedirected!\n");
/* Forget the saved meter state so that the next meter is drawn from
scratch. */
if(d->type != PROGRESS_NONE) {
d->type = PROGRESS_NONE;
memset(&d->bars, 0, sizeof(d->bars));
memset(&d->roller, 0, sizeof(d->roller));
}
}
}
}
/* If content length is unknown then use a roller to move back and forth */
if(dltotal < 0 || dlnow < 0 || (dlnow > 0 && !dltotal)) {
const char roller[] = "<=>";
const int roller_len = (int)(sizeof(roller) - 1);
/* number of blank columns the roller can travel across */
const int spaces = (bars_len > roller_len) ? (bars_len - roller_len) : 0;
int pos = 0;
int backward = FALSE;
time_t timenow = time(NULL);
if(d->type == PROGRESS_ROLLER) {
/* Don't advance roller if new data wasn't received */
if(dlnow == d->roller.dl)
return 0;
/* Don't advance roller if the time in seconds is unchanged */
if(timenow == d->roller.time) {
/* Update the data received for this second */
d->roller.dl = dlnow;
return 0;
}
/* continue from where the last roller was drawn */
pos = d->roller.pos;
backward = d->roller.backward;
}
/* If there's a bug then two beeps and break */
#if defined(_DEBUG) && defined(_MSC_VER)
if(pos < 0 || pos > spaces) {
fprintf(stderr, "\a\a");
fflush(stderr);
__debugbreak();
}
#endif
/* Start at the left edge if this is the first roller, if there is no room
to move or if the saved position is out of range. */
if(d->type != PROGRESS_ROLLER || !spaces || pos < 0 || pos > spaces) {
pos = 0;
backward = FALSE;
}
else if(backward) {
/* move left, turning around at the left edge */
if(pos)
--pos;
else {
++pos;
backward = FALSE;
}
}
else { /* forward */
/* move right, turning around at the right edge */
if(pos != spaces)
++pos;
else {
--pos;
backward = TRUE;
}
}
/* pos spaces, then the roller, then the remaining spaces */
fprintf(stderr, "\r ??%%[%*s%s%*s]", pos, "", roller, spaces - pos, "");
d->type = PROGRESS_ROLLER;
d->roller.backward = backward;
d->roller.dl = dlnow;
d->roller.pos = pos;
d->roller.time = timenow;
}
else {
/* Calculate percent and use progress bars.
The content length is known and/or the download hasn't started yet. */
int percent = -1;
if(!dlnow)
percent = 0;
else if(dlnow >= dltotal)
percent = 100;
else if(dltotal < 10000)
percent = (int)(dlnow * 100 / dltotal);
else
/* for larger totals divide the total by 100 instead of multiplying dlnow
by 100 */
percent = (int)(dlnow / (dltotal / 100));
if(d->type == PROGRESS_BARS) {
/* Don't advance bars if the percentage is the same */
if(percent == d->bars.percent)
return 0;
}
/* the precision limits how many bar characters are printed and the field
width pads the rest with spaces */
fprintf(stderr, "\r%3d%%[%-*.*s]", percent, bars_len,
(int)((bars_len / 100.0F) * percent), bars);
d->type = PROGRESS_BARS;
d->bars.percent = percent;
}
return 0;
}
/* ProxyTest
Download url, through proxy unless proxy is an empty string, and save it in the
current directory under a name based on the filename in the URL.
The data is first written to a temp file created by fopen_excl_genfile. If
the transfer is successful the temp file is renamed to the destination name,
replacing any file that already has that name. If the transfer fails the temp
file is removed.
A transfer is successful if libcurl reports CURLE_OK and, when a response code
was received, that code is 200 for an HTTP(S) URL or an HTTP(S) proxy that
wasn't tunneled through with CONNECT, or 226 for an FTP(S) URL.
proxy: proxy to use, "" for none. Must not be NULL.
url: URL to retrieve. Must not be NULL or empty.
Success: TRUE. Failure: FALSE, and a message was written to stderr.
*/
int ProxyTest(const char *proxy, const char *url)
{
  int retcode = FALSE;
  CURL *curl = NULL;
  CURLcode res = CURLE_FAILED_INIT;
  char errbuf[CURL_ERROR_SIZE] = { 0, };
  FILE *fp = NULL;
  struct progress_data progress_data = { 0, };
  double average_speed = 0;
  double bytes_downloaded = 0;
  double total_download_time = 0;
  char *effective_url = NULL;
  char *filename_tmp = NULL, *filename_dst = NULL;

  if(!proxy) {
    fprintf(stderr, "Error: proxy parameter is missing.\n");
    goto cleanup;
  }

  if(!url || !*url) {
    fprintf(stderr, "Error: url parameter is missing.\n");
    goto cleanup;
  }

  if(get_url_file_name(&filename_dst, url) != CURLE_OK) {
    fprintf(stderr, "Error: get_url_file_name failed.\n");
    goto cleanup;
  }

  fp = fopen_excl_genfile(&filename_tmp, filename_dst);
  if(!fp) {
    fprintf(stderr, "Error: fopen_excl_genfile failed.\n");
    goto cleanup;
  }

  curl = curl_easy_init();
  if(!curl) {
    fprintf(stderr, "Error: curl_easy_init failed.\n");
    goto cleanup;
  }

  /* CURLOPT_CAINFO
     To verify SSL sites you may need to load a bundle of certificates.

     You can download the default bundle here:
     https://raw.githubusercontent.com/bagder/ca-bundle/master/ca-bundle.crt

     However your SSL backend might use a database in addition to or instead of
     the bundle.
     http://curl.haxx.se/docs/ssl-compared.html
  */
  curl_easy_setopt(curl, CURLOPT_CAINFO, "curl-ca-bundle.crt");

  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)fp);

  if(isatty(fileno(stderr))) {
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
    curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback);
    curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &progress_data);
    progress_data.session = curl;
  }
  else {
    /* In the case of Cygwin if the program was not compiled using the Cygwin
       version of gcc then we get here even if stderr is attached to a
       terminal, since they use a pseudo tty. That could be addressed:
       https://cygwin.com/ml/cygwin/2012-11/msg00214.html

       Also it seems that in Cygwin's terminal stderr is buffered because it's
       being piped. A solution for the progress meter would be fflush after
       each write to stderr.
       https://github.com/gflags/gflags/issues/56
       http://reviews.llvm.org/D4021

       I haven't done much testing in Cygwin. Try first compiling using their
       gcc which may be a suitable remedy for both of those issues.
    */
    fprintf(stderr, "The progress meter has been disabled: "
                    "stderr isn't attached to a terminal.\n");
    fflush(stderr);
  }

  curl_easy_setopt(curl, CURLOPT_AUTOREFERER, 1L);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 20L);

  /* For security reasons we only allow redirects to safe redirect protocols.
     The option takes a long and curl_easy_setopt is variadic, so the bitmask
     is cast to make sure a long is what gets passed. */
  curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS,
                   (long)(CURLPROTO_HTTP | CURLPROTO_HTTPS |
                          CURLPROTO_FTP | CURLPROTO_FTPS));

  if(*proxy)
    curl_easy_setopt(curl, CURLOPT_PROXY, proxy);

  curl_easy_setopt(curl, CURLOPT_URL, url);
  /* curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); */
  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errbuf);

  res = curl_easy_perform(curl);

  if(progress_data.type != PROGRESS_NONE)
    fprintf(stderr, "\n"); /* linefeed for progress meter */

  if(res != CURLE_OK) {
    size_t len = strlen(errbuf);
    fprintf(stderr, "\nError: libcurl: (%d) ", res);
    if(len)
      fprintf(stderr, "%s%s", errbuf, ((errbuf[len - 1] != '\n') ? "\n" : ""));
    fprintf(stderr, "%s\n\n", curl_easy_strerror(res));
    goto cleanup;
  }

  curl_easy_getinfo(curl, CURLINFO_SPEED_DOWNLOAD, &average_speed);
  curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &bytes_downloaded);
  curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &total_download_time);

  fprintf(stderr, "\nTransfer rate: %.0f KB/sec"
                  " (%.0f bytes in %.0f seconds)\n",
          average_speed / 1024, bytes_downloaded, total_download_time);

  curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &effective_url);
  if(effective_url) {
    long proxy_connect_code = 0;
    long response_code = 0;

    curl_easy_getinfo(curl, CURLINFO_HTTP_CONNECTCODE, &proxy_connect_code);
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);

    if(response_code) {
      /* The response code is from HTTP if the URL is HTTP or the proxy is
         HTTP and CONNECT wasn't used for a proxy passthrough.
         An empty proxy string means no proxy was requested, so it must not
         be mistaken for a schemeless (HTTP) proxy: otherwise the 226 of a
         plain FTP download is rejected as a bad HTTP response code. */
      if(!strncasecmp(effective_url, "http://", 7) ||
         !strncasecmp(effective_url, "https://", 8) ||
         (*proxy && !proxy_connect_code &&
          (!strstr(proxy, "://") ||
           !strncasecmp(proxy, "http://", 7) ||
           !strncasecmp(proxy, "https://", 8)))) {
        if(response_code != 200) {
          fprintf(stderr, "Error: HTTP response code is %ld.\n",
                  response_code);
          goto cleanup;
        }
      }
      else if(!strncasecmp(effective_url, "ftp://", 6) ||
              !strncasecmp(effective_url, "ftps://", 7)) {
        if(response_code != 226) {
          fprintf(stderr, "Error: FTP response code is %ld.\n", response_code);
          goto cleanup;
        }
      }
    }
  }

  retcode = TRUE;

cleanup:
  /* effective_url belongs to the easy handle and isn't used past here */
  curl_easy_cleanup(curl);

  if(fp) {
    /* fclose writes out whatever is still buffered. If that fails the temp
       file is incomplete and the transfer can't be called a success. */
    if(fclose(fp) != 0 && retcode == TRUE) {
      fprintf(stderr, "Error: Unable to write out and close temp file: %s\n",
              filename_tmp);
      retcode = FALSE;
    }
    fp = NULL;
  }

  if(retcode == TRUE) {
    /* remove an existing destination first, the rename may not replace it */
    unlink(filename_dst);
    if(!rename(filename_tmp, filename_dst)) {
      printf("Transfer successful. Filename: %s\n", filename_dst);
    }
    else {
      fprintf(stderr, "Error: Transfer was successful but unable to rename "
                      "temp file to destination file. Temp file will not be "
                      "deleted.\n"
                      "temp: %s\n"
                      "dest: %s\n",
              filename_tmp, filename_dst);
      retcode = FALSE;
    }
  }
  else {
    if(filename_tmp && unlink(filename_tmp) == -1)
      fprintf(stderr, "Error: Unable to remove temp file: %s\n", filename_tmp);
  }

  /* both names were allocated for us and are no longer needed */
  free(filename_tmp);
  free(filename_dst);

  return retcode;
}
/* Program entry point.
   Expects exactly two arguments, the proxy (or "") and the URL. With any other
   number of arguments the usage text is written to stderr.
   Initializes libcurl, registers its global cleanup to run at exit and then
   hands the arguments to ProxyTest.
   Returns EXIT_SUCCESS if ProxyTest succeeds, EXIT_FAILURE otherwise. */
int main(int argc, char *argv[])
{
  static const char usage[] =
    "Usage: ProxyTest <proxy|\"\"> <url>\n"
    "\n"
    "Use libcurl to retrieve a file, using a proxy if specified, and write "
    "that file to disk with a filename based on the filename in the URL.\n"
    "\n"
    "\n"
    "Some details on how this program behaves:\n"
    "\n"
    "If the first argument is an empty string no proxy is used.\n"
    "The proxy type defaults to HTTP unless you specify a scheme:\n"
    "socks4://, socks4a://, socks5:// or socks5h://\n"
    "\n"
    "The URL type defaults to auto-detect unless you specify a scheme.\n"
    "\n"
    "This program will exit 0 on success. In that case the download was "
    "successful and the file was saved to a local file in the current "
    "directory. The file has a filename based on the filename in the URL.\n"
    "\n"
    "In detail, the download is written to a file based on the "
    "filename in the URL + .(00-99) + .tmp until it is determined the "
    "download was successful. In that case it's renamed to the filename in "
    "the URL. The filenames may not be exact because forbidden characters "
    "are replaced with underscores. Further: If there is no filename in the "
    "URL the local file is named \"unknown\". If a file already exists it "
    "is replaced.\n"
    "\n"
    "For example http://example.com/foo.zip is downloaded to foo.zip.00.tmp "
    "and if the download is successful it's renamed foo.zip. If foo.zip "
    "already exists it is replaced. If it cannot be replaced or the rename "
    "fails the tmp file remains and the program exits without success "
    "(!= 0).\n"
    "\n"
    "In detail, a download is only considered successful if libcurl says "
    "the transfer is OK. Further, if a server response code was received "
    "then in the case of an HTTP(S) proxy or URL the code must be 200; or "
    "otherwise if FTP(S) then the code must be 226.\n";

  /* anything other than <proxy> <url> gets the usage text */
  if(argc != 3) {
    fputs(usage, stderr);
    return EXIT_FAILURE;
  }

  if(curl_global_init(CURL_GLOBAL_ALL) != 0) {
    fprintf(stderr, "Fatal: The initialization of libcurl has failed.\n");
    return EXIT_FAILURE;
  }

  /* from here on libcurl is cleaned up on every exit path */
  if(atexit(curl_global_cleanup) != 0) {
    fprintf(stderr, "Fatal: atexit failed to register curl_global_cleanup.\n");
    curl_global_cleanup();
    return EXIT_FAILURE;
  }

  if(ProxyTest(argv[1], argv[2]))
    return EXIT_SUCCESS;

  fprintf(stderr, "Fatal: ProxyTest failed.\n");
  return EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment