Last active
April 21, 2019 05:19
-
-
Save jay/27f5f8c2cc1c5510dd32 to your computer and use it in GitHub Desktop.
Use libcurl to retrieve a file, using a proxy if specified, and write that file to disk with a filename based on the filename in the URL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Use libcurl to retrieve a file, using a proxy if specified, and write that
file to disk with a filename based on the filename in the URL.

Usage: ProxyTest <proxy|""> <url>

"My goal is simply to download a requested file and have it land on my desktop
with that exact same name. "

curl-library mailing list thread:
'Compiling libcurl for https'
http://curl.haxx.se/mail/lib-2015-10/0117.html

If you need a SOCKS server for testing use ssocks.
http://sourceforge.net/projects/ssocks/
~/ssocks-0.0.14/src/ssocksd

Copyright (C) 2015 Jay Satiro <[email protected]>
http://curl.haxx.se/docs/copyright.html

https://gist.github.com/jay/27f5f8c2cc1c5510dd32
*/
#define _CRT_NONSTDC_NO_DEPRECATE

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>

#ifdef _WIN32
#include <io.h>
#else
#include <strings.h> /* strncasecmp */
#include <unistd.h>
#endif

/* http://curl.haxx.se/download.html */
#include <curl/curl.h>
/* Local boolean constants. Any earlier definition is discarded first so the
   values used in this file are always exactly 0 and 1. */
#undef FALSE
#define FALSE 0
#undef TRUE
#define TRUE 1

/* O_BINARY is passed to open() in this file. Where it is not defined, map it
   to _O_BINARY if that exists and otherwise to 0 so it has no effect. */
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0
#endif
#endif

/* On Windows use strnicmp wherever this file calls strncasecmp. */
#ifdef _WIN32
#define strncasecmp strnicmp
#endif
/* The kind of progress meter that progress_callback drew last. */
enum progress_type {
  PROGRESS_NONE = 0, /* nothing drawn yet. NONE must stay at 0 so that a
                        zeroed struct progress_data starts in this state. */
  PROGRESS_BARS,     /* percentage bars, drawn when a percentage can be
                        calculated */
  PROGRESS_ROLLER    /* back and forth roller, drawn when the content length
                        is unknown */
};
/* State of the last percentage bars meter that was drawn. */
struct progress_bars {
  int percent; /* percentage shown by the last draw; the meter is only redrawn
                  when the newly calculated percentage differs from this */
};
struct progress_roller { | |
int pos; | |
int backward; | |
time_t time; | |
curl_off_t dl; | |
}; | |
/* to initialize zero out the struct then set session */ | |
struct progress_data { | |
CURL *session; /* curl easy_handle to the calling session */ | |
long redirect_count; /* number of redirects for the session */ | |
int redirect_silent; /* TRUE: Overwrite last progress meter on redirect. | |
FALSE: Start a new progress meter on redirect. */ | |
enum progress_type type; | |
struct progress_bars bars; /* last bars if type PROGRESS_BARS */ | |
struct progress_roller roller; /* last roller if type PROGRESS_ROLLER */ | |
}; | |
/* sanitize a filename | |
Replace banned filename characters with underscores (foo:bar => foo_bar) and | |
rename any reserved dos device names (con => _con, con.air => con_air, etc). | |
Most of this code was adapted from my code in the curl tool. | |
https://github.com/curl/curl/blob/master/src/tool_doswin.c | |
Success: (CURLE_OK) *sanitized points to a sanitized copy of file_name. | |
Failure: (!= CURLE_OK) *sanitized is NULL. | |
*/ | |
CURLcode sanitize(char **const sanitized, const char *file_name) | |
{ | |
char *p = NULL; | |
char target[255+1] = { 0, }; | |
int x = 0; | |
size_t len = 0; | |
*sanitized = NULL; | |
len = strlen(file_name); | |
if(len > sizeof(target) - 1) | |
len = sizeof(target) - 1; | |
strncpy(target, file_name, len); | |
target[len] = '\0'; | |
/* replace control characters and other banned characters */ | |
for(p = target; *p; ++p) { | |
const char *banned; | |
if(1 <= *p && *p <= 31) { | |
*p = '_'; | |
continue; | |
} | |
for(banned = "|<>/\\\":?*"; *banned; ++banned) { | |
if(*p == *banned) { | |
*p = '_'; | |
break; | |
} | |
} | |
} | |
/* remove trailing spaces and periods */ | |
if(len) { | |
char *clip = NULL; | |
p = &target[len]; | |
do { | |
--p; | |
if(*p != ' ' && *p != '.') | |
break; | |
clip = p; | |
} while(p != target); | |
if(clip) { | |
*clip = '\0'; | |
len = clip - target; | |
} | |
} | |
/* rename file_name if it's a reserved dos device name */ | |
x = (!strncasecmp(target, "CON", 3) || | |
!strncasecmp(target, "PRN", 3) || | |
!strncasecmp(target, "AUX", 3) || | |
!strncasecmp(target, "NUL", 3)) ? 3 : | |
(!strncasecmp(target, "CLOCK$", 6)) ? 6 : | |
(!strncasecmp(target, "COM", 3) || !strncasecmp(target, "LPT", 3)) ? | |
(('1' <= target[3] && target[3] <= '9') ? 4 : 3) : 0; | |
if(x) { | |
/* the devices may be accessible with an extension or ADS, for | |
example CON.AIR and 'CON . AIR' and CON:AIR access console */ | |
for(; target[x] == ' '; ++x) | |
; | |
if(!target[x]) { | |
/* target points to 'CON' or 'CON ', etc */ | |
/* Prepend a '_' */ | |
if(len == sizeof(target) - 1) { | |
/* truncate to make room */ | |
--len; | |
target[len] = '\0'; | |
} | |
memmove(target + 1, target, len + 1); | |
target[0] = '_'; | |
++len; | |
} | |
else if(target[x] == '.' || target[x] == ':') | |
/* target points to 'CON.' or 'CON . AIR', etc */ | |
target[x] = '_'; | |
} | |
*sanitized = strdup(target); | |
return (*sanitized ? CURLE_OK : CURLE_OUT_OF_MEMORY); | |
} | |
/* get_url_file_name | |
* | |
* Extracts the name portion of the URL. | |
* | |
* Modified function from curl src/tool_operhlp.c. | |
* - Sanitize the filename by replacing banned characters with underscores. | |
* - Return "unknown" instead of empty string if filename can't be determined. | |
* | |
* Success: (CURLE_OK) *filename points to a sanitized copy of URL's filename. | |
* Failure: (!= CURLE_OK) *filename is NULL. | |
*/ | |
CURLcode get_url_file_name(char **const filename, const char *url) | |
{ | |
const char *pc = NULL, *pc2 = NULL; | |
*filename = NULL; | |
/* Find and get the remote file name */ | |
pc = strstr(url, "://"); | |
if(pc) | |
pc += 3; | |
else | |
pc = url; | |
pc2 = strrchr(pc, '\\'); | |
pc = strrchr(pc, '/'); | |
if(pc2 && (!pc || pc < pc2)) | |
pc = pc2; | |
if(pc && pc[1]) { | |
/* sanitize the string beyond the slash */ | |
CURLcode res; | |
char *sanitized; | |
++pc; | |
res = sanitize(&sanitized, pc); | |
if(res) | |
return res; | |
*filename = sanitized; | |
} | |
else | |
/* no slash or empty => unknown */ | |
*filename = strdup("unknown"); | |
return (*filename ? CURLE_OK : CURLE_OUT_OF_MEMORY); | |
} | |
/* fopen_excl_genfile | |
Generate a filename from prefix + .(00-99) + .tmp and using that name create | |
and open a new FILE in mode w+b while safely ensuring the filename that is | |
created does not exist. | |
The generation, creation and opening is tried 100 times until successful, | |
incrementing the count each time (foo.01.tmp, foo.02.tmp, ...). If after 100 | |
tries it is not successful it returns failure. | |
Success: (!= NULL) FILE * to the opened file. | |
*opened_filename points to the name of the opened file. | |
Failure: (NULL) *opened_filename is NULL. | |
*/ | |
FILE *fopen_excl_genfile(char **opened_filename, const char *prefix) | |
{ | |
int fd = -1; | |
FILE *fp = NULL; | |
char *name = NULL; | |
char *x = NULL; | |
size_t size = 0; | |
int i = 0; | |
*opened_filename = NULL; | |
/* <prefix> + .00.tmp + <nul> */ | |
size = (prefix ? strlen(prefix) : 0) + 7 + 1; | |
name = (char *)malloc(size); | |
if(!name) | |
return NULL; | |
strcpy(name, prefix ? prefix : ""); | |
strcpy(name + size - 1 - 7, ".00.tmp"); | |
/* Get to start of numbers: size - <nul> - 00.tmp */ | |
x = name + size - 1 - 6; | |
for(i = 0; i < 100; ++i) { | |
if(i) { | |
x[0] = (char)((i / 10) + '0'); | |
x[1] = (char)((i % 10) + '0'); | |
} | |
fd = open(name, O_BINARY | O_CREAT | O_EXCL | O_RDWR, | |
#ifdef _WIN32 | |
S_IREAD | S_IWRITE); | |
#else | |
S_IRUSR | S_IWUSR); | |
#endif | |
if(fd != -1) { | |
fp = fdopen(fd, "w+b" /* must be analogous to open flags */); | |
if(fp) { | |
*opened_filename = name; | |
return fp; | |
} | |
else { | |
close(fd); | |
return NULL; | |
} | |
} | |
} | |
return NULL; | |
} | |
/* | |
We cannot tell for certain that there is no more progress to report for a | |
transfer until that transfer has ended. The content length may be unknown | |
or there may be redirects. For these reasons there is no linefeed for the final | |
progress meter. After curl_easy_perform returns send a linefeed to stderr: | |
if(progress_data.type != PROGRESS_NONE) | |
fprintf(stderr, "\n"); | |
*/ | |
int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow, | |
curl_off_t ultotal, curl_off_t ulnow) | |
{ | |
struct progress_data *d = (struct progress_data *)clientp; | |
/* bars length must be <= 100 */ | |
const char bars[] = "=================================================="; | |
const int bars_len = (int)(sizeof(bars) - 1); | |
(void)ultotal; | |
(void)ulnow; | |
#if 0 | |
fprintf(stderr, "\ndltotal: %" CURL_FORMAT_CURL_OFF_T | |
", dlnow: %" CURL_FORMAT_CURL_OFF_T | |
", ultotal: %" CURL_FORMAT_CURL_OFF_T | |
", ulnow: %" CURL_FORMAT_CURL_OFF_T | |
", time: %" CURL_FORMAT_CURL_OFF_T "\n", | |
dltotal, dlnow, ultotal, ulnow, (curl_off_t)time(NULL)); | |
#endif | |
/* Redirect detection. | |
WARNING: What we can do with session safely while we may be in the middle | |
of a transfer is extremely limited. */ | |
{ | |
long count = 0; | |
if(!curl_easy_getinfo(d->session, CURLINFO_REDIRECT_COUNT, &count)) { | |
if(count != d->redirect_count) { | |
d->redirect_count = count; | |
if(!d->redirect_silent) /* Start a new progress meter */ | |
fprintf(stderr, "\nRedirected!\n"); | |
if(d->type != PROGRESS_NONE) { | |
d->type = PROGRESS_NONE; | |
memset(&d->bars, 0, sizeof(d->bars)); | |
memset(&d->roller, 0, sizeof(d->roller)); | |
} | |
} | |
} | |
} | |
/* If content length is unknown then use a roller to move back and forth */ | |
if(dltotal < 0 || dlnow < 0 || (dlnow > 0 && !dltotal)) { | |
const char roller[] = "<=>"; | |
const int roller_len = (int)(sizeof(roller) - 1); | |
const int spaces = (bars_len > roller_len) ? (bars_len - roller_len) : 0; | |
int pos = 0; | |
int backward = FALSE; | |
time_t timenow = time(NULL); | |
if(d->type == PROGRESS_ROLLER) { | |
/* Don't advance roller if new data wasn't received */ | |
if(dlnow == d->roller.dl) | |
return 0; | |
/* Don't advance roller if the time in seconds is unchanged */ | |
if(timenow == d->roller.time) { | |
/* Update the data received for this second */ | |
d->roller.dl = dlnow; | |
return 0; | |
} | |
pos = d->roller.pos; | |
backward = d->roller.backward; | |
} | |
/* If there's a bug then two beeps and break */ | |
#if defined(_DEBUG) && defined(_MSC_VER) | |
if(pos < 0 || pos > spaces) { | |
fprintf(stderr, "\a\a"); | |
fflush(stderr); | |
__debugbreak(); | |
} | |
#endif | |
if(d->type != PROGRESS_ROLLER || !spaces || pos < 0 || pos > spaces) { | |
pos = 0; | |
backward = FALSE; | |
} | |
else if(backward) { | |
if(pos) | |
--pos; | |
else { | |
++pos; | |
backward = FALSE; | |
} | |
} | |
else { /* forward */ | |
if(pos != spaces) | |
++pos; | |
else { | |
--pos; | |
backward = TRUE; | |
} | |
} | |
fprintf(stderr, "\r ??%%[%*s%s%*s]", pos, "", roller, spaces - pos, ""); | |
d->type = PROGRESS_ROLLER; | |
d->roller.backward = backward; | |
d->roller.dl = dlnow; | |
d->roller.pos = pos; | |
d->roller.time = timenow; | |
} | |
else { | |
/* Calculate percent and use progress bars. | |
The content length is known and/or the download hasn't started yet. */ | |
int percent = -1; | |
if(!dlnow) | |
percent = 0; | |
else if(dlnow >= dltotal) | |
percent = 100; | |
else if(dltotal < 10000) | |
percent = (int)(dlnow * 100 / dltotal); | |
else | |
percent = (int)(dlnow / (dltotal / 100)); | |
if(d->type == PROGRESS_BARS) { | |
/* Don't advance bars if the percentage is the same */ | |
if(percent == d->bars.percent) | |
return 0; | |
} | |
fprintf(stderr, "\r%3d%%[%-*.*s]", percent, bars_len, | |
(int)((bars_len / 100.0F) * percent), bars); | |
d->type = PROGRESS_BARS; | |
d->bars.percent = percent; | |
} | |
return 0; | |
} | |
int ProxyTest(const char *proxy, const char *url) | |
{ | |
int retcode = FALSE; | |
CURL *curl = NULL; | |
CURLcode res = CURLE_FAILED_INIT; | |
char errbuf[CURL_ERROR_SIZE] = { 0, }; | |
FILE *fp = NULL; | |
struct progress_data progress_data = { 0, }; | |
double average_speed = 0; | |
double bytes_downloaded = 0; | |
double total_download_time = 0; | |
char *effective_url = NULL; | |
char *filename_tmp = NULL, *filename_dst = NULL; | |
if(!proxy) { | |
fprintf(stderr, "Error: proxy parameter is missing.\n"); | |
goto cleanup; | |
} | |
if(!url || !*url) { | |
fprintf(stderr, "Error: url parameter is missing.\n"); | |
goto cleanup; | |
} | |
if(get_url_file_name(&filename_dst, url) != CURLE_OK) { | |
fprintf(stderr, "Error: get_url_file_name failed.\n"); | |
goto cleanup; | |
} | |
fp = fopen_excl_genfile(&filename_tmp, filename_dst); | |
if(!fp) { | |
fprintf(stderr, "Error: fopen_excl_genfile failed.\n"); | |
goto cleanup; | |
} | |
curl = curl_easy_init(); | |
if(!curl) { | |
fprintf(stderr, "Error: curl_easy_init failed.\n"); | |
goto cleanup; | |
} | |
/* CURLOPT_CAINFO | |
To verify SSL sites you may need to load a bundle of certificates. | |
You can download the default bundle here: | |
https://raw.githubusercontent.com/bagder/ca-bundle/master/ca-bundle.crt | |
However your SSL backend might use a database in addition to or instead of | |
the bundle. | |
http://curl.haxx.se/docs/ssl-compared.html | |
*/ | |
curl_easy_setopt(curl, CURLOPT_CAINFO, "curl-ca-bundle.crt"); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, fwrite); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)fp); | |
if(isatty(fileno(stderr))) { | |
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); | |
curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback); | |
curl_easy_setopt(curl, CURLOPT_XFERINFODATA, &progress_data); | |
progress_data.session = curl; | |
} | |
else { | |
/* In the case of Cygwin if the program was not compiled using the Cygwin | |
version of gcc then we get here even if stderr is attached to a | |
terminal, since they use a pseudo tty. That could be addressed: | |
https://cygwin.com/ml/cygwin/2012-11/msg00214.html | |
Also it seems Cygwin's terminal is stderr is buffered because it's | |
piping it. A solution for the progress meter would be fflush after each | |
write to stderr. | |
https://github.com/gflags/gflags/issues/56 | |
http://reviews.llvm.org/D4021 | |
I haven't done much testing in Cygwin. Try first compiling using their | |
gcc which may be a suitable remedy for both of those issues. | |
*/ | |
fprintf(stderr, "The progress meter has been disabled: " | |
"stderr isn't attached to a terminal.\n"); | |
fflush(stderr); | |
} | |
curl_easy_setopt(curl, CURLOPT_AUTOREFERER, 1L); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); | |
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 20L); | |
/* For security reasons we only allow redirects to safe redirect protocols */ | |
curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, | |
CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FTP | CURLPROTO_FTPS); | |
if(*proxy) | |
curl_easy_setopt(curl, CURLOPT_PROXY, proxy); | |
curl_easy_setopt(curl, CURLOPT_URL, url); | |
/* curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); */ | |
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errbuf); | |
res = curl_easy_perform(curl); | |
if(progress_data.type != PROGRESS_NONE) | |
fprintf(stderr, "\n"); /* linefeed for progress meter */ | |
if(res != CURLE_OK) { | |
size_t len = strlen(errbuf); | |
fprintf(stderr, "\nError: libcurl: (%d) ", res); | |
if(len) | |
fprintf(stderr, "%s%s", errbuf, ((errbuf[len - 1] != '\n') ? "\n" : "")); | |
fprintf(stderr, "%s\n\n", curl_easy_strerror(res)); | |
goto cleanup; | |
} | |
curl_easy_getinfo(curl, CURLINFO_SPEED_DOWNLOAD, &average_speed); | |
curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &bytes_downloaded); | |
curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &total_download_time); | |
fprintf(stderr, "\nTransfer rate: %.0f KB/sec" | |
" (%.0f bytes in %.0f seconds)\n", | |
average_speed / 1024, bytes_downloaded, total_download_time); | |
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &effective_url); | |
if(effective_url) { | |
long proxy_connect_code = 0; | |
long response_code = 0; | |
curl_easy_getinfo(curl, CURLINFO_HTTP_CONNECTCODE, &proxy_connect_code); | |
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); | |
if(response_code) { | |
/* The response code is from HTTP if the URL is HTTP or the proxy is | |
HTTP and CONNECT wasn't used for a proxy passthrough. */ | |
if(!strncasecmp(effective_url, "http://", 7) || | |
!strncasecmp(effective_url, "https://", 8) || | |
(proxy && !proxy_connect_code && | |
(!strstr(proxy, "://") || | |
!strncasecmp(proxy, "http://", 7) || | |
!strncasecmp(proxy, "https://", 8)))) { | |
if(response_code != 200) { | |
fprintf(stderr, "Error: HTTP response code is %ld.\n", | |
response_code); | |
goto cleanup; | |
} | |
} | |
else if(!strncasecmp(effective_url, "ftp://", 6)) { | |
if(response_code != 226) { | |
fprintf(stderr, "Error: FTP response code is %ld.\n", response_code); | |
goto cleanup; | |
} | |
} | |
} | |
} | |
retcode = TRUE; | |
cleanup: | |
curl_easy_cleanup(curl); | |
if(fp) | |
fclose(fp); | |
if(retcode == TRUE) { | |
unlink(filename_dst); | |
if(!rename(filename_tmp, filename_dst)) { | |
printf("Transfer successful. Filename: %s\n", filename_dst); | |
} | |
else { | |
fprintf(stderr, "Error: Transfer was successful but unable to rename " | |
"temp file to destination file. Temp file will not be " | |
"deleted.\n" | |
"temp: %s\n" | |
"dest: %s\n", | |
filename_tmp, filename_dst); | |
retcode = FALSE; | |
} | |
} | |
else { | |
if(filename_tmp && unlink(filename_tmp) == -1) | |
fprintf(stderr, "Error: Unable to remove temp file: %s\n", filename_tmp); | |
} | |
return retcode; | |
} | |
int main(int argc, char *argv[]) | |
{ | |
if(argc != 3) { | |
fprintf(stderr, | |
"Usage: ProxyTest <proxy|\"\"> <url>\n" | |
"\n" | |
"Use libcurl to retrieve a file, using a proxy if specified, and write " | |
"that file to disk with a filename based on the filename in the URL.\n" | |
"\n" | |
"\n" | |
"Some details on how this program behaves:\n" | |
"\n" | |
"If the first argument is an empty string no proxy is used.\n" | |
"The proxy type defaults to HTTP unless you specify a scheme:\n" | |
"socks4://, socks4a://, socks5:// or socks5h://\n" | |
"\n" | |
"The URL type defaults to auto-detect unless you specify a scheme.\n" | |
"\n" | |
"This program will exit 0 on success. In that case the download was " | |
"successful and the file was saved to a local file in the current " | |
"directory. The file has a filename based on the filename in the URL.\n" | |
"\n" | |
"In detail, the download is written to a file based on the " | |
"filename in the URL + .(00-99) + .tmp until it is determined the " | |
"download was successful. In that case it's renamed to the filename in " | |
"the URL. The filenames may not be exact because forbidden characters " | |
"are replaced with underscores. Further: If there is no filename in the " | |
"URL the local file is named \"unknown\". If a file already exists it " | |
"is replaced.\n" | |
"\n" | |
"For example http://example.com/foo.zip is downloaded to foo.zip.00.tmp " | |
"and if the download is successful it's renamed foo.zip. If foo.zip " | |
"already exists it is replaced. If it cannot be replaced or the rename " | |
"fails the tmp file remains and the program exits without success " | |
"(!= 0).\n" | |
"\n" | |
"In detail, a download is only considered successful if libcurl says " | |
"the transfer is OK. Further, if a server response code was received " | |
"then in the case of an HTTP(S) proxy or URL the code must be 200; or " | |
"otherwise if FTP(S) then the code must be 226.\n"); | |
return EXIT_FAILURE; | |
} | |
if(curl_global_init(CURL_GLOBAL_ALL)) { | |
fprintf(stderr, "Fatal: The initialization of libcurl has failed.\n"); | |
return EXIT_FAILURE; | |
} | |
if(atexit(curl_global_cleanup)) { | |
fprintf(stderr, "Fatal: atexit failed to register curl_global_cleanup.\n"); | |
curl_global_cleanup(); | |
return EXIT_FAILURE; | |
} | |
if(!ProxyTest(argv[1], argv[2])) { | |
fprintf(stderr, "Fatal: ProxyTest failed.\n"); | |
return EXIT_FAILURE; | |
} | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment