/*
 * Skip to content
 *
 * Instantly share code, notes, and snippets.
 *
 * @oatmealraisin
 * Created July 5, 2018 17:37
 * Show Gist options
 *   • Save oatmealraisin/725af59f03f20a22b578776d4c3f3c0b to your computer and use it in GitHub Desktop.
 * Save oatmealraisin/725af59f03f20a22b578776d4c3f3c0b to your computer and use it in GitHub Desktop.
 */
#include <curl/multi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <zlib.h>
/*
 * Longest time, in milliseconds, a single curl_multi_wait() call may block
 * (30 seconds). Parenthesized so the macro expands safely inside larger
 * expressions such as `x / MAX_WAIT_MSECS`.
 */
#define MAX_WAIT_MSECS (30 * 1000)

/* Size in bytes of the buffer used when inflating a .gz archive. */
#define CHUNK 8192

/* Directory used when the caller passes NULL to mnist_download(). */
#define MNIST_DATA_DIR "/tmp/mnist"

/* Base names of the four decompressed MNIST files. */
#define MNIST_TR_IMG_FILE "train-images-idx3-ubyte"
#define MNIST_TR_LAB_FILE "train-labels-idx1-ubyte"
#define MNIST_TE_IMG_FILE "t10k-images-idx3-ubyte"
#define MNIST_TE_LAB_FILE "t10k-labels-idx1-ubyte"

/*
 * SET_BINARY_MODE(file) switches a stdio stream to binary mode on platforms
 * that distinguish text from binary streams and expands to nothing elsewhere.
 * NOTE(review): nothing in the visible code uses this macro.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif

/* Remote locations of the gzip-compressed MNIST archives (read-only). */
static const char *const mnist_training_images_url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz";
static const char *const mnist_training_labels_url = "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz";
static const char *const mnist_testing_images_url = "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz";
static const char *const mnist_testing_labels_url = "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz";
unsigned char decompressGzip(const char *gzFileName, const char *destFileName) {
gzFile gzf = gzopen(gzFileName, "rb");
FILE *destFile = fopen(destFileName, "wb");
char buffer[CHUNK];
int num_read = 0;
while((num_read = gzread(gzf, buffer, sizeof(buffer))) > 0) {
fwrite(buffer, 1, num_read, destFile);
}
gzclose(gzf);
fclose(destFile);
remove(gzFileName);
return 0;
}
/*
 * Build the path "<dir>/<filename>" in newly allocated memory.
 *
 * A '/' is always inserted between the two parts, even if `dir` already ends
 * with one. The caller owns the returned string and must free() it.
 *
 * Returns NULL if either argument is NULL or the allocation fails.
 */
char *fullname(char *dir, char *filename) {
    if (dir == NULL || filename == NULL) {
        return NULL;
    }

    size_t dir_len = strlen(dir);
    size_t file_len = strlen(filename);

    /* +2: one byte for the '/' separator, one for the terminating NUL. */
    char *path = malloc(dir_len + file_len + 2);
    if (path == NULL) {
        return NULL;
    }

    memcpy(path, dir, dir_len);
    path[dir_len] = '/';
    memcpy(path + dir_len + 1, filename, file_len + 1); /* copies the NUL too */
    return path;
}
/*
 * Prepare a libcurl easy handle that downloads `url` into `filename`, unless
 * `filename` already exists.
 *
 * Returns:
 *   1  `filename` already exists; nothing was set up.
 *   0  a new easy handle and output stream were created and configured.
 *   2  setup failed; everything created here was released and the empty
 *      output file was removed.
 *
 * NOTE(review): `eh` and `fh` are passed by value, so on success the handle
 * and stream created here are not visible to the caller and cannot be used or
 * released by it. Making this function usable likely needs `CURL **` and
 * `FILE **` parameters; the signature is left unchanged here.
 */
unsigned char checkToDownload(CURL *eh, char *url, char *filename, FILE *fh) {
    struct stat st = {0};

    if (stat(filename, &st) != -1) {
        return 1;
    }

    eh = curl_easy_init();
    if (eh == NULL) {
        return 2;
    }

    fh = fopen(filename, "wb");
    if (fh == NULL) {
        curl_easy_cleanup(eh);
        return 2;
    }

    /* Long-typed options must be passed as long through the varargs call. */
    if (curl_easy_setopt(eh, CURLOPT_WRITEDATA, fh) != CURLE_OK ||
        curl_easy_setopt(eh, CURLOPT_URL, url) != CURLE_OK ||
        curl_easy_setopt(eh, CURLOPT_NOPROGRESS, 0L) != CURLE_OK ||
        curl_easy_setopt(eh, CURLOPT_VERBOSE, 0L) != CURLE_OK) {
        curl_easy_cleanup(eh);
        fclose(fh);
        /* Do not leave an empty file that a later stat() would mistake for a download. */
        remove(filename);
        return 2;
    }

    return 0;
}
unsigned char mnist_download(char *dir) {
struct stat st = {0};
if(dir == NULL) {
dir = MNIST_DATA_DIR;
}
/*
There is a lot of duplicated effort. The only reason we don't create a
routine is because we call curl_multi functions, which wouldn't work if
everything was in a routine.
*/
char *name_trImg = fullname(dir, MNIST_TR_IMG_FILE);
char *name_trLab = fullname(dir, MNIST_TR_LAB_FILE);
char *name_teImg = fullname(dir, MNIST_TE_IMG_FILE);
char *name_teLab = fullname(dir, MNIST_TE_LAB_FILE);
char *name_trImgGz = malloc(sizeof(char) * (strlen(name_trImg) + 4));
char *name_trLabGz = malloc(sizeof(char) * (strlen(name_trLab) + 4));
char *name_teImgGz = malloc(sizeof(char) * (strlen(name_teImg) + 4));
char *name_teLabGz = malloc(sizeof(char) * (strlen(name_teLab) + 4));
name_trImgGz[0] = '\0';
name_trLabGz[0] = '\0';
name_teImgGz[0] = '\0';
name_teLabGz[0] = '\0';
strcat(name_trImgGz, name_trImg);
strcat(name_trLabGz, name_trLab);
strcat(name_teImgGz, name_teImg);
strcat(name_teLabGz, name_teLab);
strcat(name_trImgGz, ".gz");
strcat(name_trLabGz, ".gz");
strcat(name_teImgGz, ".gz");
strcat(name_teLabGz, ".gz");
if(stat(dir, &st) == -1) {
mkdir(dir, S_IRWXU | S_IRWXG | S_IRWXO );
}
curl_global_init(CURL_GLOBAL_ALL);
CURLM *mHandle = curl_multi_init();
CURLMcode rc;
CURL *trImgCurl,
*trLabCurl,
*teImgCurl,
*teLabCurl;
FILE *trImgFile,
*trLabFile,
*teImgFile,
*teLabFile;
int numDownloads = 0;
if(-1 == stat(name_trImg, &st)) {
trImgCurl = curl_easy_init();
trImgFile = fopen(name_trImgGz, "wb");
rc = curl_easy_setopt(trImgCurl, CURLOPT_WRITEDATA, trImgFile);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trImgCurl, CURLOPT_URL, mnist_training_images_url);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trImgCurl, CURLOPT_NOPROGRESS, 1);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trImgCurl, CURLOPT_VERBOSE, 0L);
if(rc != CURLM_OK) goto cleanup;
rc = curl_multi_add_handle(mHandle, trImgCurl);
if(rc != CURLM_OK) goto cleanup;
numDownloads++;
}
if(-1 == stat(name_trLab, &st)) {
trLabCurl = curl_easy_init();
trLabFile = fopen(name_trLabGz, "wb");
rc = curl_easy_setopt(trLabCurl, CURLOPT_WRITEDATA, trLabFile);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trLabCurl, CURLOPT_URL, mnist_training_labels_url);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trLabCurl, CURLOPT_NOPROGRESS, 1);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(trLabCurl, CURLOPT_VERBOSE, 0L);
if(rc != CURLM_OK) goto cleanup;
rc = curl_multi_add_handle(mHandle, trLabCurl);
if(rc != CURLM_OK) goto cleanup;
numDownloads++;
}
if(-1 == stat(name_teImg, &st)) {
teImgCurl = curl_easy_init();
teImgFile = fopen(name_teImgGz, "wb");
rc = curl_easy_setopt(teImgCurl, CURLOPT_WRITEDATA, teImgFile);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teImgCurl, CURLOPT_URL, mnist_testing_images_url);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teImgCurl, CURLOPT_NOPROGRESS, 1);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teImgCurl, CURLOPT_VERBOSE, 0L);
if(rc != CURLM_OK) goto cleanup;
rc = curl_multi_add_handle(mHandle, teImgCurl);
if(rc != CURLM_OK) goto cleanup;
numDownloads++;
}
if(-1 == stat(name_teLab, &st)) {
teLabCurl = curl_easy_init();
teLabFile = fopen(name_teLabGz, "wb");
rc = curl_easy_setopt(teLabCurl, CURLOPT_WRITEDATA, teLabFile);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teLabCurl, CURLOPT_URL, mnist_testing_labels_url);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teLabCurl, CURLOPT_NOPROGRESS, 1);
if(rc != CURLM_OK) goto cleanup;
rc = curl_easy_setopt(teLabCurl, CURLOPT_VERBOSE, 0L);
if(rc != CURLM_OK) goto cleanup;
rc = curl_multi_add_handle(mHandle, teLabCurl);
if(rc != CURLM_OK) goto cleanup;
numDownloads++;
}
// If 0, we've already downloaded what we need
if(numDownloads == 0) {
return 1;
}
int still_running = 0;
int numfds = 0;
int repeats = 0;
do {
rc = curl_multi_perform(mHandle, &still_running);
if(rc != CURLM_OK) return rc;
rc = curl_multi_wait(mHandle, NULL, 0, MAX_WAIT_MSECS, &numfds);
if(rc != CURLM_OK) return rc;
if(!numfds) {
if(repeats > 1) sleep(5);
} else {
repeats = 0;
}
} while (still_running);
if(!trImgCurl) curl_easy_cleanup(trImgCurl);
if(!trLabCurl) curl_easy_cleanup(trLabCurl);
if(!teImgCurl) curl_easy_cleanup(teImgCurl);
if(!teLabCurl) curl_easy_cleanup(teLabCurl);
rc = curl_multi_cleanup(mHandle);
if(rc != CURLM_OK) return rc;
curl_global_cleanup();
if(rc != CURLM_OK) return rc;
if(!trImgFile) fclose(trImgFile);
if(!trLabFile) fclose(trLabFile);
if(!teImgFile) fclose(teImgFile);
if(!teLabFile) fclose(teLabFile);
decompressGzip(name_trImgGz, name_trImg);
decompressGzip(name_trLabGz, name_trLab);
decompressGzip(name_teImgGz, name_teImg);
decompressGzip(name_teLabGz, name_teLab);
free(name_trImg);
free(name_trLab);
free(name_teImg);
free(name_teLab);
free(name_trImgGz);
free(name_trLabGz);
free(name_teImgGz);
free(name_teLabGz);
return 0;
cleanup:
return 2;
}
/*
 * Program entry point: run the MNIST download into a fixed directory and
 * print the status code returned by mnist_download(). The process exit
 * status is always 0, whatever that status code is.
 */
int main(void) {
    int status;

    status = mnist_download("/home/ryan/documents/projects/llml/");
    printf("Result: %d\n", status);
    return 0;
}
/* Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */