Created
July 5, 2018 17:37
-
-
Save oatmealraisin/725af59f03f20a22b578776d4c3f3c0b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <curl/multi.h> | |
#include <string.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/select.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
#include <zlib.h> | |
/* Longest time, in milliseconds, one curl_multi_wait call may block
   (30 seconds). Parenthesized so the product keeps its value when the macro
   is used inside a larger expression such as `x / MAX_WAIT_MSECS`. */
#define MAX_WAIT_MSECS (30 * 1000)
/* Size, in bytes, of the buffer used when inflating gzip data. */
#define CHUNK 8192
/* Directory used when mnist_download() receives a NULL dir. */
#define MNIST_DATA_DIR "/tmp/mnist"
/* File names of the decompressed MNIST data inside the data directory. */
#define MNIST_TR_IMG_FILE "train-images-idx3-ubyte"
#define MNIST_TR_LAB_FILE "train-labels-idx1-ubyte"
#define MNIST_TE_IMG_FILE "t10k-images-idx3-ubyte"
#define MNIST_TE_LAB_FILE "t10k-labels-idx1-ubyte"
/* SET_BINARY_MODE(file) switches the descriptor behind a stdio stream to
   binary mode via setmode() on the listed platforms and expands to nothing
   everywhere else. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>
#  include <io.h>
#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif
/* Source URLs of the gzip-compressed MNIST archives. */
static char *mnist_training_images_url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz";
static char *mnist_training_labels_url = "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz";
static char *mnist_testing_images_url = "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz";
static char *mnist_testing_labels_url = "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz";
unsigned char decompressGzip(const char *gzFileName, const char *destFileName) { | |
gzFile gzf = gzopen(gzFileName, "rb"); | |
FILE *destFile = fopen(destFileName, "wb"); | |
char buffer[CHUNK]; | |
int num_read = 0; | |
while((num_read = gzread(gzf, buffer, sizeof(buffer))) > 0) { | |
fwrite(buffer, 1, num_read, destFile); | |
} | |
gzclose(gzf); | |
fclose(destFile); | |
remove(gzFileName); | |
return 0; | |
} | |
/*
 * Builds "<dir>/<filename>". A '/' is always inserted between the two parts,
 * even when dir already ends in one.
 *
 * Returns a newly allocated string that the caller must free(), or NULL when
 * either argument is NULL or the allocation fails.
 */
char *fullname(char *dir, char *filename) {
    size_t dir_len;
    size_t file_len;
    char *joined;

    if (dir == NULL || filename == NULL) {
        return NULL;
    }

    dir_len = strlen(dir);
    file_len = strlen(filename);

    /* +2: one byte for the separator, one for the terminating NUL. */
    joined = malloc(dir_len + file_len + 2);
    if (joined == NULL) {
        return NULL;
    }

    memcpy(joined, dir, dir_len);
    joined[dir_len] = '/';
    /* file_len + 1 also copies filename's terminating NUL. */
    memcpy(joined + dir_len + 1, filename, file_len + 1);
    return joined;
}
/*
 * Unless filename already exists, opens it for writing and configures a
 * libcurl easy handle to fetch url into it. No transfer is performed here.
 *
 * Returns 1 when filename already exists (nothing is created), 0 when the
 * output file was opened and the handle configured, and 2 on any failure
 * (NULL url/filename, open failure, handle creation or option failure).
 * On failure after the file was opened, the handle and stream created here
 * are released and the empty output file is deleted so that a later
 * existence check does not mistake it for downloaded data.
 *
 * NOTE(review): eh and fh are received by value, so the handle and stream
 * created on the success path never reach the caller, who can neither run
 * the transfer nor release them. Handing them back would need CURL ** and
 * FILE ** parameters - confirm the intended design before using this.
 */
unsigned char checkToDownload(CURL *eh, char *url, char *filename, FILE *fh) {
    struct stat st = {0};

    if (url == NULL || filename == NULL) {
        return 2;
    }

    if (stat(filename, &st) != -1) {
        return 1;
    }

    /* Start from a known state; the incoming values are overwritten anyway. */
    eh = NULL;
    fh = fopen(filename, "wb");
    if (fh == NULL) {
        return 2;
    }

    eh = curl_easy_init();
    if (eh == NULL) goto cleanup;

    /* curl_easy_setopt reports a CURLcode, so success is CURLE_OK. */
    if (curl_easy_setopt(eh, CURLOPT_WRITEDATA, fh) != CURLE_OK) goto cleanup;
    /* Use the caller's URL rather than a fixed one. */
    if (curl_easy_setopt(eh, CURLOPT_URL, url) != CURLE_OK) goto cleanup;
    /* Numeric options are fetched as long from the variadic argument list. */
    if (curl_easy_setopt(eh, CURLOPT_NOPROGRESS, 0L) != CURLE_OK) goto cleanup;
    if (curl_easy_setopt(eh, CURLOPT_VERBOSE, 0L) != CURLE_OK) goto cleanup;

    return 0;

cleanup:
    if (eh != NULL) {
        curl_easy_cleanup(eh);
    }
    fclose(fh);
    remove(filename);
    return 2;
}
unsigned char mnist_download(char *dir) { | |
struct stat st = {0}; | |
if(dir == NULL) { | |
dir = MNIST_DATA_DIR; | |
} | |
/* | |
There is a lot of duplicated effort. The only reason we don't create a | |
routine is because we call curl_multi functions, which wouldn't work if | |
everything was in a routine. | |
*/ | |
char *name_trImg = fullname(dir, MNIST_TR_IMG_FILE); | |
char *name_trLab = fullname(dir, MNIST_TR_LAB_FILE); | |
char *name_teImg = fullname(dir, MNIST_TE_IMG_FILE); | |
char *name_teLab = fullname(dir, MNIST_TE_LAB_FILE); | |
char *name_trImgGz = malloc(sizeof(char) * (strlen(name_trImg) + 4)); | |
char *name_trLabGz = malloc(sizeof(char) * (strlen(name_trLab) + 4)); | |
char *name_teImgGz = malloc(sizeof(char) * (strlen(name_teImg) + 4)); | |
char *name_teLabGz = malloc(sizeof(char) * (strlen(name_teLab) + 4)); | |
name_trImgGz[0] = '\0'; | |
name_trLabGz[0] = '\0'; | |
name_teImgGz[0] = '\0'; | |
name_teLabGz[0] = '\0'; | |
strcat(name_trImgGz, name_trImg); | |
strcat(name_trLabGz, name_trLab); | |
strcat(name_teImgGz, name_teImg); | |
strcat(name_teLabGz, name_teLab); | |
strcat(name_trImgGz, ".gz"); | |
strcat(name_trLabGz, ".gz"); | |
strcat(name_teImgGz, ".gz"); | |
strcat(name_teLabGz, ".gz"); | |
if(stat(dir, &st) == -1) { | |
mkdir(dir, S_IRWXU | S_IRWXG | S_IRWXO ); | |
} | |
curl_global_init(CURL_GLOBAL_ALL); | |
CURLM *mHandle = curl_multi_init(); | |
CURLMcode rc; | |
CURL *trImgCurl, | |
*trLabCurl, | |
*teImgCurl, | |
*teLabCurl; | |
FILE *trImgFile, | |
*trLabFile, | |
*teImgFile, | |
*teLabFile; | |
int numDownloads = 0; | |
if(-1 == stat(name_trImg, &st)) { | |
trImgCurl = curl_easy_init(); | |
trImgFile = fopen(name_trImgGz, "wb"); | |
rc = curl_easy_setopt(trImgCurl, CURLOPT_WRITEDATA, trImgFile); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trImgCurl, CURLOPT_URL, mnist_training_images_url); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trImgCurl, CURLOPT_NOPROGRESS, 1); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trImgCurl, CURLOPT_VERBOSE, 0L); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_multi_add_handle(mHandle, trImgCurl); | |
if(rc != CURLM_OK) goto cleanup; | |
numDownloads++; | |
} | |
if(-1 == stat(name_trLab, &st)) { | |
trLabCurl = curl_easy_init(); | |
trLabFile = fopen(name_trLabGz, "wb"); | |
rc = curl_easy_setopt(trLabCurl, CURLOPT_WRITEDATA, trLabFile); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trLabCurl, CURLOPT_URL, mnist_training_labels_url); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trLabCurl, CURLOPT_NOPROGRESS, 1); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(trLabCurl, CURLOPT_VERBOSE, 0L); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_multi_add_handle(mHandle, trLabCurl); | |
if(rc != CURLM_OK) goto cleanup; | |
numDownloads++; | |
} | |
if(-1 == stat(name_teImg, &st)) { | |
teImgCurl = curl_easy_init(); | |
teImgFile = fopen(name_teImgGz, "wb"); | |
rc = curl_easy_setopt(teImgCurl, CURLOPT_WRITEDATA, teImgFile); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teImgCurl, CURLOPT_URL, mnist_testing_images_url); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teImgCurl, CURLOPT_NOPROGRESS, 1); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teImgCurl, CURLOPT_VERBOSE, 0L); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_multi_add_handle(mHandle, teImgCurl); | |
if(rc != CURLM_OK) goto cleanup; | |
numDownloads++; | |
} | |
if(-1 == stat(name_teLab, &st)) { | |
teLabCurl = curl_easy_init(); | |
teLabFile = fopen(name_teLabGz, "wb"); | |
rc = curl_easy_setopt(teLabCurl, CURLOPT_WRITEDATA, teLabFile); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teLabCurl, CURLOPT_URL, mnist_testing_labels_url); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teLabCurl, CURLOPT_NOPROGRESS, 1); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_easy_setopt(teLabCurl, CURLOPT_VERBOSE, 0L); | |
if(rc != CURLM_OK) goto cleanup; | |
rc = curl_multi_add_handle(mHandle, teLabCurl); | |
if(rc != CURLM_OK) goto cleanup; | |
numDownloads++; | |
} | |
// If 0, we've already downloaded what we need | |
if(numDownloads == 0) { | |
return 1; | |
} | |
int still_running = 0; | |
int numfds = 0; | |
int repeats = 0; | |
do { | |
rc = curl_multi_perform(mHandle, &still_running); | |
if(rc != CURLM_OK) return rc; | |
rc = curl_multi_wait(mHandle, NULL, 0, MAX_WAIT_MSECS, &numfds); | |
if(rc != CURLM_OK) return rc; | |
if(!numfds) { | |
if(repeats > 1) sleep(5); | |
} else { | |
repeats = 0; | |
} | |
} while (still_running); | |
if(!trImgCurl) curl_easy_cleanup(trImgCurl); | |
if(!trLabCurl) curl_easy_cleanup(trLabCurl); | |
if(!teImgCurl) curl_easy_cleanup(teImgCurl); | |
if(!teLabCurl) curl_easy_cleanup(teLabCurl); | |
rc = curl_multi_cleanup(mHandle); | |
if(rc != CURLM_OK) return rc; | |
curl_global_cleanup(); | |
if(rc != CURLM_OK) return rc; | |
if(!trImgFile) fclose(trImgFile); | |
if(!trLabFile) fclose(trLabFile); | |
if(!teImgFile) fclose(teImgFile); | |
if(!teLabFile) fclose(teLabFile); | |
decompressGzip(name_trImgGz, name_trImg); | |
decompressGzip(name_trLabGz, name_trLab); | |
decompressGzip(name_teImgGz, name_teImg); | |
decompressGzip(name_teLabGz, name_teLab); | |
free(name_trImg); | |
free(name_trLab); | |
free(name_teImg); | |
free(name_teLab); | |
free(name_trImgGz); | |
free(name_trLabGz); | |
free(name_teImgGz); | |
free(name_teLabGz); | |
return 0; | |
cleanup: | |
return 2; | |
} | |
/*
 * Runs mnist_download() and prints its result code.
 *
 * The first command-line argument, when given, is the target directory;
 * without it the directory this program was originally hard-wired to is used.
 * The exit status is EXIT_SUCCESS for result codes 0 and 1 (the two
 * non-error returns of mnist_download) and EXIT_FAILURE for anything else.
 */
int main(int argc, char **argv) {
    char *dir = (argc > 1) ? argv[1] : "/home/ryan/documents/projects/llml/";
    int result = mnist_download(dir);

    printf("Result: %d\n", result);
    return (result == 0 || result == 1) ? EXIT_SUCCESS : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment