Created
August 13, 2018 15:50
-
-
Save sonictk/b31580160eeb497e32e32480d3003e9d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <arpa/inet.h> | |
#include <fcntl.h> | |
#include <netdb.h> | |
#include <sys/socket.h> | |
#include <sys/types.h> | |
#include <unistd.h> | |
#include <cstdlib> | |
// Classifies a content type string (e.g. "image/png") as binary or text data.
//
// Only the text in front of the first '/' separator is inspected (leading '/'
// characters are skipped). If that text contains one of the top-level types
// "application", "audio", "font", "image" or "video", the content is treated
// as binary; anything else is treated as text.
//
// contentTypeStr: NUL-terminated content type string; may be NULL.
//
// Returns FileType::BINARY or FileType::TEXT, or FileType::UNKNOWN if the
// string is NULL, has no text in front of a separator, or memory could not be
// allocated.
FileType translateToContentType(const char *contentTypeStr)
{
    if (contentTypeStr == NULL) {
        cerr << "Content type request was formatted incorrectly!\n";
        return FileType::UNKNOWN;
    }

    // NOTE: Locate the first '/'-delimited token without strtok(), which
    // keeps hidden global state and is therefore not re-entrant.
    const char *typeStart = contentTypeStr + strspn(contentTypeStr, "/");
    const size_t typeLen = strcspn(typeStart, "/");
    if (typeLen == 0) {
        cerr << "Content type request was formatted incorrectly!\n";
        return FileType::UNKNOWN;
    }

    char *topLevelType = static_cast<char *>(malloc(typeLen + 1));
    if (topLevelType == NULL) {
        cerr << "Could not allocate memory: " << strerror(errno) << "\n";
        return FileType::UNKNOWN;
    }
    memcpy(topLevelType, typeStart, typeLen);
    topLevelType[typeLen] = '\0';

    static const char *const binaryTypes[] = {
        "application", "audio", "font", "image", "video"
    };
    const size_t numBinaryTypes = sizeof(binaryTypes) / sizeof(binaryTypes[0]);

    FileType result = FileType::TEXT;
    for (size_t i = 0; i < numBinaryTypes; ++i) {
        // NOTE: strstr() returns a pointer, so it has to be compared against
        // NULL; an ordered comparison against 0 is not valid C++.
        if (strstr(topLevelType, binaryTypes[i]) != NULL) {
            result = FileType::BINARY;
            break;
        }
    }

    free(topLevelType);
    return result;
}
// Determines whether the content described by a HTTP header is binary or text.
//
// The header line starting at ``contentTypeToken`` is extracted (up to, but
// not including, the CRLF that ends the line) and handed over to
// translateToContentType() for classification.
//
// request: NUL-terminated HTTP header text; may be NULL.
//
// Returns the classified FileType, or FileType::UNKNOWN if the token or the
// end of its line cannot be found, or memory could not be allocated.
FileType getContentType(const char *request)
{
    if (request == NULL) {
        cerr << "Request header was not formatted correctly!\n";
        return FileType::UNKNOWN;
    }

    const char *found = strstr(request, contentTypeToken);
    if (found == NULL) {
        cerr << "Could not find " << contentTypeToken << "in request header!\n";
        return FileType::UNKNOWN;
    }

    // NOTE: A single header line ends at the first CRLF. Searching for the
    // blank line that ends the whole header would drag all of the following
    // header lines into the text that gets classified.
    const char *lineEnd = strstr(found, "\r\n");
    if (lineEnd == NULL) {
        cerr << "Request header was not formatted correctly!\n";
        return FileType::UNKNOWN;
    }

    const size_t contentTypeRequestLen = static_cast<size_t>(lineEnd - found);
    char *contentTypeRequest = static_cast<char *>(malloc(contentTypeRequestLen + 1));
    if (contentTypeRequest == NULL) {
        cerr << "Could not allocate memory: " << strerror(errno) << "\n";
        return FileType::UNKNOWN;
    }
    memcpy(contentTypeRequest, found, contentTypeRequestLen);
    // NOTE: The terminator belongs at the last valid index of the buffer;
    // writing it one element further is a heap overflow.
    contentTypeRequest[contentTypeRequestLen] = '\0';

    FileType contentType = translateToContentType(contentTypeRequest);
    free(contentTypeRequest);
    return contentType;
}
unsigned int getLengthOfHTTPRequest(const char *resourcePath, const char *domainName) | |
{ | |
unsigned int len = strlen(resourcePath) + strlen(domainName) + strlen(HTTPRequestTemplate) - 3; | |
return len; | |
} | |
int formatHTTPRequest(const char *resourcePath, | |
const char *domainName, | |
char *buf, | |
unsigned int size) | |
{ | |
int len = snprintf(buf, size, HTTPRequestTemplate, resourcePath, domainName); | |
if (len < 0) { | |
cerr << "Could not format HTTP request: " << strerror(errno) << "\n"; | |
return 0; | |
} | |
return len; | |
} | |
int getSizeOfContent(const char *response) | |
{ | |
// TODO: (sonictk) Check if other types of status codes (like 201) need to be | |
// accounted for | |
const char *statusCode = strstr(response, statusCodeToken); | |
if (statusCode == NULL) { | |
return -1; | |
} | |
const char *contentLengthFound = strstr(response, contentLengthToken); | |
if (contentLengthFound == NULL) { | |
return -1; | |
} | |
// TODO: (sonictk) Might be a better way of parsing the HTTP response than doing this? | |
unsigned int contentLengthLenStart = strlen(contentLengthFound) - strlen(contentLengthToken) + 1; | |
char *contentLengthStart = (char *)malloc(contentLengthLenStart * sizeof(char)); | |
strncpy(contentLengthStart, contentLengthFound + strlen(contentLengthToken), contentLengthLenStart); | |
const char *contentLengthEnd = strstr(contentLengthStart, "\r\n"); | |
unsigned int contentLengthLen = strlen(contentLengthStart) - strlen(contentLengthEnd) + 1; | |
char *contentLengthStr = (char *)malloc(contentLengthLen * sizeof(char)); | |
strncpy(contentLengthStr, contentLengthStart, contentLengthLen - 1); | |
int contentLength = atoi(contentLengthStr); | |
free(contentLengthStart); | |
free(contentLengthStr); | |
if (contentLength == 0) { | |
return -1; | |
} | |
return contentLength; | |
} | |
// Measures the header of a HTTP response.
//
// response: NUL-terminated HTTP response text.
//
// Returns the number of bytes from the start of ``response`` up to and
// including the first blank line ("\r\n\r\n"), i.e. the offset at which the
// content starts, or -1 if the response contains no blank line.
int getLengthOfHTTPHeader(char *response)
{
    static const char headerTerminator[] = "\r\n\r\n";

    const char *terminatorPos = strstr(response, headerTerminator);
    if (terminatorPos == NULL) {
        return -1;
    }

    // Offset of the terminator plus the length of the terminator itself.
    const unsigned int headerLen = static_cast<unsigned int>(
        (terminatorPos - response) + (sizeof(headerTerminator) - 1));
    return headerLen;
}
// Writes a block of downloaded content to an open file.
//
// contentType: How to write the content. FileType::TEXT writes it as a string
//              of at most ``size`` characters (stopping early at a NUL
//              character); FileType::BINARY writes exactly ``size`` bytes.
// filep:       File to write to.
// content:     Start of the content.
// size:        Number of bytes available at ``content``.
//
// Returns the value reported by fprintf()/fwrite() for the write, or -1 if the
// content type is neither text nor binary.
int writeToFile(FileType contentType, FILE *filep, void *content, unsigned int size)
{
    if (contentType == FileType::TEXT) {
        return fprintf(filep, "%.*s", size, (char *)content);
    }

    if (contentType == FileType::BINARY) {
        return fwrite(content, sizeof(char), size, filep);
    }

    cerr << "Unknown content type!\n";
    return -1;
}
int downloadFile(const char *url, const char *outputPath, unsigned int timeoutMax) | |
{ | |
if (url == NULL) { | |
cerr << "No valid URL provided!\n"; | |
return -1; | |
} | |
if (outputPath == NULL) { | |
cerr << "No valid outpath path specified!\n"; | |
return -1; | |
} | |
// NOTE: (sonictk) Find the domain name from the whole URL given so that we | |
// can convert it to a IP address that can be used for opening a socket with | |
// TODO: (sonictk) See if malloc-ing is the right thing to do or allocate from | |
// a pool instead | |
unsigned int urlLen = strlen(url); | |
char *searchStr = (char *)malloc((urlLen + 1) * sizeof(char)); | |
strcpy(searchStr, url); | |
const char *protocolSeparator = "://"; | |
unsigned int protocolSeparatorLen = strlen(protocolSeparator); | |
char *domainNameBuffer = (char *)malloc((urlLen + 1) * sizeof(char)); | |
char *delimiter = strstr(searchStr, protocolSeparator); | |
if (delimiter != NULL) { | |
char *domainNameBufferTmp = delimiter + protocolSeparatorLen; | |
strcpy(domainNameBuffer, domainNameBufferTmp); | |
} else { | |
strcpy(domainNameBuffer, url); | |
} | |
strtok(domainNameBuffer, "/"); | |
size_t domainNameLen = strlen(domainNameBuffer); | |
char *domainName = (char *)malloc((domainNameLen + 1) * sizeof(char)); | |
int downloadResult = 0; | |
strcpy(domainName, domainNameBuffer); | |
if (strlen(domainName) == 0) { | |
cerr << "Could not determine the domain name!\n"; | |
free(searchStr); | |
free(domainNameBuffer); | |
free(domainName); | |
return -1; | |
} | |
unsigned int delimiterLen; | |
if (delimiter == NULL) { | |
delimiterLen = 0; | |
} else { | |
delimiterLen = strlen(delimiter); | |
} | |
unsigned int protocolLen = urlLen - strlen(delimiter); | |
unsigned int resourcePathLen = urlLen - protocolLen - domainNameLen - protocolSeparatorLen; | |
char *resourcePath = (char *)malloc((resourcePathLen + 1) * sizeof(char)); | |
unsigned int offset = domainNameLen + urlLen - delimiterLen + protocolSeparatorLen; | |
strcpy(resourcePath, url + offset); | |
struct addrinfo *addressResult, *iter, hints; | |
hints.ai_flags = AI_V4MAPPED|AI_ADDRCONFIG; | |
hints.ai_family = AF_INET; // NOTE: (sonictk) This forces IPv4 addresses only | |
hints.ai_socktype = SOCK_STREAM; | |
hints.ai_protocol = IPPROTO_TCP; | |
hints.ai_addrlen = 0; | |
hints.ai_addr = NULL; | |
hints.ai_canonname = NULL; | |
hints.ai_next = NULL; | |
int result = getaddrinfo(domainName, "80", &hints, &addressResult); | |
if (result != 0) { | |
if (result == EAI_SYSTEM) { | |
cerr << "Could not get address information! System error\n"; | |
} else { | |
cerr << "Error in getaddrinfo: " << gai_strerror(result) << "\n"; | |
} | |
free(searchStr); | |
free(domainNameBuffer); | |
free(domainName); | |
free(resourcePath); | |
return -1; | |
} | |
FILE *filep = fopen(outputPath, "wb"); | |
if (filep == NULL) { | |
cerr << "Could not open file for writing: " << strerror(errno) << "\n"; | |
free(searchStr); | |
free(domainNameBuffer); | |
free(domainName); | |
free(resourcePath); | |
return -1; | |
} | |
for (iter = addressResult; iter != NULL; iter = iter->ai_next) { | |
int socketFileDesc = socket(iter->ai_family, | |
iter->ai_socktype, | |
iter->ai_protocol); | |
int optionVal = 1; | |
// NOTE: (sonictk) Allow socket to be re-used, otherwise the port will enter | |
// timeout state during which time cannot be re-bound to a new socket after | |
// the first socket is closed | |
setsockopt(socketFileDesc, | |
SOL_SOCKET, | |
SO_REUSEPORT, | |
&optionVal, | |
sizeof(optionVal)); | |
// NOTE: (sonictk) Set socket to have non-blocking behaviour so that this | |
// can be called from a main thread without blocking the application | |
fcntl(socketFileDesc, F_SETFL, O_NONBLOCK); | |
if (socketFileDesc == -1) { | |
cerr << strerror(errno) << "\n"; | |
continue; | |
} | |
connect(socketFileDesc, iter->ai_addr, iter->ai_addrlen); | |
struct timeval timeout; | |
timeout.tv_sec = timeoutMax; | |
timeout.tv_usec = 0; | |
fd_set readFileDescs, writeFileDescs; | |
FD_ZERO(&readFileDescs); | |
FD_ZERO(&writeFileDescs); | |
FD_SET(socketFileDesc, &writeFileDescs); | |
FD_SET(socketFileDesc, &readFileDescs); | |
int fileDescs = select(socketFileDesc + 1, | |
NULL, | |
&writeFileDescs, | |
NULL, | |
&timeout); | |
int responseRead = 0; | |
ssize_t lenResponse = 0; | |
switch (fileDescs) { | |
case 0: | |
cerr << "Timeout limit hit!\n"; | |
break; | |
case -1: | |
cerr << "Error occurred during poll of socket: " << strerror(errno) << "\n"; | |
break; | |
default: | |
int fileSize = 0; | |
int soError; | |
socklen_t length = sizeof(soError); | |
getsockopt(socketFileDesc, SOL_SOCKET, SO_ERROR, &soError, &length); | |
if (soError == 0) { | |
time_t start = time(NULL); | |
time_t timeoutLimit = timeoutMax; | |
time_t end = start + timeoutLimit; | |
unsigned int requestLen = getLengthOfHTTPRequest(resourcePath, domainName); | |
if (requestLen == 0) { | |
cerr << "Invalid HTTP request!\n"; | |
return -1; | |
} | |
char *request = (char *)malloc(requestLen * sizeof(char)); | |
formatHTTPRequest(resourcePath, domainName, request, requestLen); | |
while (start < end) { | |
if (FD_ISSET(socketFileDesc, &writeFileDescs)) { | |
write(socketFileDesc, request, strlen(request)); | |
break; | |
} | |
start = time(NULL); | |
} | |
free(request); | |
start = time(NULL); | |
end = start + timeoutLimit; | |
char response[bufferSize]; | |
FileType contentType = FileType::UNKNOWN; | |
while (start < end) { | |
memset(response, 0, bufferSize); | |
lenResponse = read(socketFileDesc, response, bufferSize); | |
std::cout << downloadResult << " of " << fileSize << std::endl; | |
// NOTE: (sonictk) If the file has finished downloading, stop reading | |
if (downloadResult > 0 && downloadResult >= fileSize) { | |
break; | |
} | |
if (lenResponse <= 0) { | |
// NOTE: (sonictk) If the read failed, retry after a short timeout | |
usleep(100000); | |
} else { | |
if (contentType == FileType::UNKNOWN) { | |
contentType = getContentType(response); | |
} | |
responseRead += lenResponse; | |
// TODO: (sonictk) Figure out how to condense this | |
if (fileSize == 0) { | |
fileSize = getSizeOfContent(response); | |
if (fileSize > 0) { | |
int headerLen = getLengthOfHTTPHeader(response); | |
if (headerLen == -1) { | |
cerr << "Invalid HTTP header!\n"; | |
continue; | |
} | |
memset(response, 0, headerLen); | |
// TODO: (sonictk) Account for if filesize is less than buffer size | |
downloadResult = writeToFile(contentType, | |
filep, | |
response + headerLen, | |
bufferSize - headerLen); | |
} | |
} else { | |
size_t bytesToWrite = fileSize - downloadResult; | |
bytesToWrite = bytesToWrite > bufferSize ? bufferSize : bytesToWrite; | |
downloadResult += writeToFile(contentType, | |
filep, | |
response, | |
bytesToWrite); | |
} | |
} | |
start = time(NULL); | |
} | |
} else { | |
cerr << "Error occurred: " << strerror(errno) << "\n"; | |
} | |
break; | |
} | |
// NOTE: (sonictk) If the download failed to complete before the timeout expired | |
if (responseRead < lenResponse) { | |
cerr << "File was not fully downloaded within the timeout!\n"; | |
return -1; | |
} | |
// NOTE: (sonictk) Shutdown tells server also that there's no need to | |
// send any more data | |
shutdown(socketFileDesc, SHUT_RDWR); | |
close(socketFileDesc); | |
} | |
freeaddrinfo(iter); | |
fclose(filep); | |
free(searchStr); | |
free(domainNameBuffer); | |
free(domainName); | |
free(resourcePath); | |
return downloadResult; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment