Created
November 30, 2011 06:27
-
-
Save naquad/1408252 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <errno.h>
#include "parse_http.h"
http_request_t* http_request_create(size_t prealloc, size_t growth, void *userdata){ | |
http_request_t *ret; | |
if(!(ret = malloc(sizeof(*ret)))) | |
return NULL; | |
bzero(ret, sizeof(*ret)); | |
ret->userdata = userdata; | |
ret->buffer_growth = growth; | |
ret->buffer_allocated = prealloc; | |
ret->buffer_prealloc = prealloc; | |
if(prealloc && !(ret->buffer = malloc(prealloc))){ | |
free(ret); | |
return NULL; | |
} | |
ret->slider = ret->buffer; | |
return ret; | |
} | |
/*
 * Release the four heap blocks a request owns: the raw input buffer and the
 * unescaped copies of path, query and body. The request struct itself is not
 * freed and its pointers are not reset; callers either wipe the struct or
 * free it right afterwards.
 */
static inline void http_request_free_buffers(http_request_t *req){
  free(req->body_unescaped);
  free(req->query_unescaped);
  free(req->path_unescaped);
  free(req->buffer);
}
/*
 * Reset a request so it can parse a new message.
 *
 * All owned buffers are released and the struct is zeroed; only userdata,
 * buffer_prealloc and buffer_growth survive. A fresh buffer of
 * buffer_prealloc bytes is then allocated. If that allocation fails the
 * request is left with no buffer (buffer == NULL, buffer_allocated == 0).
 */
void http_request_clear(http_request_t *req){
  void *ud = req->userdata;
  size_t alloc = req->buffer_prealloc;
  size_t growth = req->buffer_growth;

  http_request_free_buffers(req);

  /* memset() is the standard replacement for the legacy bzero() */
  memset(req, 0, sizeof(*req));

  req->buffer_prealloc = alloc;
  req->buffer_growth = growth;
  req->userdata = ud;

  if(alloc && (req->buffer = malloc(alloc)))
    req->buffer_allocated = alloc;

  req->slider = req->buffer;
}
/*
 * Destroy a request created by http_request_create(): frees every buffer it
 * owns and then the struct itself. Passing NULL is a no-op, mirroring free().
 */
void http_request_free(http_request_t *req){
  if(!req)
    return;

  http_request_free_buffers(req);
  free(req);
}
/*
 * Byte -> lower-case byte map, indexed by the unsigned byte value (0..255).
 * TAB, LF, CR and the printable ASCII characters map to themselves, with
 * 'A'..'Z' folded to 'a'..'z'; every other byte maps to 0 ("no mapping").
 * The table is read-only, hence const. Laid out 16 bytes per row.
 */
static const char http_request_lowcase[] =
  /* 0x00 */ "\0\0\0\0\0\0\0\0\0\t\n\0\0\r\0\0"
  /* 0x10 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0x20 */ " !\"#$%&'()*+,-./"
  /* 0x30 */ "0123456789:;<=>?"
  /* 0x40 */ "@abcdefghijklmno"
  /* 0x50 */ "pqrstuvwxyz[\\]^_"
  /* 0x60 */ "`abcdefghijklmno"
  /* 0x70 */ "pqrstuvwxyz{|}~\0"
  /* 0x80 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0x90 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xa0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xb0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xc0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xd0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xe0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xf0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/*
 * Byte -> upper-case byte map, indexed by the unsigned byte value (0..255).
 * TAB, LF, CR and the printable ASCII characters map to themselves, with
 * 'a'..'z' folded to 'A'..'Z'; every other byte maps to 0.
 * http_request_parse_chunk() treats a 0 entry as "byte not allowed outside
 * the body". The table is read-only, hence const. Laid out 16 bytes per row.
 */
static const char http_request_upcase[] =
  /* 0x00 */ "\0\0\0\0\0\0\0\0\0\t\n\0\0\r\0\0"
  /* 0x10 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0x20 */ " !\"#$%&'()*+,-./"
  /* 0x30 */ "0123456789:;<=>?"
  /* 0x40 */ "@ABCDEFGHIJKLMNO"
  /* 0x50 */ "PQRSTUVWXYZ[\\]^_"
  /* 0x60 */ "`ABCDEFGHIJKLMNO"
  /* 0x70 */ "PQRSTUVWXYZ{|}~\0"
  /* 0x80 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0x90 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xa0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xb0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xc0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xd0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xe0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  /* 0xf0 */ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/*
 * ASCII-only lower-casing of a single byte: 'A'..'Z' become 'a'..'z', every
 * other value (including bytes >= 0x80) is returned unchanged.
 *
 * The previous table lookup indexed a 256-entry array with a plain char,
 * which reads out of bounds for negative values on platforms where char is
 * signed. A range test gives the same result for every in-range byte without
 * that hazard.
 */
static inline char http_request_tolower(char c){
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
/*
 * Lower-case a NUL-terminated string in place (ASCII letters only) and
 * return the pointer that was passed in.
 *
 * The previous one-liner, `*c++ = table[*c] ? table[*c] : *c`, both modified
 * and read `c` without an intervening sequence point (undefined behaviour),
 * and indexed the table with a possibly negative char. Each byte is now
 * read, converted and written in separate, well-ordered steps.
 */
static char* http_request_strtolower(char *c){
  char *p;

  for(p = c; *p; ++p){
    if(*p >= 'A' && *p <= 'Z')
      *p = (char)(*p - 'A' + 'a');
  }

  return c;
}
/*
 * Hex digit value for every byte, indexed by the unsigned byte value:
 * '0'..'9' -> 0..9, 'A'..'F' and 'a'..'f' -> 10..15, anything else -> -1.
 * Declared `signed char` so the -1 sentinel survives on platforms where
 * plain char is unsigned. Laid out 16 entries per row.
 */
static const signed char http_request_hexchar[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
  /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Decode a URL-encoded, NUL-terminated string in place: '+' becomes a space
 * and "%XY" (two hex digits) becomes the byte 0xXY. The result is never
 * longer than the input and is NUL-terminated.
 *
 * Returns 0 on success, 1 if a '%' is not followed by two hex digits. On
 * failure the string has been partially rewritten and should be discarded.
 * Note that "%00" decodes to a NUL byte, which ends the string early for
 * any caller that treats the result as a C string.
 */
static char http_request_uri_decode(char *src){
  char *out = src;   /* write cursor; never runs ahead of the read cursor */

  while(*src){
    if(*src == '+'){
      *out++ = ' ';
      ++src;
    }else if(*src == '%'){
      /* Digits are read from the READ cursor. src[2] is only looked at
       * when src[1] is a valid digit, so we never read past the
       * terminating NUL. */
      int hi = http_request_hexchar[(unsigned char)src[1]];
      int lo = hi < 0 ? -1 : http_request_hexchar[(unsigned char)src[2]];

      if(hi < 0 || lo < 0)
        return 1;

      *out++ = (char)(hi << 4 | lo);
      src += 3;
    }else{
      /* ordinary byte: move it down to close the gap left by escapes */
      *out++ = *src++;
    }
  }

  *out = 0;
  return 0;
}
/*
 * Re-point req->field, which aliases the old buffer, at the same offset in
 * the new buffer `t`. NULL fields stay NULL. Expects `req` and `t` in scope
 * and req->buffer still holding the OLD base address. Wrapped in
 * do/while(0) so it behaves as a single statement.
 */
#define SWITCH_FIELD(field) do{\
    if(req->field)\
      req->field = t + (req->field - req->buffer);\
  }while(0)

/*
 * Append `len` bytes from `data` to the request's input buffer, growing the
 * buffer when needed by max(buffer_growth, len) bytes.
 *
 * When realloc() moves the buffer, every pointer that refers into it
 * (slider, method_str, path, query, body, header_key, header_value) is
 * rebased exactly once.
 *
 * Returns 0 on success, 1 on size overflow or allocation failure. On
 * failure the request is left untouched: the old buffer stays valid and
 * buffer_allocated still describes it.
 */
static char http_request_concat_buffer(http_request_t *req, const char *data, size_t len){
  size_t newlen = req->buffer_len + len;

  if(newlen < req->buffer_len)            /* size_t wrap-around */
    return 1;

  if(newlen > req->buffer_allocated){
    size_t step = req->buffer_growth > len ? req->buffer_growth : len;
    size_t cap = req->buffer_allocated + step;
    char *t;

    if(cap < newlen)                      /* overflow while computing capacity */
      return 1;

    /* commit the new capacity only after realloc() has succeeded */
    if(!(t = realloc(req->buffer, cap)))
      return 1;

    /* With no previous buffer the slider is NULL and must start at the
     * beginning of the new one; otherwise it keeps its offset. */
    if(req->slider)
      SWITCH_FIELD(slider);
    else
      req->slider = t;

    SWITCH_FIELD(method_str);
    SWITCH_FIELD(path);
    SWITCH_FIELD(query);
    SWITCH_FIELD(body);
    SWITCH_FIELD(header_key);
    SWITCH_FIELD(header_value);

    req->buffer = t;
    req->buffer_allocated = cap;
  }

  if(len)                                 /* memcpy with a NULL source is undefined even for 0 bytes */
    memcpy(req->buffer + req->buffer_len, data, len);

  req->buffer_len = newlen;
  return 0;
}
struct __http_method_struct { | |
http_request_method_t num; | |
const char *str; | |
} __http_method_string[] = { | |
{http_method_get , "GET"}, | |
{http_method_head , "HEAD"}, | |
{http_method_post , "POST"}, | |
{http_method_put , "PUT"}, | |
{http_method_delete , "DELETE"}, | |
{http_method_mkcol , "MKCOL"}, | |
{http_method_copy , "COPY"}, | |
{http_method_move , "MOVE"}, | |
{http_method_options , "OPTIONS"}, | |
{http_method_propfind , "PROPFIND"}, | |
{http_method_proppatch , "PROPPATCH"}, | |
{http_method_lock , "LOCK"}, | |
{http_method_unlock , "UNLOCK"}, | |
{http_method_patch , "PATCH"}, | |
{http_method_trace , "TRACE"}, | |
{http_method_unknown , "UNKNOWN"} | |
}; | |
/*
 * Parser helper macros. All of them expect a `http_request_t *req` in scope.
 */

/* The byte the parser is currently looking at (an lvalue). */
#define CURR_CHAR (*(req->slider))
/* True when the current byte is a blank (space or TAB). */
#define IS_SPACE (CURR_CHAR == ' ' || CURR_CHAR == '\t')
/* True when the current byte is CR or LF. */
#define IS_NEWLINE (CURR_CHAR == '\r' || CURR_CHAR == '\n')

/*
 * Switch to the skipping state `st` and remember what follows it:
 *   pr  - non-zero: step the slider back one byte so the current byte is
 *         seen again by the new state
 *   nst - state to enter once skipping is finished
 *   nsp - address of the field that should receive the position where
 *         skipping ended, or NULL
 * Every argument is parenthesised so expressions can be passed safely.
 */
#define EXPECT_ST(pr, nst, nsp, st) do{\
    if((pr))\
      --req->slider;\
    req->next_ptr = (nsp);\
    req->next_state = (nst);\
    req->state = (st);\
  }while(0)

/* Skip blanks, then continue with `nst`. */
#define EXPECT_SPACES(nst, nsp, pre) EXPECT_ST((pre), (nst), (nsp), http_state_spaces)
/* Skip line terminators, then continue with `nst`. */
#define EXPECT_NEWLINE(nst, nsp, pre) EXPECT_ST((pre), (nst), (nsp), http_state_newline)
/* Skip blanks and line terminators, then continue with `nst`. */
#define EXPECT_SPACES_NEWLINE(nst, nsp, pre) EXPECT_ST((pre), (nst), (nsp), http_state_spaces_newline)
/*
 * Feed `len` bytes of raw request data to the parser.
 *
 * The bytes are appended to the request's buffer and the state machine is
 * run over everything that has not been consumed yet. Tokens are
 * NUL-terminated in place inside the buffer, and req->method_str, path,
 * query, header_key, header_value and body are pointed at them.
 * header_key / header_value are overwritten for every header line; the only
 * header acted upon is Content-Length.
 *
 * Returns the resulting state:
 *   http_state_allocation_error - the buffer could not be grown
 *   http_state_parse_error      - malformed input
 *   http_state_done             - the header block ended with no content
 *                                 length, or content_length body bytes
 *                                 have been consumed
 *   any other state             - parsing stopped mid-message
 *
 * NOTE(review): `seen_newline` is a local, so CR/LF tracking does not
 * survive across calls; feeding a request in several chunks that split a
 * line terminator may behave differently from a single call - verify.
 */
http_request_state_t http_request_parse_chunk(http_request_t *req, const char *data, size_t len){
  char *max, seen_newline = 0;

  if(http_request_concat_buffer(req, data, len))
    return req->state = http_state_allocation_error;

  for(max = req->buffer + req->buffer_len; req->slider < max && req->state != http_state_parse_error; ++req->slider){
    /* Outside the body only bytes with a non-zero table entry are allowed.
     * The cast matters: plain char may be signed, and a negative index
     * would read before the start of the table. */
    if(req->state != http_state_body && !http_request_upcase[(unsigned char)CURR_CHAR])
      return req->state = http_state_parse_error;

    switch(req->state){
      case http_state_started:
        /* skip leading blanks, then record where the method starts */
        EXPECT_SPACES(http_state_method, &req->method_str, 1);
        break;

      case http_state_method:
        if(IS_SPACE){
          struct __http_method_struct *meth;

          CURR_CHAR = 0;  /* terminate the method token */
          for(meth = __http_method_string; meth->num; ++meth)
            if(!strcasecmp(req->method_str, meth->str))
              break;

          /* falling off the table leaves meth->num == 0: unknown method */
          if(!(req->method = meth->num))
            return req->state = http_state_parse_error;

          EXPECT_SPACES(http_state_uri, &req->path, 0);
        }
        break;

      case http_state_uri:
        if(IS_SPACE)
          EXPECT_SPACES(http_state_h, NULL, 1);
        else if(CURR_CHAR == '?'){
          CURR_CHAR = 0;  /* terminate the path; the query follows */
          req->query = req->slider + 1;
          req->state = http_state_query;
        }
        break;

      case http_state_query:
        if(IS_SPACE)
          EXPECT_SPACES(http_state_h, NULL, 1);
        break;

      /* "HTTP/" is matched one letter at a time, case-insensitively */
      case http_state_h:
        req->state = http_request_upcase[(unsigned char)CURR_CHAR] == 'H' ? http_state_ht : http_state_parse_error;
        break;

      case http_state_ht:
        req->state = http_request_upcase[(unsigned char)CURR_CHAR] == 'T' ? http_state_htt : http_state_parse_error;
        break;

      case http_state_htt:
        req->state = http_request_upcase[(unsigned char)CURR_CHAR] == 'T' ? http_state_http : http_state_parse_error;
        break;

      case http_state_http:
        req->state = http_request_upcase[(unsigned char)CURR_CHAR] == 'P' ? http_state_https : http_state_parse_error;
        break;

      case http_state_https:
        req->state = CURR_CHAR == '/' ? http_state_major : http_state_parse_error;
        break;

      case http_state_major:
        if(CURR_CHAR >= '0' && CURR_CHAR <= '9')
          req->http_major = req->http_major * 10 + CURR_CHAR - '0';
        else
          req->state = CURR_CHAR == '.' ? http_state_minor : http_state_parse_error;
        break;

      case http_state_minor:
        if(CURR_CHAR >= '0' && CURR_CHAR <= '9')
          req->http_minor = req->http_minor * 10 + CURR_CHAR - '0';
        else if(IS_SPACE || IS_NEWLINE)
          EXPECT_SPACES_NEWLINE(http_state_header_key, &req->header_key, 1);
        else
          req->state = http_state_parse_error;
        break;

      case http_state_header_key:
        if(IS_SPACE)
          EXPECT_SPACES(http_state_header_separator, NULL, 1);
        else if(CURR_CHAR == ':'){
          CURR_CHAR = 0;  /* terminate the header name */
          EXPECT_SPACES(http_state_header_value, &req->header_value, 0);
        }else if(IS_NEWLINE){
          /* a line terminator where a header name should start ends the header block */
          if(req->content_length)
            EXPECT_NEWLINE(http_state_body, &req->body, 1);
          else
            return req->state = http_state_done;
        }
        break;

      case http_state_header_separator:
        if(CURR_CHAR != ':')
          return req->state = http_state_parse_error;
        CURR_CHAR = 0;
        EXPECT_SPACES(http_state_header_value, &req->header_value, 0);
        break;

      case http_state_header_value:
        if(IS_NEWLINE){
          if(!strcasecmp(req->header_key, "content-length")){
            char *t;

            /* strtol() skips leading whitespace (CR and LF included) and
             * accepts a sign, so an empty value could otherwise pick up
             * digits from the next line. Insist on a digit first. */
            if(*req->header_value < '0' || *req->header_value > '9')
              return req->state = http_state_parse_error;

            errno = 0;
            req->content_length = strtol(req->header_value, &t, 10);
            /* the number must be followed by a blank, a line terminator or the end of the string */
            if(req->content_length < 0 || errno == ERANGE || (t && !strchr(" \r\n\t", *t)))
              return req->state = http_state_parse_error;
          }

          seen_newline = CURR_CHAR;
          CURR_CHAR = 0;  /* terminate the header value */
          EXPECT_SPACES_NEWLINE(http_state_header_key, &req->header_key, 0);
        }
        break;

      case http_state_body:
        /* done once the slider sits on the last of content_length body bytes */
        if(req->slider - req->body >= req->content_length - 1)
          req->state = http_state_done;
        break;

      case http_state_newline:
      case http_state_spaces:
      case http_state_spaces_newline:
        /* The skip ends when the current byte is not skippable in this
         * state, or when a second line terminator shows up after one has
         * already been seen (LF, or CR following CR). */
        if((req->state == http_state_newline || !IS_SPACE) && (req->state == http_state_spaces || (seen_newline == '\n' || (CURR_CHAR == '\r' && seen_newline == '\r')) || !IS_NEWLINE)){
          req->state = req->next_state;
          req->next_state = http_state_parse_error;
          if(req->next_ptr)
            *req->next_ptr = req->slider;  /* the next token starts here */
          --req->slider;                   /* let the new state see this byte */
          seen_newline = 0;
        }else{
          if(IS_NEWLINE)
            seen_newline = CURR_CHAR;
          CURR_CHAR = 0;                   /* skipped bytes become terminators */
        }
        break;

      default:
        return req->state = http_state_parse_error;
    }
  }

  return req->state;
}
/*
 * Split a URL-encoded "key=value&key=value" string in place, decode each
 * pair and print it as "key = value".
 *
 * str   - writable buffer; separators are overwritten with NUL bytes
 * table - currently unused: pairs are only printed, not stored
 * len   - number of bytes to scan, or negative to use strlen(str) + 1.
 *         The last byte of the scanned range is overwritten with NUL when
 *         the final pair has a value, so pass a length that includes the
 *         terminator.
 *
 * Only the first '=' of a segment separates key from value; later ones are
 * part of the value. Segments without '=' are skipped.
 *
 * Returns http_state_done on success, http_state_parse_error when a key or
 * value contains an invalid percent escape.
 */
static http_request_state_t http_request_parse_query_string(char *str, st_table *table, ssize_t len){
  char *key = str, *value = NULL;

  (void)table;

  if(len < 0)
    len = strlen(str) + 1;

  for(; len; ++str, --len){
    switch(*str){
      case '=':
        if(!value){
          value = str + 1;
          *str = 0;  /* terminate the key */
        }
        break;

      case '&':
        *str = 0;    /* terminate the value (or a bare key) */
        /* a segment without '=' has no value; decoding or printing NULL would crash */
        if(value){
          if(http_request_uri_decode(key) || http_request_uri_decode(value))
            return http_state_parse_error;
          printf("%s = %s\n", key, value);
        }
        value = NULL;
        key = str + 1;
        break;

      default:
        break;
    }
  }

  /* final pair: there is no '&' after it, so terminate and decode it here */
  if(key && value){
    str[-1] = 0;
    if(http_request_uri_decode(key) || http_request_uri_decode(value))
      return http_state_parse_error;
    printf("%s = %s\n", key, value);
  }

  return http_state_done;
}
/*
 * Post-process a fully parsed request: build unescaped copies of the path,
 * the body and the query string, and run the body and the query through
 * http_request_parse_query_string().
 *
 * Does nothing unless req->state is http_state_done. Returns
 * http_state_done on success; on failure req->state is set to (and the
 * function returns) http_state_allocation_error or http_state_parse_error.
 * The copies are owned by the request and released with its other buffers.
 */
http_request_state_t http_request_parse_args(http_request_t *req){
  if(req->state != http_state_done)
    return req->state;

  if(req->path){
    free(req->path_unescaped);  /* do not leak the copy from an earlier call */
    if(!(req->path_unescaped = strdup(req->path)))
      return req->state = http_state_allocation_error;

    if(http_request_uri_decode(req->path_unescaped))
      return req->state = http_state_parse_error;
  }

  if(req->body){
    size_t body_len = (size_t)req->content_length;
    char *copy;

    free(req->body_unescaped);
    req->body_unescaped = NULL;

    /* One extra byte for a terminator: the body in the buffer is not
     * NUL-terminated, and the query-string parser overwrites the last byte
     * of the range it is given. */
    if(!(copy = malloc(body_len + 1)))
      return req->state = http_state_allocation_error;

    memcpy(copy, req->body, body_len);
    copy[body_len] = 0;
    req->body_unescaped = copy;

    if((req->state = http_request_parse_query_string(copy, NULL, req->content_length + 1)) != http_state_done)
      return req->state;
  }

  if(req->query){
    free(req->query_unescaped);
    if(!(req->query_unescaped = strdup(req->query)))
      return req->state = http_state_allocation_error;

    if((req->state = http_request_parse_query_string(req->query_unescaped, NULL, -1)) != http_state_done)
      return req->state;
  }

  return http_state_done;
}
/*
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * path - file to read
 * len  - if non-NULL, receives the number of bytes read (terminator not
 *        counted)
 *
 * Returns the buffer (caller frees it), or NULL when the file cannot be
 * stat'ed or opened, memory cannot be allocated, or reading fails. At most
 * the size reported by stat() is read.
 */
char* read_file(const char *path, size_t *len){
  FILE *fp;
  char *ret;
  size_t tmp;
  struct stat st;

  if(stat(path, &st) || st.st_size < 0)
    return NULL;

  if(!(fp = fopen(path, "rb")))
    return NULL;

  if(!(ret = malloc((size_t)st.st_size + 1))){
    fclose(fp);
    return NULL;
  }

  tmp = fread(ret, 1, (size_t)st.st_size, fp);

  /* fread() returns an unsigned count, so "tmp < 0" can never be true;
   * a failed read has to be detected through the stream's error flag. */
  if(ferror(fp)){
    fclose(fp);
    free(ret);
    return NULL;
  }

  ret[tmp] = 0;
  if(len)
    *len = tmp;

  fclose(fp);
  return ret;
}
int main(int argc,char *argv[]){ | |
char *req_str; | |
int st; | |
size_t len, b; | |
http_request_t *req; | |
if(!(req_str = read_file("req", &len))){ | |
perror("reading file failed"); | |
return 1; | |
} | |
if(!(req = http_request_create(1024, 1024, NULL))){ | |
perror("malloc"); | |
free(req_str); | |
return 1; | |
} | |
//for(b = 0; b < len; ++b){ | |
if((st = http_request_parse_chunk(req, req_str, len)) < 0){ | |
printf("parser faled: %d\n", st); | |
//break; | |
} | |
//} | |
printf("version: %d.%d\nmethod: [%d]\npath: [%s]\nquery: [%s]\ncontent-length: %d\nbody: [%.*s]\n", req->http_major, req->http_minor, req->method, req->path, req->query, req->content_length, req->content_length, req->body); | |
http_request_parse_args(req); | |
free(req_str); | |
http_request_free(req); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment