/*
 * Source: GitHub gist by @ammarfaizi2
 * (ammarfaizi2/47092ef4234188752512b65fa530886d, last active June 5, 2025).
 */
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2025 Ammar Faizi <[email protected]>
*
* http_logger.c - HTTP logger via LD_PRELOAD.
*
* gcc -Wall -Wextra -shared -fpic -fPIC -Os http_logger.c -o /tmp/http_logger.so;
* export LD_PRELOAD=/tmp/http_logger.so;
* export GWNET_HTTP_LOG_FILE=/tmp/http.log;
* bash -c 'for i in {1..1000000}; do printf "POST /aaa HTTP/1.1\r\nHost: test.local\r\nTransfer-Encoding: chunked\r\n\r\n5\r\nabcde\r\n0\r\n\r\n"; done | nc 127.0.0.1 8080';
* curl -v http://google.com;
* firefox;
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <errno.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/syscall.h>
/*
* Kind of HTTP message header: request or response.
*
* NOTE(review): these constants are not referenced in the visible part
* of the file; presumably they are used by code further down.
*/
enum {
GWNET_HTTP_HDR_TYPE_REQ = 0,
GWNET_HTTP_HDR_TYPE_RES = 1,
};
/*
* Header parser states, stored in gwnet_http_hdr_pctx::state.
*
* The header parse drivers advance through them in order:
* INIT -> FIRST_LINE -> FIELDS -> DONE. A stage that returns non-zero
* (e.g. -EAGAIN) leaves the state unchanged so the next call resumes
* at the same stage.
*/
enum {
GWNET_HTTP_HDR_PARSE_ST_INIT = 0,
GWNET_HTTP_HDR_PARSE_ST_FIRST_LINE = 1,
GWNET_HTTP_HDR_PARSE_ST_FIELDS = 2,
GWNET_HTTP_HDR_PARSE_ST_DONE = 3,
};
/*
* Error reasons stored in gwnet_http_hdr_pctx::err.
*
* hdr_translate_ret_err() maps the parser return value onto these:
* 0 -> NONE, -EAGAIN -> INCOMPLETE, -EINVAL -> MALFORMED,
* -E2BIG -> TOO_LONG, anything else (e.g. -ENOMEM) -> INTERNAL.
*/
enum {
GWNET_HTTP_HDR_ERR_NONE = 0,
GWNET_HTTP_HDR_ERR_INCOMPLETE = 1,
GWNET_HTTP_HDR_ERR_MALFORMED = 2,
GWNET_HTTP_HDR_ERR_TOO_LONG = 3,
GWNET_HTTP_HDR_ERR_INTERNAL = 100,
};
/*
* HTTP version codes stored in the `version` member of the request and
* response header structures. The first-line parsers only recognise
* "HTTP/1.0" and "HTTP/1.1"; any other version digit is rejected with
* -EINVAL, so they never store UNKNOWN themselves.
*/
enum {
GWNET_HTTP_VER_UNKNOWN = 0,
GWNET_HTTP_VER_1_0 = 1,
GWNET_HTTP_VER_1_1 = 2,
};
/*
* HTTP request method codes stored in gwnet_http_req_hdr::method.
*
* UNKNOWN doubles as the "no method matched" sentinel inside
* parse_hdr_req_first_line(), which rejects such requests with -EINVAL.
*/
enum {
GWNET_HTTP_METHOD_UNKNOWN = 0,
GWNET_HTTP_METHOD_GET = 1,
GWNET_HTTP_METHOD_POST = 2,
GWNET_HTTP_METHOD_PUT = 3,
GWNET_HTTP_METHOD_DELETE = 4,
GWNET_HTTP_METHOD_HEAD = 5,
GWNET_HTTP_METHOD_OPTIONS = 6,
GWNET_HTTP_METHOD_PATCH = 7,
GWNET_HTTP_METHOD_TRACE = 8,
GWNET_HTTP_METHOD_CONNECT = 9,
};
/*
* A single header field. Both strings are NUL-terminated heap copies
* made by gwnet_http_hdr_fields_addl() and released by
* gwnet_http_hdr_fields_free().
*/
struct gwnet_http_hdr_field {
/* Field name, stored as given (lookups are case-insensitive). */
char *key;
/* Field value; may be an empty string. */
char *val;
};
/*
* Growable collection of header fields. A zero-filled structure is a
* valid empty collection.
*/
struct gwnet_http_hdr_fields {
/* Heap array of @nr entries, grown by one slot per new key. */
struct gwnet_http_hdr_field *ff;
/* Number of valid entries in @ff. */
size_t nr;
};
/*
* Parsed HTTP request header. Zeroed by the request parser when it
* starts and released with gwnet_http_req_hdr_free().
*/
struct gwnet_http_req_hdr {
/* One of GWNET_HTTP_METHOD_*. */
uint8_t method;
/* One of GWNET_HTTP_VER_*. */
uint8_t version;
/*
* Heap copy of the request-target as received; it includes the
* '?' and query part when one is present.
*/
char *uri;
/*
* Heap copy of the text following a '?' in the request-target.
* Stays NULL when there is no query or the query is empty.
*/
char *qs;
/* Header fields that followed the request line. */
struct gwnet_http_hdr_fields fields;
};
/*
* Parsed HTTP response header. Zeroed by the response parser when it
* starts and released with gwnet_http_res_hdr_free().
*/
struct gwnet_http_res_hdr {
/* One of GWNET_HTTP_VER_*. */
uint8_t version;
/* Three-digit status code; the parser accepts 100..599. */
uint16_t code;
/*
* Heap copy of the reason phrase with trailing blanks removed;
* an empty string when the status line carries no phrase.
*/
char *reason;
/* Header fields that followed the status line. */
struct gwnet_http_hdr_fields fields;
};
/*
* Incremental HTTP header parsing context, shared by the request and
* response header parsers. Initialise it with
* gwnet_http_hdr_pctx_init() before the first parse call.
*/
struct gwnet_http_hdr_pctx {
/*
* Internally used to track the state of the parsing
* operation. Holds one of GWNET_HTTP_HDR_PARSE_ST_*.
*/
uint8_t state;
/*
* Set by the parser to indicate the error reason
* if the parsing operation fails. The caller may
* check this field if the parser returns a negative
* value. Holds one of GWNET_HTTP_HDR_ERR_*.
*/
uint8_t err;
/*
* Set by the caller to provide the buffer to be parsed.
*/
const char *buf;
/*
* Set by the caller to provide the length of the buffer
* to be parsed.
*/
uint64_t len;
/*
* Initially set to zero; it will be updated with the number
* of bytes that have been successfully parsed from the buffer.
* Partially parsed headers will return -EAGAIN and advance this
* offset.
*
* The caller must reset this to zero before continuing the
* parsing operation. In that case, the buffer must be
* advanced to the next unparsed byte.
*/
uint64_t off;
/*
* Total length of the header section being parsed. It is
* accumulated from the first line and all header fields.
* This is internally used by the parser to determine if the
* total length of the header section exceeds the maximum
* length.
*/
uint64_t tot_len;
/*
* Filled by the caller to limit the maximum length of the
* request or response header. If set to 0, the default
* max length will be used.
*
* Note that the parser rewrites this field when it starts a
* new header (state INIT): 0 becomes the default (16 KiB + 1)
* and any other value is incremented by one.
*/
uint64_t max_len;
};
/*
* Error reasons stored in gwnet_http_body_pctx::err.
*
* NOTE(review): the code that maps parser return values onto these is
* outside the visible part of the file; presumably it mirrors the
* header variant, with DST_TOO_SMALL corresponding to -ENOBUFS.
*/
enum {
GWNET_HTTP_BODY_ERR_NONE = 0,
GWNET_HTTP_BODY_ERR_INCOMPLETE = 1,
GWNET_HTTP_BODY_ERR_MALFORMED = 2,
GWNET_HTTP_BODY_ERR_TOO_LONG = 3,
GWNET_HTTP_BODY_ERR_DST_TOO_SMALL = 4,
GWNET_HTTP_BODY_ERR_INTERNAL = 100,
};
/*
* Chunked-body parser states, stored in gwnet_http_body_pctx::state.
*
* parse_chunked_len() moves to CHK_DATA after a non-zero chunk size
* and to CHK_TR after a zero chunk size; parse_chunked_data() moves to
* CHK_TR once the current chunk has been fully consumed.
*/
enum {
GWNET_HTTP_BODY_PARSE_ST_INIT = 0,
GWNET_HTTP_BODY_PARSE_ST_CHK_LEN = 1,
GWNET_HTTP_BODY_PARSE_ST_CHK_DATA = 2,
GWNET_HTTP_BODY_PARSE_ST_CHK_TR = 3,
GWNET_HTTP_BODY_PARSE_ST_CHK_DONE = 4,
};
/*
* Incremental HTTP body parsing context (chunked transfer encoding).
* Initialise it with gwnet_http_body_pctx_init() before the first
* parse call.
*/
struct gwnet_http_body_pctx {
/*
* Internally used to track the state of the parsing
* operation. Holds one of GWNET_HTTP_BODY_PARSE_ST_*.
*/
uint8_t state;
/*
* Set by the parser to indicate the error reason
* if the parsing operation fails. The caller may
* check this field if the parser returns a negative
* value. Holds one of GWNET_HTTP_BODY_ERR_*.
*/
uint8_t err;
/*
* Used internally to indicate if a zero-length chunk
* has been found during chunked transfer encoding
* parsing.
*/
bool found_zero_len;
/*
* Set by the caller to provide the buffer to be parsed.
*/
const char *buf;
/*
* Set by the caller to provide the length of the buffer
* to be parsed.
*/
uint64_t len;
/*
* Initially set to zero; it will be updated with the number
* of bytes that have been successfully parsed from the buffer.
* A partially parsed body will return -EAGAIN and advance this
* offset.
*
* The caller must reset this to zero before continuing the
* parsing operation. In that case, the buffer must be
* advanced to the next unparsed byte.
*/
uint64_t off;
/*
* Tracks the number of bytes remaining to be parsed in the
* current chunk. Used for chunked transfer encoding to indicate
* how many bytes are left in the current chunk.
*/
uint64_t rem_len;
/*
* Total length of all chunk data parsed so far.
*/
uint64_t tot_len;
/*
* Total accumulated length of all parsed bytes, including
* chunk size lines, chunk extensions, chunk data, trailing
* CRLFs, and the final zero-length chunk.
*
* The @max_len field is checked against this value to ensure
* the total body length does not exceed the allowed maximum.
* If it does, the parser returns -E2BIG.
*/
uint64_t tot_len_raw;
/*
* Filled by the caller to limit the maximum length of the
* body. If set to 0, the default max length will be used.
* It will be checked against @tot_len_raw.
*/
uint64_t max_len;
};
/**
* Initialize the HTTP header parsing context.
*
* Prepare the given gwnet_http_hdr_pctx structure for use in HTTP
* header parsing operations.
*
* @param ctx Pointer to a gwnet_http_hdr_pctx structure to initialize.
* Must not be NULL.
* @return 0 on success, or a negative value on failure.
*/
int gwnet_http_hdr_pctx_init(struct gwnet_http_hdr_pctx *ctx);
/**
* Free resources associated with the HTTP header parsing context.
*
* @param ctx Pointer to a gwnet_http_hdr_pctx structure to free.
* Must not be NULL.
*/
void gwnet_http_hdr_pctx_free(struct gwnet_http_hdr_pctx *ctx);
/**
* Parse an HTTP request header from the given parsing context.
*
* @param ctx Pointer to the HTTP header parsing context.
* @param hdr Pointer to the structure where the parsed HTTP request
* header will be stored.
* @return 0 on success,
* -EAGAIN if more data is needed,
* -EINVAL if the request line is malformed,
* -ENOMEM if memory allocation fails,
* -E2BIG if the request line exceeds the maximum length.
*/
int gwnet_http_req_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
struct gwnet_http_req_hdr *hdr);
/**
* Parse an HTTP response header from the given parsing context.
*
* @param ctx Pointer to the HTTP header parsing context.
* @param hdr Pointer to the structure where the parsed HTTP response
* header will be stored.
* @return 0 on success,
* -EAGAIN if more data is needed,
* -EINVAL if the response line is malformed,
* -ENOMEM if memory allocation fails,
* -E2BIG if the response line exceeds the maximum length.
*/
int gwnet_http_res_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
struct gwnet_http_res_hdr *hdr);
void gwnet_http_req_hdr_free(struct gwnet_http_req_hdr *hdr);
void gwnet_http_res_hdr_free(struct gwnet_http_res_hdr *hdr);
/**
* Free all memory associated with the given HTTP header fields
* structure.
*
* @param ff Pointer to the HTTP header fields structure to free.
*/
void gwnet_http_hdr_fields_free(struct gwnet_http_hdr_fields *ff);
/**
* Add a header field with the specified key and value to the HTTP
* header fields structure.
*
* @param ff Pointer to the HTTP header fields structure.
* @param k Null-terminated string containing the header key.
* @param v Null-terminated string containing the header value.
* @return 0 on success, or a negative value on error.
*/
int gwnet_http_hdr_fields_add(struct gwnet_http_hdr_fields *ff, const char *k,
const char *v);
/**
* Add a header field with the specified key and a formatted value to
* the HTTP header fields structure.
*
* @param ff Pointer to the HTTP header fields structure.
* @param k Null-terminated string containing the header key.
* @param fmt printf-style format string for the header value.
* @param ... Arguments for the format string.
* @return 0 on success, or a negative value on error.
*/
__attribute__((__format__(printf, 3, 4)))
int gwnet_http_hdr_fields_addf(struct gwnet_http_hdr_fields *ff,
const char *k, const char *fmt, ...);
/**
* Add a header field with the specified key and value, using explicit
* lengths for both key and value.
*
* @param ff Pointer to the HTTP header fields structure.
* @param k Pointer to the header key.
* @param klen Length of the header key.
* @param v Pointer to the header value.
* @param vlen Length of the header value.
* @return 0 on success, or a negative value on error.
*/
int gwnet_http_hdr_fields_addl(struct gwnet_http_hdr_fields *ff,
const char *k, size_t klen, const char *v,
size_t vlen);
/**
* Retrieve the value of a header field by its key from the HTTP
* header fields structure.
*
* @param ff Pointer to the HTTP header fields structure.
* @param k Null-terminated string containing the header key.
* @return Pointer to the header value, or NULL if not found.
*/
const char *gwnet_http_hdr_fields_get(const struct gwnet_http_hdr_fields *ff,
const char *k);
/**
* Retrieve the value of a header field by its key, using an explicit
* key length, from the HTTP header fields structure.
*
* @param ff Pointer to the HTTP header fields structure.
* @param k Pointer to the header key.
* @param klen Length of the header key.
* @return Pointer to the header value, or NULL if not found.
*/
const char *gwnet_http_hdr_fields_getl(const struct gwnet_http_hdr_fields *ff,
const char *k, size_t klen);
/**
* Initialize the HTTP body processing context.
*
* Set up the provided gwnet_http_body_pctx structure for use in HTTP
* body processing. It should be called before any operations are
* performed on the context.
*
* @param ctx Pointer to a gwnet_http_body_pctx structure to be
* initialized. Must not be NULL.
*
* @return 0 on success, or a negative error code on failure.
*/
int gwnet_http_body_pctx_init(struct gwnet_http_body_pctx *ctx);
/**
* Parses an HTTP body encoded with chunked transfer encoding.
*
* @param ctx Pointer to the HTTP body parsing context structure.
* @param dst Buffer where the parsed body data will be written.
* @param dst_len Length of the destination buffer in bytes.
*
* @return 0 on success,
* -EAGAIN if more data is needed,
* -EINVAL if the chunked body is malformed,
* -ENOBUFS if the destination buffer is not large enough,
* -E2BIG if the total length of the body exceeds the maximum.
*/
int gwnet_http_body_parse_chunked(struct gwnet_http_body_pctx *ctx,
char *dst, size_t dst_len);
/* Return the smaller of two size_t values. */
static inline size_t min_st(size_t a, size_t b)
{
	if (b <= a)
		return b;

	return a;
}
/**
 * Tell whether @c is a 'tchar' (token character) as defined by
 * RFC 7230, Section 3.2.6:
 *
 *   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "."
 *         / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
 *
 * Reference: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
 *
 * @param c	The character to check, passed as an int.
 * @return	1 if @c is a tchar, 0 otherwise (including for values
 *		outside the ASCII range).
 */
static inline int is_tchar(int c)
{
	/* The 15 non-alphanumeric token characters from the spec. */
	static const char symbols[] = "!#$%&'*+-.^_`|~";

	/* Every tchar is a printable, non-space ASCII character. */
	if (c < 0x21 || c > 0x7E)
		return 0;

	if (c >= '0' && c <= '9')
		return 1;

	if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
		return 1;

	return memchr(symbols, c, sizeof(symbols) - 1) != NULL;
}
/*
 * Tell whether @c is a blank: a space or a horizontal tab.
 * Returns 1 for a blank, 0 otherwise (CR and LF are not blanks).
 */
static inline int is_space(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
/*
 * Tell whether @c is a printable ASCII character in the range
 * 0x20..0x7E, inclusive.
 *
 * Note that this range includes the space character (0x20). Control
 * characters, DEL (0x7F) and anything outside ASCII yield 0.
 */
static inline int is_vchar(int c)
{
	return (c >= 0x20 && c <= 0x7E) ? 1 : 0;
}
/**
 * Tell whether a header field name belongs to the set of standard
 * list-valued headers, i.e. headers that may legitimately appear more
 * than once in a message and whose values are merged into a single
 * comma-separated value.
 *
 * @param key	The header field name to check (compared
 *		case-insensitively; does not need to be NUL-terminated).
 * @param n	The length of the header field name.
 * @return	true if the name matches one of the known list-valued
 *		headers exactly; false otherwise.
 */
static bool is_field_allowed_to_be_duplicate(const char *key, size_t n)
{
	static const char *const list_headers[] = {
		"Accept",		/* RFC 7231, Section 5.3.2 */
		"Accept-Charset",	/* RFC 7231, Section 5.3.3 */
		"Accept-Encoding",	/* RFC 7231, Section 5.3.4 */
		"Accept-Language",	/* RFC 7231, Section 5.3.5 */
		"Accept-Ranges",	/* RFC 7233, Section 2.3 */
		"Allow",		/* RFC 7231, Section 7.4.1 */
		"Cache-Control",	/* RFC 7234, Section 5.2 */
		"Connection",		/* RFC 7230, Section 6.1 */
		"If-Match",		/* RFC 7232, Section 3.1 */
		"If-None-Match",	/* RFC 7232, Section 3.2 */
		"Range",		/* RFC 7233, Section 3.1 */
		"Pragma",		/* RFC 7234, Section 5.4 */
		"Proxy-Authenticate",	/* RFC 7235, Section 4.3 */
		"TE",			/* RFC 7230, Section 4.3 */
		"Trailer",		/* RFC 7230, Section 4.4 */
		"Transfer-Encoding",	/* RFC 7230, Section 3.3.1 */
		"Upgrade",		/* RFC 7230, Section 6.7 */
		"Vary",			/* RFC 7231, Section 7.1.4 */
		"Via",			/* RFC 7230, Section 5.7.1 */
		"Warning",		/* RFC 7234, Section 5.5 */
		"WWW-Authenticate",	/* RFC 7235, Section 4.1 */
	};
	size_t idx;

	for (idx = 0; idx < sizeof(list_headers) / sizeof(list_headers[0]); idx++) {
		const char *name = list_headers[idx];

		/* Length must match exactly before comparing the bytes. */
		if (strlen(name) != n)
			continue;

		if (strncasecmp(key, name, n) == 0)
			return true;
	}

	return false;
}
/**
* Parse the first line of an HTTP/1.x request header, also known as
* the request line. According to RFC 9112, Section 3.1.1, the request
* line consists of the method, request-target, and HTTP version,
* separated by spaces and terminated by CRLF.
*
* Example:
* GET /index.html HTTP/1.1\r\n
*
* It extracts and validates these components from the provided parsing
* context and stores them in the request header structure.
*
* Reference:
* RFC 9112, Section 3:
* https://datatracker.ietf.org/doc/html/rfc9112#section-3
*
* @param ctx Pointer to the HTTP header parsing context.
* @param hdr Pointer to the HTTP request header structure to populate.
* @return 0 on success,
* -EAGAIN if more data is needed,
* -EINVAL if the request line is malformed,
* -ENOMEM if memory allocation fails,
* -E2BIG if the request line exceeds the maximum length.
*/
static int parse_hdr_req_first_line(struct gwnet_http_hdr_pctx *ctx,
struct gwnet_http_req_hdr *hdr)
{
struct method_entry {
const char str[9];
uint8_t len;
uint8_t code;
};
static const struct method_entry methods[] = {
{ "GET", 3, GWNET_HTTP_METHOD_GET },
{ "POST", 4, GWNET_HTTP_METHOD_POST },
{ "PUT", 3, GWNET_HTTP_METHOD_PUT },
{ "DELETE", 6, GWNET_HTTP_METHOD_DELETE },
{ "HEAD", 4, GWNET_HTTP_METHOD_HEAD },
{ "OPTIONS", 7, GWNET_HTTP_METHOD_OPTIONS },
{ "PATCH", 5, GWNET_HTTP_METHOD_PATCH },
{ "TRACE", 5, GWNET_HTTP_METHOD_TRACE },
{ "CONNECT", 7, GWNET_HTTP_METHOD_CONNECT }
};
static const size_t nr_methods = sizeof(methods) / sizeof(methods[0]);
size_t i, cmpl, reml, off = 0, len = ctx->len - ctx->off;
const char *uri, *qs, *buf = &ctx->buf[ctx->off];
uint8_t method_code, version_code;
uint32_t uri_len, qs_len;
if (!len)
return -EAGAIN;
method_code = GWNET_HTTP_METHOD_UNKNOWN;
for (i = 0; i < nr_methods; i++) {
const struct method_entry *me = &methods[i];
size_t mlen = me->len;
cmpl = min_st(len, mlen);
if (memcmp(buf, me->str, cmpl))
continue;
if (cmpl < mlen)
return -EAGAIN;
method_code = me->code;
off += mlen;
break;
}
if (method_code == GWNET_HTTP_METHOD_UNKNOWN)
return -EINVAL;
if (off >= len)
return -EAGAIN;
/*
* After the method, there must be a space.
*/
if (!is_space(buf[off]))
return -EINVAL;
/*
* Keep going until we find a non-space character.
*/
while (is_space(buf[off])) {
if (++off >= len)
return -EAGAIN;
if (ctx->tot_len + off >= ctx->max_len)
return -E2BIG;
}
/*
* Per RFC 7230, Section 5.3.1:
* When making a request directly to an origin server,
* other than a CONNECT or server-wide OPTIONS request
* a client MUST send only the absolute path and query
* components of the target URI as the request-target.
* If the target URI's path component is empty, the
* client MUST send "/" as the path within the
* origin-form of request-target.
*/
if (method_code != GWNET_HTTP_METHOD_CONNECT &&
method_code != GWNET_HTTP_METHOD_OPTIONS) {
if (buf[off] != '/')
return -EINVAL;
} else {
if (!is_vchar(buf[off]))
return -EINVAL;
}
uri = &buf[off];
qs = NULL;
uri_len = 0;
qs_len = 0;
/*
* Keep going until we find a space character.
*/
while (1) {
char c = buf[off++];
if (off >= len)
return -EAGAIN;
if (ctx->tot_len + off >= ctx->max_len)
return -E2BIG;
if (is_space(c))
break;
if (!is_vchar(c))
return -EINVAL;
uri_len++;
if (qs)
qs_len++;
/*
* If we find a question mark, start assigning the
* the query string.
*/
if (c == '?')
qs = &buf[off];
}
/*
* Keep going until we find a non-space character.
*/
while (is_space(buf[off])) {
if (++off >= len)
return -EAGAIN;
if (ctx->tot_len + off >= ctx->max_len)
return -E2BIG;
}
/*
* Parse the HTTP version. Only support HTTP/1.0 and HTTP/1.1.
*/
reml = len - off;
cmpl = min_st(reml, 7);
if (memcmp(&buf[off], "HTTP/1.", cmpl))
return -EINVAL;
if (cmpl < 7)
return -EAGAIN;
off += 7;
if (off >= len)
return -EAGAIN;
/*
* Check exceeding length upfront, we know that
* the HTTP version is always 8 characters long
* followed by a CRLF (or just LF).
*/
if (ctx->tot_len + off + 1 + 1 >= ctx->max_len)
return -E2BIG;
switch (buf[off]) {
case '0':
version_code = GWNET_HTTP_VER_1_0;
break;
case '1':
version_code = GWNET_HTTP_VER_1_1;
break;
default:
return -EINVAL;
}
if (++off >= len)
return -EAGAIN;
/*
* After the HTTP version, expect a CRLF. But the CR
* is optional, so we can also accept just LF.
*/
if (buf[off] == '\r') {
if (++off >= len)
return -EAGAIN;
}
if (buf[off] != '\n')
return -EINVAL;
++off;
if (ctx->tot_len + off >= ctx->max_len)
return -E2BIG;
hdr->uri = malloc(uri_len + 1);
if (!hdr->uri)
return -ENOMEM;
if (qs_len) {
hdr->qs = malloc(qs_len + 1);
if (!hdr->qs) {
free(hdr->uri);
hdr->uri = NULL;
return -ENOMEM;
}
memcpy(hdr->qs, qs, qs_len);
hdr->qs[qs_len] = '\0';
}
memcpy(hdr->uri, uri, uri_len);
hdr->uri[uri_len] = '\0';
hdr->method = method_code;
hdr->version = version_code;
ctx->off += off;
ctx->tot_len += off;
return 0;
}
/**
 * Parse the first line of an HTTP/1.x response header, also known as
 * the status line. According to RFC 7230, Section 3.1.2, the status
 * line is formatted as:
 *
 *   HTTP-version SP status-code SP reason-phrase CRLF
 *
 * Example:
 *   HTTP/1.1 200 OK\r\n
 *
 * Runs of spaces/tabs are accepted as separators and a bare LF is
 * accepted as the line terminator. At least one blank is required
 * after the status code, even when the reason phrase is empty.
 *
 * Nothing is committed until the whole line has been validated: on
 * any non-zero return @ctx->off, @ctx->tot_len and @hdr are left
 * untouched, so the caller can retry with more data.
 *
 * Reference:
 *   RFC 7230, Section 3.1.2:
 *   https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2
 *
 * @param ctx	Pointer to the HTTP header parsing context.
 * @param hdr	Pointer to the HTTP response header structure to populate.
 * @return	0 on success,
 *		-EAGAIN if more data is needed,
 *		-EINVAL if the first line is malformed,
 *		-ENOMEM if memory allocation fails,
 *		-E2BIG if the first line exceeds the maximum length.
 */
static int parse_hdr_res_first_line(struct gwnet_http_hdr_pctx *ctx,
				    struct gwnet_http_res_hdr *hdr)
{
	static const char ver_prefix[] = "HTTP/1.";
	const size_t ver_prefix_len = sizeof(ver_prefix) - 1;
	const char *line = &ctx->buf[ctx->off];
	size_t avail = ctx->len - ctx->off;
	size_t pos, nr_cmp, nr_digit;
	const char *phrase;
	uint32_t phrase_len = 0;
	uint16_t status;
	uint8_t ver;
	char ch;

	if (!avail)
		return -EAGAIN;

	/*
	 * HTTP version: only HTTP/1.0 and HTTP/1.1 are supported.
	 * Compare just the bytes we have; a matching proper prefix
	 * means more data is needed.
	 */
	nr_cmp = min_st(avail, ver_prefix_len);
	if (memcmp(line, ver_prefix, nr_cmp) != 0)
		return -EINVAL;
	if (nr_cmp < ver_prefix_len)
		return -EAGAIN;

	pos = ver_prefix_len;
	if (pos >= avail)
		return -EAGAIN;
	if (ctx->tot_len + pos >= ctx->max_len)
		return -E2BIG;

	if (line[pos] == '0')
		ver = GWNET_HTTP_VER_1_0;
	else if (line[pos] == '1')
		ver = GWNET_HTTP_VER_1_1;
	else
		return -EINVAL;

	if (++pos >= avail)
		return -EAGAIN;
	if (ctx->tot_len + pos >= ctx->max_len)
		return -E2BIG;

	/* At least one blank must separate the version and the code. */
	if (!is_space(line[pos]))
		return -EINVAL;

	while (is_space(line[pos])) {
		if (++pos >= avail)
			return -EAGAIN;
	}

	/*
	 * Status code: exactly three digits, 100..599. The leading
	 * digit is range-checked separately from the other two.
	 */
	ch = line[pos++];
	if (ch < '1' || ch > '5')
		return -EINVAL;
	if (pos >= avail)
		return -EAGAIN;
	status = (uint16_t)(ch - '0');

	for (nr_digit = 0; nr_digit < 2; nr_digit++) {
		ch = line[pos++];
		if (ch < '0' || ch > '9')
			return -EINVAL;
		if (pos >= avail)
			return -EAGAIN;
		status = (uint16_t)(status * 10 + (ch - '0'));
	}

	/* At least one blank must follow the status code. */
	if (!is_space(line[pos]))
		return -EINVAL;

	while (is_space(line[pos])) {
		if (++pos >= avail)
			return -EAGAIN;
		if (ctx->tot_len + pos >= ctx->max_len)
			return -E2BIG;
	}

	/*
	 * Optional reason phrase: printable characters and blanks up
	 * to the end of the line. The bounds checks intentionally come
	 * before the character validation.
	 */
	phrase = &line[pos];
	for (;;) {
		ch = line[pos];
		if (ch == '\r' || ch == '\n')
			break;

		if (++pos >= avail)
			return -EAGAIN;
		if (ctx->tot_len + pos >= ctx->max_len)
			return -E2BIG;

		if (!(is_vchar(ch) || is_space(ch)))
			return -EINVAL;

		phrase_len++;
	}

	/* Line terminator: CRLF, with the CR being optional. */
	if (line[pos] == '\r') {
		if (++pos >= avail)
			return -EAGAIN;
		if (ctx->tot_len + pos >= ctx->max_len)
			return -E2BIG;
	}

	if (line[pos] != '\n')
		return -EINVAL;

	pos++;
	if (ctx->tot_len + pos >= ctx->max_len)
		return -E2BIG;

	/* Drop trailing blanks from the reason phrase. */
	while (phrase_len && is_space(phrase[phrase_len - 1]))
		phrase_len--;

	/* The line is valid; only now allocate and commit the result. */
	hdr->reason = malloc(phrase_len + 1);
	if (!hdr->reason)
		return -ENOMEM;

	memcpy(hdr->reason, phrase, phrase_len);
	hdr->reason[phrase_len] = '\0';
	hdr->version = ver;
	hdr->code = status;
	ctx->off += pos;
	ctx->tot_len += pos;
	return 0;
}
/**
 * Parse HTTP header fields from the provided parsing context.
 *
 * According to RFC 7230, Section 3.2: "Each header field consists of a
 * case-insensitive field name followed by a colon (":"), optional
 * whitespace, and the field value."
 *
 * Parsing stops successfully at the empty line that terminates the
 * header section. Each complete field is committed as soon as it has
 * been parsed (it is added to @ff and @ctx->off / @ctx->tot_len are
 * advanced past it), so when -EAGAIN is returned the caller resumes
 * right after the last complete field.
 *
 * Line terminators are CRLF, with the CR being optional. A CR that is
 * not followed by LF (a "bare CR") is rejected as malformed, as
 * RFC 9112, Section 2.2 requires of recipients.
 *
 * A field whose name is already present and is not a known list-valued
 * header is treated as malformed (-EINVAL).
 *
 * Reference: RFC 7230, Section 3.2 - Header Fields
 * https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
 *
 * @param ctx	Pointer to the HTTP header parsing context.
 * @param ff	Pointer to the structure where parsed header fields
 *		will be stored.
 * @return	0 on success,
 *		-EAGAIN if more data is needed,
 *		-EINVAL if the header fields are malformed,
 *		-ENOMEM if memory allocation fails,
 *		-E2BIG if the header fields exceed the maximum length.
 */
static int parse_hdr_fields(struct gwnet_http_hdr_pctx *ctx,
			    struct gwnet_http_hdr_fields *ff)
{
	size_t off = 0, len = ctx->len - ctx->off;
	const char *buf = &ctx->buf[ctx->off];
	int r;

	if (!len)
		return -EAGAIN;

	while (1) {
		const char *k, *v, *p;
		uint32_t kl, vl;

		/*
		 * A line that starts with CR can only be the empty
		 * line ending the header section, so the CR must be
		 * followed by LF. Without this check a bare CR would
		 * be silently dropped and the rest of the line parsed
		 * as a regular field.
		 */
		if (buf[off] == '\r') {
			if (++off >= len)
				return -EAGAIN;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
			if (buf[off] != '\n')
				return -EINVAL;
		}

		/* Empty line: end of the header section. */
		if (buf[off] == '\n') {
			++off;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
			ctx->off += off;
			break;
		}

		/*
		 * Parse the key. The key must only contain tchar
		 * characters, and must end with a colon.
		 *
		 * After the colon, there may be a space, but it is
		 * optional. If it exists, it must be followed by
		 * a vchar or space characters.
		 *
		 * The value may contain trailing space characters,
		 * they must be trimmed.
		 *
		 * The value may be empty, but the key must not.
		 */
		k = &buf[off];
		kl = 0;
		while (1) {
			if (off >= len)
				return -EAGAIN;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
			if (buf[off] == ':')
				break;
			if (!is_tchar(buf[off]))
				return -EINVAL;
			kl++;
			off++;
		}

		if (!kl)
			return -EINVAL;

		/* Skip the colon. */
		if (++off >= len)
			return -EAGAIN;

		/*
		 * Keep going until we find a non-space character.
		 */
		while (is_space(buf[off])) {
			if (++off >= len)
				return -EAGAIN;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
		}

		v = &buf[off];
		vl = 0;
		while (1) {
			char c = buf[off];

			if (c == '\r' || c == '\n')
				break;
			if (!is_vchar(c) && !is_space(c))
				return -EINVAL;
			vl++;
			off++;
			if (off >= len)
				return -EAGAIN;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
		}

		if (buf[off] == '\r') {
			if (++off >= len)
				return -EAGAIN;
			if (ctx->tot_len + off >= ctx->max_len)
				return -E2BIG;
		}

		if (buf[off] != '\n')
			return -EINVAL;

		++off;
		if (ctx->tot_len + off >= ctx->max_len)
			return -E2BIG;

		if (vl) {
			/*
			 * Trim trailing whitespaces from the value.
			 * Leading blanks were skipped above, so v[0]
			 * is never a blank and @p cannot run past the
			 * start of the value.
			 */
			p = &v[vl - 1];
			while (p >= v && is_space(*p)) {
				--vl;
				--p;
			}
		}

		/*
		 * A positive return (duplicate of a field that must
		 * not repeat) is reported as a malformed header.
		 */
		r = gwnet_http_hdr_fields_addl(ff, k, kl, v, vl);
		if (r)
			return (r < 0) ? r : -EINVAL;

		/* Commit this field and restart at the next line. */
		ctx->tot_len += off;
		ctx->off += off;
		if (off >= len)
			return -EAGAIN;

		buf = &ctx->buf[ctx->off];
		len = ctx->len - ctx->off;
		off = 0;
	}

	return 0;
}
/*
 * Reset @ctx to an all-zero context in the INIT state.
 * Always returns 0.
 */
int gwnet_http_hdr_pctx_init(struct gwnet_http_hdr_pctx *ctx)
{
	/* memset() rather than assignment so padding is cleared too. */
	memset(ctx, 0, sizeof *ctx);
	ctx->state = GWNET_HTTP_HDR_PARSE_ST_INIT;

	return 0;
}
/*
 * Release a header parsing context. The context owns no heap memory,
 * so this only wipes the structure.
 */
void gwnet_http_hdr_pctx_free(struct gwnet_http_hdr_pctx *ctx)
{
	memset(ctx, 0, sizeof *ctx);
}
/*
 * Prepare @ctx for parsing a new header: clear the running length and
 * normalise the length limit.
 *
 * The parsers test "tot_len + off >= max_len", so the stored limit is
 * one greater than the caller's value. A caller value of 0 selects
 * the default of 16 KiB.
 *
 * Note: a non-zero @ctx->max_len is bumped on every call, so a context
 * reused without re-initialisation grows its limit by one per message.
 */
static void prepare_parser(struct gwnet_http_hdr_pctx *ctx)
{
	const uint64_t dfl_limit = 1024ull * 16ull;

	ctx->tot_len = 0;
	ctx->max_len = (ctx->max_len ? ctx->max_len : dfl_limit) + 1ull;
}
/*
 * Drive the request header state machine:
 * INIT -> FIRST_LINE -> FIELDS -> DONE.
 *
 * Each stage falls through to the next one on success. A stage that
 * returns non-zero (e.g. -EAGAIN) leaves the state unchanged, so the
 * next call resumes at that stage. Once DONE, further calls return 0
 * without touching anything.
 */
static int __gwnet_http_req_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
				      struct gwnet_http_req_hdr *hdr)
{
	int ret;

	switch (ctx->state) {
	case GWNET_HTTP_HDR_PARSE_ST_INIT:
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_FIRST_LINE;
		memset(hdr, 0, sizeof(*hdr));
		prepare_parser(ctx);
		/* fallthrough */
	case GWNET_HTTP_HDR_PARSE_ST_FIRST_LINE:
		ret = parse_hdr_req_first_line(ctx, hdr);
		if (ret)
			return ret;
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_FIELDS;
		/* fallthrough */
	case GWNET_HTTP_HDR_PARSE_ST_FIELDS:
		ret = parse_hdr_fields(ctx, &hdr->fields);
		if (ret)
			return ret;
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_DONE;
		break;
	default:
		break;
	}

	return 0;
}
/*
 * Drive the response header state machine:
 * INIT -> FIRST_LINE -> FIELDS -> DONE.
 *
 * Each stage falls through to the next one on success. A stage that
 * returns non-zero (e.g. -EAGAIN) leaves the state unchanged, so the
 * next call resumes at that stage. Once DONE, further calls return 0
 * without touching anything.
 */
static int __gwnet_http_res_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
				      struct gwnet_http_res_hdr *hdr)
{
	int ret;

	switch (ctx->state) {
	case GWNET_HTTP_HDR_PARSE_ST_INIT:
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_FIRST_LINE;
		memset(hdr, 0, sizeof(*hdr));
		prepare_parser(ctx);
		/* fallthrough */
	case GWNET_HTTP_HDR_PARSE_ST_FIRST_LINE:
		ret = parse_hdr_res_first_line(ctx, hdr);
		if (ret)
			return ret;
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_FIELDS;
		/* fallthrough */
	case GWNET_HTTP_HDR_PARSE_ST_FIELDS:
		ret = parse_hdr_fields(ctx, &hdr->fields);
		if (ret)
			return ret;
		ctx->state = GWNET_HTTP_HDR_PARSE_ST_DONE;
		break;
	default:
		break;
	}

	return 0;
}
/*
 * Record the reason behind parser return value @r in @ctx->err and
 * hand @r back unchanged, so it can wrap a parser call directly.
 *
 * Any value other than 0, -EAGAIN, -EINVAL and -E2BIG (for example
 * -ENOMEM) is reported as GWNET_HTTP_HDR_ERR_INTERNAL.
 */
static int hdr_translate_ret_err(struct gwnet_http_hdr_pctx *ctx, int r)
{
	uint8_t reason;

	if (r == 0)
		reason = GWNET_HTTP_HDR_ERR_NONE;
	else if (r == -EAGAIN)
		reason = GWNET_HTTP_HDR_ERR_INCOMPLETE;
	else if (r == -EINVAL)
		reason = GWNET_HTTP_HDR_ERR_MALFORMED;
	else if (r == -E2BIG)
		reason = GWNET_HTTP_HDR_ERR_TOO_LONG;
	else
		reason = GWNET_HTTP_HDR_ERR_INTERNAL;

	ctx->err = reason;
	return r;
}
/*
 * Public entry point for request header parsing: run the state
 * machine, then mirror its result into @ctx->err.
 */
int gwnet_http_req_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
			     struct gwnet_http_req_hdr *hdr)
{
	int ret = __gwnet_http_req_hdr_parse(ctx, hdr);

	return hdr_translate_ret_err(ctx, ret);
}
/*
 * Public entry point for response header parsing: run the state
 * machine, then mirror its result into @ctx->err.
 */
int gwnet_http_res_hdr_parse(struct gwnet_http_hdr_pctx *ctx,
			     struct gwnet_http_res_hdr *hdr)
{
	int ret = __gwnet_http_res_hdr_parse(ctx, hdr);

	return hdr_translate_ret_err(ctx, ret);
}
/*
 * Release everything owned by a parsed request header and zero the
 * structure, which makes a second call harmless. NULL is accepted.
 */
void gwnet_http_req_hdr_free(struct gwnet_http_req_hdr *hdr)
{
	if (hdr) {
		gwnet_http_hdr_fields_free(&hdr->fields);
		free(hdr->qs);
		free(hdr->uri);
		memset(hdr, 0, sizeof(*hdr));
	}
}
/*
 * Release everything owned by a parsed response header and zero the
 * structure, which makes a second call harmless. NULL is accepted.
 */
void gwnet_http_res_hdr_free(struct gwnet_http_res_hdr *hdr)
{
	if (hdr) {
		gwnet_http_hdr_fields_free(&hdr->fields);
		free(hdr->reason);
		memset(hdr, 0, sizeof(*hdr));
	}
}
void gwnet_http_hdr_fields_free(struct gwnet_http_hdr_fields *ff)
{
size_t i;
if (!ff)
return;
for (i = 0; i < ff->nr; i++) {
free(ff->ff[i].key);
free(ff->ff[i].val);
}
free(ff->ff);
memset(ff, 0, sizeof(*ff));
}
/*
 * Convenience wrapper around gwnet_http_hdr_fields_addl() for
 * NUL-terminated key and value strings; returns its result unchanged.
 */
int gwnet_http_hdr_fields_add(struct gwnet_http_hdr_fields *ff, const char *k,
			      const char *v)
{
	size_t klen = strlen(k);
	size_t vlen = strlen(v);

	return gwnet_http_hdr_fields_addl(ff, k, klen, v, vlen);
}
/*
 * Add a header field whose value is produced from a printf-style
 * format string.
 *
 * The value is formatted into a temporary heap buffer (sized with a
 * first vsnprintf() pass) and then handed to
 * gwnet_http_hdr_fields_addl(), which makes its own copy.
 *
 * Returns the result of gwnet_http_hdr_fields_addl(), -ENOMEM if the
 * temporary buffer cannot be allocated, or -EINVAL if formatting
 * fails.
 */
int gwnet_http_hdr_fields_addf(struct gwnet_http_hdr_fields *ff,
			       const char *k, const char *fmt, ...)
{
	va_list args1, args2;
	size_t vlen;
	char *v;
	int r;

	va_start(args1, fmt);
	va_copy(args2, args1);
	r = vsnprintf(NULL, 0, fmt, args1);
	va_end(args1);

	/*
	 * vsnprintf() reports an output error with a negative value.
	 * Using it as a length would wrap to a huge size_t.
	 */
	if (r < 0) {
		r = -EINVAL;
		goto out;
	}

	vlen = (size_t)r;
	v = malloc(vlen + 1);
	if (!v) {
		r = -ENOMEM;
		goto out;
	}

	r = vsnprintf(v, vlen + 1, fmt, args2);
	if (r < 0 || (size_t)r != vlen) {
		free(v);
		r = -EINVAL;
		goto out;
	}

	r = gwnet_http_hdr_fields_addl(ff, k, strlen(k), v, vlen);
	free(v);
out:
	va_end(args2);
	return r;
}
/*
 * Look up a field by name, ignoring case.
 *
 * @k does not need to be NUL-terminated; exactly @klen bytes are
 * significant. Returns the index of the first matching entry in
 * @ff->ff, or -ENOENT when no stored key matches.
 */
static ssize_t find_hdr_field_idx(const struct gwnet_http_hdr_fields *ff,
				  const char *k, size_t klen)
{
	size_t idx;

	for (idx = 0; idx < ff->nr; idx++) {
		const char *stored = ff->ff[idx].key;

		/* A prefix match is not enough; lengths must agree. */
		if (strncasecmp(stored, k, klen) != 0)
			continue;

		if (strlen(stored) != klen)
			continue;

		return idx;
	}

	return -ENOENT;
}
/*
 * Add a header field given explicit key and value lengths. The key
 * and value are copied, so the caller keeps ownership of @k and @v.
 *
 * Behaviour depends on whether the key (compared case-insensitively)
 * is already present:
 *
 *  - New key: a new entry is appended. Returns 0.
 *  - Existing key that is a known list-valued header: a non-empty
 *    value is appended to the stored one, separated by ", " (or
 *    simply stored if the existing value is empty). Returns 0.
 *  - Existing key of any other header: the stored value is REPLACED
 *    and the positive value EEXIST is returned so the caller can
 *    detect the duplicate.
 *
 * Returns -ENOMEM if an allocation fails; @ff is left unchanged then.
 */
int gwnet_http_hdr_fields_addl(struct gwnet_http_hdr_fields *ff,
			       const char *k, size_t klen,
			       const char *v, size_t vlen)
{
	ssize_t idx = find_hdr_field_idx(ff, k, klen);
	struct gwnet_http_hdr_field *slot, *grown;
	char *key_copy, *val_copy, *buf, *tail;
	size_t old_len;

	if (idx >= 0) {
		slot = &ff->ff[idx];

		if (!is_field_allowed_to_be_duplicate(k, klen)) {
			/* Overwrite, but tell the caller about it. */
			buf = realloc(slot->val, vlen + 1);
			if (!buf)
				return -ENOMEM;

			memcpy(buf, v, vlen);
			buf[vlen] = '\0';
			slot->val = buf;
			return EEXIST;
		}

		/* Nothing to merge. */
		if (!vlen)
			return 0;

		/* Room for the old value, ", ", the new value and NUL. */
		old_len = strlen(slot->val);
		buf = realloc(slot->val, old_len + vlen + 3);
		if (!buf)
			return -ENOMEM;

		tail = buf + old_len;
		if (old_len) {
			memcpy(tail, ", ", 2);
			tail += 2;
		}
		memcpy(tail, v, vlen);
		tail[vlen] = '\0';
		slot->val = buf;
		return 0;
	}

	/*
	 * New key. Perform every allocation before touching @ff so a
	 * failure leaves the collection intact.
	 */
	key_copy = malloc(klen + 1);
	if (!key_copy)
		return -ENOMEM;

	val_copy = malloc(vlen + 1);
	if (!val_copy) {
		free(key_copy);
		return -ENOMEM;
	}

	grown = realloc(ff->ff, (ff->nr + 1) * sizeof(*ff->ff));
	if (!grown) {
		free(key_copy);
		free(val_copy);
		return -ENOMEM;
	}

	memcpy(key_copy, k, klen);
	key_copy[klen] = '\0';
	memcpy(val_copy, v, vlen);
	val_copy[vlen] = '\0';

	ff->ff = grown;
	slot = &grown[ff->nr++];
	slot->key = key_copy;
	slot->val = val_copy;
	return 0;
}
/*
 * Look up a field value by NUL-terminated key (case-insensitive).
 * Returns the stored value, or NULL if the key is not present.
 */
const char *gwnet_http_hdr_fields_get(const struct gwnet_http_hdr_fields *ff,
				      const char *k)
{
	size_t klen = strlen(k);

	return gwnet_http_hdr_fields_getl(ff, k, klen);
}
/*
 * Look up a field value by key with an explicit key length
 * (case-insensitive). The returned pointer refers to storage owned by
 * @ff; it is NULL if the key is not present.
 */
const char *gwnet_http_hdr_fields_getl(const struct gwnet_http_hdr_fields *ff,
				       const char *k, size_t klen)
{
	ssize_t idx = find_hdr_field_idx(ff, k, klen);

	return (idx >= 0) ? ff->ff[idx].val : NULL;
}
/*
 * Reset @ctx to an all-zero context in the INIT state.
 * Always returns 0.
 */
int gwnet_http_body_pctx_init(struct gwnet_http_body_pctx *ctx)
{
	/* memset() rather than assignment so padding is cleared too. */
	memset(ctx, 0, sizeof *ctx);
	ctx->state = GWNET_HTTP_BODY_PARSE_ST_INIT;

	return 0;
}
/*
 * Release a body parsing context. The context owns no heap memory,
 * so this only wipes the structure.
 */
void gwnet_http_body_pctx_free(struct gwnet_http_body_pctx *ctx)
{
	memset(ctx, 0, sizeof *ctx);
}
/*
 * Tell whether @c is an ASCII hexadecimal digit (0-9, A-F, a-f).
 * Locale-independent. Returns 1 if so, 0 otherwise.
 */
static int is_xdigit(int c)
{
	return ((c >= '0' && c <= '9') ||
		(c >= 'A' && c <= 'F') ||
		(c >= 'a' && c <= 'f')) ? 1 : 0;
}
/*
 * Parse one chunk-size line of a chunked body:
 *
 *   1*HEXDIG [ chunk-ext ] CRLF
 *
 * The chunk extension is skipped without validation and a bare LF is
 * accepted as the line terminator. At most 15 hex digits are
 * accepted; a 16th digit makes the line malformed.
 *
 * On success @ctx->off and @ctx->tot_len_raw are advanced past the
 * line, @ctx->rem_len holds the decoded chunk size, and the state
 * moves to CHK_DATA (non-zero size) or CHK_TR (zero size, with
 * @ctx->found_zero_len set).
 *
 * Returns 0 on success, -EAGAIN if the line is incomplete, -EINVAL if
 * it is malformed, or -E2BIG if the limit is exceeded. Note that the
 * final "size would exceed the limit" check runs after the context
 * has already been advanced.
 */
static int parse_chunked_len(struct gwnet_http_body_pctx *ctx)
{
	const char *line = &ctx->buf[ctx->off];
	size_t avail = ctx->len - ctx->off;
	size_t pos = 0, nr_digits = 0;
	uint64_t chunk_size;
	char digits[17], *endp, ch;

	if (!avail)
		return -EAGAIN;

	/* Collect the hex digits of the chunk size. */
	for (;;) {
		if (pos >= avail)
			return -EAGAIN;
		if (nr_digits >= 16)
			return -EINVAL;

		ch = line[pos];
		if (!is_xdigit(ch))
			break;

		digits[nr_digits++] = ch;
		pos++;
	}

	/* A chunk-size line must start with at least one hex digit. */
	if (nr_digits == 0)
		return -EINVAL;

	/*
	 * Skip the optional chunk extension: everything up to the end
	 * of the line. A CR must be followed by LF; a lone LF also
	 * ends the line.
	 */
	for (;;) {
		if (pos >= avail)
			return -EAGAIN;
		if (ctx->tot_len_raw + pos >= ctx->max_len)
			return -E2BIG;

		ch = line[pos++];
		if (ch == '\n')
			break;
		if (ch != '\r')
			continue;

		if (pos >= avail)
			return -EAGAIN;
		if (ctx->tot_len_raw + pos + 1 >= ctx->max_len)
			return -E2BIG;
		if (line[pos++] != '\n')
			return -EINVAL;
		break;
	}

	digits[nr_digits] = '\0';
	errno = 0;
	chunk_size = strtoull(digits, &endp, 16);
	if (errno || endp == digits || *endp != '\0')
		return -EINVAL;

	ctx->off += pos;
	ctx->tot_len_raw += pos;
	ctx->rem_len = chunk_size;

	/*
	 * Predictive check: reject the chunk right away if its data
	 * would push the body past the maximum length.
	 */
	if (ctx->tot_len_raw + chunk_size >= ctx->max_len)
		return -E2BIG;

	ctx->found_zero_len = (chunk_size == 0);
	ctx->state = chunk_size ? GWNET_HTTP_BODY_PARSE_ST_CHK_DATA
				: GWNET_HTTP_BODY_PARSE_ST_CHK_TR;
	return 0;
}
/*
 * Consume payload bytes of the current chunk.
 *
 * @ctx:       Parser context; ctx->rem_len is the number of payload bytes
 *             still owed by the current chunk.
 * @dst_p:     In/out destination cursor. When *dst_p is NULL the payload is
 *             skipped instead of copied.
 * @dst_len_p: In/out remaining destination capacity, or NULL when no
 *             capacity limit applies.
 *
 * Consumes as many bytes as are available, bounded by the chunk remainder
 * and (if given) the destination capacity. When the chunk remainder reaches
 * zero the state moves to CHK_TR.
 *
 * Returns 0 on progress, -EAGAIN when no input is available, -ENOBUFS when
 * the destination cannot take any byte and -E2BIG when ctx->max_len would
 * be reached.
 */
static int parse_chunked_data(struct gwnet_http_body_pctx *ctx, char **dst_p,
			      size_t *dst_len_p)
{
	size_t len = ctx->len - ctx->off, copy_len;
	const char *buf = &ctx->buf[ctx->off];

	if (!len)
		return -EAGAIN;

	/*
	 * A destination without a capacity counter has no room we can
	 * account for. Copying anyway would write past the caller's buffer
	 * and then dereference the NULL counter, so report it as "too small".
	 */
	if (*dst_p && !dst_len_p)
		return -ENOBUFS;

	copy_len = min_st(ctx->rem_len, len);
	if (dst_len_p)
		copy_len = min_st(copy_len, *dst_len_p);

	if (!copy_len)
		return -ENOBUFS;

	if (ctx->tot_len_raw + copy_len >= ctx->max_len)
		return -E2BIG;

	if (*dst_p) {
		memcpy(*dst_p, buf, copy_len);
		*dst_len_p -= copy_len;
		*dst_p += copy_len;
	}

	ctx->off += copy_len;
	ctx->tot_len += copy_len;
	ctx->tot_len_raw += copy_len;
	ctx->rem_len -= copy_len;

	/* The whole chunk payload is consumed; its trailing CRLF comes next. */
	if (!ctx->rem_len)
		ctx->state = GWNET_HTTP_BODY_PARSE_ST_CHK_TR;

	return 0;
}
/*
 * Consume the CRLF that terminates a chunk.
 *
 * The available bytes (at most two) are compared against "\r\n" first, so a
 * wrong first byte is rejected with -EINVAL even when only one byte has
 * arrived; a correct but partial terminator yields -EAGAIN.
 *
 * On success two bytes are consumed and the state becomes CHK_DONE when the
 * preceding chunk had size zero, or CHK_LEN otherwise.
 *
 * Returns 0, -EAGAIN, -EINVAL or -E2BIG (ctx->max_len would be reached).
 */
static int parse_chunked_tr(struct gwnet_http_body_pctx *ctx)
{
	const size_t avail = ctx->len - ctx->off;
	const char *cur = &ctx->buf[ctx->off];
	size_t ncmp;

	if (avail == 0)
		return -EAGAIN;

	if (ctx->tot_len_raw + 2 >= ctx->max_len)
		return -E2BIG;

	ncmp = min_st(avail, 2);
	if (memcmp(cur, "\r\n", ncmp) != 0)
		return -EINVAL;

	if (ncmp < 2)
		return -EAGAIN;

	ctx->off += 2;
	ctx->tot_len_raw += 2;
	ctx->state = ctx->found_zero_len ? GWNET_HTTP_BODY_PARSE_ST_CHK_DONE
					 : GWNET_HTTP_BODY_PARSE_ST_CHK_LEN;
	return 0;
}
/*
 * Drive the chunked-body state machine over the bytes in
 * ctx->buf[ctx->off .. ctx->len).
 *
 * @ctx:     Parser context. On the first call (state INIT) the counters are
 *           reset; a zero ctx->max_len selects a 128 KiB default, a non-zero
 *           value is incremented by one.
 * @dst:     Where decoded payload is copied, or NULL to discard the payload.
 * @dst_len: Capacity of @dst in bytes. Ignored when @dst is NULL.
 *
 * Returns 0 once the terminating zero-size chunk has been consumed, or a
 * negative errno value: -EAGAIN (need more input), -EINVAL (malformed),
 * -E2BIG (body too long), -ENOBUFS (@dst too small). ctx->err is set to the
 * matching GWNET_HTTP_BODY_ERR_* value in every case.
 */
int gwnet_http_body_parse_chunked(struct gwnet_http_body_pctx *ctx,
				  char *dst, size_t dst_len)
{
	size_t dst_len_loc = dst_len;
	/*
	 * The capacity counter must exist whenever a destination exists.
	 * Deriving it from @dst_len instead would leave it NULL for a
	 * zero-sized destination, and the data step would then copy without
	 * any bound.
	 */
	size_t *dst_len_p = dst ? &dst_len_loc : NULL;
	int r = 0;

	if (ctx->state == GWNET_HTTP_BODY_PARSE_ST_INIT) {
		ctx->state = GWNET_HTTP_BODY_PARSE_ST_CHK_LEN;
		if (!ctx->max_len)
			ctx->max_len = 1024ull*128ull;
		else
			ctx->max_len += 1ull;

		ctx->tot_len = 0;
		ctx->tot_len_raw = 0;
		ctx->err = GWNET_HTTP_BODY_ERR_NONE;
	}

	/* Each step either advances ctx->state or stops the loop with r != 0. */
	while (1) {
		if (ctx->state == GWNET_HTTP_BODY_PARSE_ST_CHK_LEN) {
			r = parse_chunked_len(ctx);
			if (r)
				break;
		}

		if (ctx->state == GWNET_HTTP_BODY_PARSE_ST_CHK_DATA) {
			r = parse_chunked_data(ctx, &dst, dst_len_p);
			if (r)
				break;
		}

		if (ctx->state == GWNET_HTTP_BODY_PARSE_ST_CHK_TR) {
			r = parse_chunked_tr(ctx);
			if (r)
				break;
		}

		if (ctx->state == GWNET_HTTP_BODY_PARSE_ST_CHK_DONE) {
			r = 0;
			break;
		}
	}

	switch (r) {
	case 0:
		ctx->err = GWNET_HTTP_BODY_ERR_NONE;
		break;
	case -EAGAIN:
		ctx->err = GWNET_HTTP_BODY_ERR_INCOMPLETE;
		break;
	case -EINVAL:
		ctx->err = GWNET_HTTP_BODY_ERR_MALFORMED;
		break;
	case -E2BIG:
		/*
		 * The source is too long.
		 */
		ctx->err = GWNET_HTTP_BODY_ERR_TOO_LONG;
		break;
	case -ENOBUFS:
		/*
		 * The destination buffer is too small.
		 */
		ctx->err = GWNET_HTTP_BODY_ERR_DST_TOO_SMALL;
		break;
	default:
		ctx->err = GWNET_HTTP_BODY_ERR_INTERNAL;
		break;
	}

	return r;
}
/* Number of socket slots allocated up front by sock_slots_init(). */
#define INITIAL_SOCK_SLOTS_CAP 16
/* Limit handed to the header parser as max_len for requests and responses. */
#define MAX_HEADER_SIZE (1024ull * 16ull) /* 16 KiB */
/* Largest accepted Content-Length; also the max_len for chunked bodies. */
#define MAX_BODY_SIZE (1024ull * 1024ull * 1024ull * 100ull) /* 100 GiB */
/*
 * Lock-context annotations. They expand to context attributes only when
 * __CHECKER__ is defined (presumably by a static checker such as sparse;
 * confirm) and to nothing in a normal build.
 */
#ifdef __CHECKER__
#define __must_hold(x) __attribute__((context(x,1,1)))
#define __acquires(x) __attribute__((context(x,0,1)))
#define __releases(x) __attribute__((context(x,1,0)))
#else
#define __must_hold(x)
#define __acquires(x)
#define __releases(x)
#endif
/* Needed for the atomic reference counter in struct sock. */
#include <stdatomic.h>
/*
 * Per-socket state of the outgoing (request) stream, stored in
 * struct sock::tx_state and advanced by handle_tx().
 */
enum {
TX_STATE_INIT = 0, /* no request in progress; the next data starts one */
TX_STATE_HDR = 1, /* parsing the request header */
TX_STATE_BODY = 2, /* consuming the request body */
TX_STATE_DONE = 3, /* request fully consumed; reset to INIT afterwards */
};
/*
 * Per-socket state of the incoming (response) stream, stored in
 * struct sock::rx_state and advanced by handle_rx().
 */
enum {
RX_STATE_INIT = 0, /* no response in progress; needs a queued request */
RX_STATE_HDR = 1, /* parsing the response header */
RX_STATE_BODY = 2, /* consuming the response body */
RX_STATE_DONE = 3, /* response complete; logged, then reset to INIT */
};
/*
 * Growable byte buffer holding not-yet-parsed stream data.
 * Managed through buf_append(), buf_advance() and buf_free().
 */
struct buf {
uint64_t cap; /* bytes allocated for @buf */
uint64_t len; /* bytes currently stored in @buf */
char *buf; /* heap storage; NULL after buf_free() */
};
/*
 * Parsing state and result for one HTTP response.
 */
struct http_res {
bool is_chunked; /* set by probe_body_len() after the header is parsed */
uint64_t con_len; /* body bytes still expected for a non-chunked body */
struct gwnet_http_res_hdr hdr; /* parsed response header */
/*
 * The header and body parser contexts share storage: handle_rx()
 * initializes body_ctx only after the header has been parsed.
 */
union {
struct gwnet_http_hdr_pctx hdr_ctx;
struct gwnet_http_body_pctx body_ctx;
};
};
/*
 * One HTTP request together with the response that answers it.
 * Instances are heap-allocated by http_req_alloc() and linked into the
 * per-socket request queue through @next.
 */
struct http_req {
bool is_chunked; /* set by probe_body_len() after the header is parsed */
uint64_t con_len; /* body bytes still expected for a non-chunked body */
struct gwnet_http_req_hdr hdr; /* parsed request header */
/*
 * The header and body parser contexts share storage: handle_tx()
 * initializes body_ctx only after the header has been parsed.
 */
union {
struct gwnet_http_hdr_pctx hdr_ctx;
struct gwnet_http_body_pctx body_ctx;
};
struct http_res res; /* response paired with this request */
time_t time; /* time(NULL) at allocation; printed by log_req() */
struct http_req *next; /* next request in the socket's queue */
};
/*
 * Tracking record for one traced socket file descriptor.
 */
struct sock {
int fd; /* descriptor this record belongs to */
uint8_t tx_state; /* one of TX_STATE_* */
uint8_t rx_state; /* one of RX_STATE_* */
struct buf tx_buf; /* sent bytes not yet consumed by handle_tx() */
struct buf rx_buf; /* received bytes not yet consumed by handle_rx() */
/* Peer address text written by store_ip_addr(): "ip:port" or "[ip6]:port". */
char addr[INET6_ADDRSTRLEN + sizeof("[]:65535")];
/* Changed through sock_get()/sock_put(); sock_put() deletes at zero. */
atomic_int_fast32_t ref_count;
/*
* Request queue for pipelined keep-alive:
* - Adding requests to the tail.
* - Consuming requests from the head.
*/
struct http_req *req_tail;
struct http_req *req_head;
};
/*
 * Table of all traced sockets. The first @nr entries of @slots are in use;
 * lookups scan them linearly by file descriptor.
 */
struct sock_slots {
struct sock **slots; /* array of @cap pointers */
uint32_t nr; /* number of occupied entries */
uint32_t cap; /* allocated entries; doubled by sock_alloc() when full */
pthread_mutex_t lock; /* held around every access made by the sock_*() helpers */
};
/*
 * Process-wide tracer state, set up once by init_tracer().
 */
struct tracer_ctx {
FILE *log_file; /* opened in append mode from $GWNET_HTTP_LOG_FILE */
struct sock_slots ss; /* table of traced sockets */
};
/* Serializes the one-time setup performed by init_tracer(). */
static pthread_mutex_t x_init_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Set to true by init_tracer() when tracing cannot be enabled; every
 * trace_*() hook checks it first and returns immediately when set.
 *
 * The hooks run on whatever thread issues the intercepted call, so the flag
 * is an atomic object: volatile alone does not make concurrent access
 * well-defined. Plain reads and assignments of an atomic_bool remain valid,
 * so existing users need no change.
 */
static atomic_bool stop_tracer = false;
/* The single tracer instance used by all hooks. */
static struct tracer_ctx tctx;
/*
 * Allocate a zero-filled request object and stamp it with the current time.
 *
 * Returns the new object, or NULL when the allocation fails. The caller owns
 * the result and releases it with http_req_free().
 */
static struct http_req *http_req_alloc(void)
{
	struct http_req *req = calloc(1, sizeof(*req));

	if (req == NULL)
		return NULL;

	req->time = time(NULL);
	return req;
}
static void http_req_free(struct http_req *req)
{
if (!req)
return;
gwnet_http_req_hdr_free(&req->hdr);
gwnet_http_res_hdr_free(&req->res.hdr);
gwnet_http_hdr_pctx_free(&req->hdr_ctx);
gwnet_http_hdr_pctx_free(&req->res.hdr_ctx);
gwnet_http_body_pctx_free(&req->body_ctx);
gwnet_http_body_pctx_free(&req->res.body_ctx);
free(req);
}
/*
 * Free every request on the singly linked list that starts at @req.
 *
 * The successor pointer is read before its owner is freed. A NULL @req is
 * an empty list.
 */
static void http_req_free_all(struct http_req *req)
{
	struct http_req *cur, *following;

	for (cur = req; cur != NULL; cur = following) {
		following = cur->next;
		http_req_free(cur);
	}
}
/*
 * Append @len bytes from @data to the end of @b, growing the storage when
 * needed.
 *
 * The capacity starts at 1024 bytes and doubles until the data fits. If
 * doubling would overflow size_t, the exact required size is requested
 * instead.
 *
 * Returns 0 on success or -ENOMEM when the buffer cannot be grown (including
 * when the resulting length is not representable). On failure @b is left
 * unchanged.
 */
static int buf_append(struct buf *b, const char *data, size_t len)
{
	uint64_t need;

	/*
	 * Nothing to add. This also keeps memcpy() away from a NULL
	 * destination, which is what b->buf is while the buffer is empty
	 * (e.g. a read() that returned 0 after buf_free()).
	 */
	if (!len)
		return 0;

	if (len > UINT64_MAX - b->len)
		return -ENOMEM;

	need = b->len + len;
	if (need > SIZE_MAX)
		return -ENOMEM;

	if (need > b->cap) {
		/* Seeded so that the first doubling yields 1024 for an empty buffer. */
		size_t new_cap = b->cap ? (size_t)b->cap : 512;
		char *new_buf;

		do {
			if (new_cap > SIZE_MAX / 2) {
				/* Doubling would wrap; ask for exactly what is needed. */
				new_cap = (size_t)need;
				break;
			}
			new_cap *= 2;
		} while (new_cap < need);

		new_buf = realloc(b->buf, new_cap);
		if (!new_buf)
			return -ENOMEM;

		b->buf = new_buf;
		b->cap = new_cap;
	}

	memcpy(&b->buf[b->len], data, len);
	b->len = need;
	return 0;
}
/*
 * Release the storage of @b and reset it to the empty state.
 *
 * When no storage is attached the structure is left exactly as it is.
 */
static void buf_free(struct buf *b)
{
	if (b->buf != NULL) {
		free(b->buf);
		b->buf = NULL;
		b->len = 0;
		b->cap = 0;
	}
}
/*
 * Drop the first @len bytes of @b.
 *
 * If that would consume everything (or more), the buffer storage is released
 * through buf_free(). Otherwise the remaining bytes are moved to the front.
 */
static void buf_advance(struct buf *b, size_t len)
{
	uint64_t remaining;

	if (len >= b->len) {
		buf_free(b);
		return;
	}

	remaining = b->len - len;
	b->len = remaining;
	memmove(b->buf, b->buf + len, remaining);
}
/*
 * Initialize an empty socket table with INITIAL_SOCK_SLOTS_CAP slots and
 * its mutex.
 *
 * Returns 0 on success, -ENOMEM when the slot array cannot be allocated, or
 * the negated pthread_mutex_init() error code. On failure ss->slots is NULL.
 */
static int sock_slots_init(struct sock_slots *ss)
{
	int err;

	ss->slots = calloc(INITIAL_SOCK_SLOTS_CAP, sizeof(*ss->slots));
	if (ss->slots == NULL)
		return -ENOMEM;

	ss->cap = INITIAL_SOCK_SLOTS_CAP;
	ss->nr = 0;

	err = pthread_mutex_init(&ss->lock, NULL);
	if (err == 0)
		return 0;

	free(ss->slots);
	ss->slots = NULL;
	return -err;
}
/*
 * Take one reference on @sk by atomically incrementing its counter.
 *
 * @ss is not used; it is part of the signature only so that calls pair up
 * visually with sock_put(ss, sk).
 */
static void sock_get(struct sock_slots *ss, struct sock *sk)
{
	(void)ss;
	atomic_fetch_add(&sk->ref_count, 1);
}
/*
 * Find the slot index of the socket record whose descriptor equals @fd.
 *
 * The occupied slots are scanned linearly from index 0. The caller must hold
 * ss->lock.
 *
 * TODO(ammarfaizi2): Use a hash table for O(1) lookup.
 *
 * Returns the index, or -ENOENT when no record matches.
 */
static ssize_t __sock_find_idx(struct sock_slots *ss, int fd)
	__must_hold(&ss->lock)
{
	uint32_t pos = 0;

	while (pos < ss->nr) {
		if (ss->slots[pos]->fd == fd)
			return pos;
		pos++;
	}

	return -ENOENT;
}
/*
 * Return the socket record for @fd, or NULL when the table has none.
 *
 * No reference is taken. The caller must hold ss->lock.
 */
static struct sock *__sock_find(struct sock_slots *ss, int fd)
	__must_hold(&ss->lock)
{
	const ssize_t pos = __sock_find_idx(ss, fd);

	return (pos < 0) ? NULL : ss->slots[pos];
}
/*
 * Destroy the socket record stored in slot @idx and close the gap.
 *
 * The record's queued requests and both stream buffers are released, then
 * the record itself. The last occupied slot is moved into @idx, so slot
 * order is not preserved. The caller must hold ss->lock and @idx must refer
 * to an occupied slot.
 *
 * Returns @idx.
 */
static ssize_t __sock_del_idx(struct sock_slots *ss, size_t idx)
	__must_hold(&ss->lock)
{
	struct sock *victim = ss->slots[idx];
	uint32_t last;

	http_req_free_all(victim->req_head);
	buf_free(&victim->tx_buf);
	buf_free(&victim->rx_buf);
	free(victim);

	last = ss->nr - 1;
	ss->slots[idx] = ss->slots[last];
	ss->slots[last] = NULL;
	ss->nr = last;

	return idx;
}
/*
 * Destroy the socket record for @fd, if one exists.
 *
 * The caller must hold ss->lock. Returns the slot index that was vacated,
 * or the negative lookup error when @fd is not in the table.
 */
static ssize_t __sock_del(struct sock_slots *ss, int fd)
	__must_hold(&ss->lock)
{
	const ssize_t pos = __sock_find_idx(ss, fd);

	return (pos < 0) ? pos : __sock_del_idx(ss, pos);
}
/*
 * Locked variant of __sock_del(): takes ss->lock, destroys the record for
 * @fd and releases the lock again.
 *
 * The record is destroyed regardless of its reference count. Returns what
 * __sock_del() returns.
 */
static ssize_t sock_del(struct sock_slots *ss, int fd)
{
	ssize_t ret;

	pthread_mutex_lock(&ss->lock);
	ret = __sock_del(ss, fd);
	pthread_mutex_unlock(&ss->lock);

	return ret;
}
/*
 * Look up the record for @fd and take a reference on it, both under
 * ss->lock.
 *
 * Returns the record with its reference count incremented, or NULL when
 * @fd is not tracked. A non-NULL result should be balanced with sock_put().
 */
static struct sock *sock_find_and_get(struct sock_slots *ss, int fd)
{
	struct sock *found;

	pthread_mutex_lock(&ss->lock);
	found = __sock_find(ss, fd);
	if (found != NULL)
		sock_get(ss, found);
	pthread_mutex_unlock(&ss->lock);

	return found;
}
/*
 * Look up the record for @fd under ss->lock without taking a reference.
 *
 * Returns the record or NULL. The pointer is not protected once the lock has
 * been dropped.
 */
static struct sock *sock_find(struct sock_slots *ss, int fd)
{
	struct sock *found;

	pthread_mutex_lock(&ss->lock);
	found = __sock_find(ss, fd);
	pthread_mutex_unlock(&ss->lock);

	return found;
}
/*
 * Drop one reference on @sk.
 *
 * When the counter value before the decrement was 1 (i.e. this was the last
 * reference), the record is removed from the table and destroyed under
 * ss->lock, looked up by its file descriptor.
 */
static void sock_put(struct sock_slots *ss, struct sock *sk)
{
	const int_fast32_t before = atomic_fetch_sub(&sk->ref_count, 1);

	if (before != 1)
		return;

	pthread_mutex_lock(&ss->lock);
	__sock_del(ss, sk->fd);
	pthread_mutex_unlock(&ss->lock);
}
/*
 * Create a tracking record for @fd and add it to the table.
 *
 * The slot array is doubled when it is full. The new record is zero-filled
 * with both stream states at INIT; its reference count starts at zero, so
 * the caller is expected to take its own reference.
 *
 * Returns the new record, or NULL when @fd is already tracked or an
 * allocation fails.
 */
static struct sock *sock_alloc(struct sock_slots *ss, int fd)
{
	struct sock *sk = NULL;

	pthread_mutex_lock(&ss->lock);

	/* Duplicate descriptors are refused. */
	if (__sock_find(ss, fd) != NULL)
		goto unlock;

	if (ss->nr >= ss->cap) {
		const uint32_t grown = ss->cap * 2;
		struct sock **bigger;

		bigger = realloc(ss->slots, grown * sizeof(*ss->slots));
		if (bigger == NULL)
			goto unlock;

		ss->slots = bigger;
		ss->cap = grown;
	}

	sk = calloc(1, sizeof(*sk));
	if (sk != NULL) {
		sk->fd = fd;
		sk->tx_state = TX_STATE_INIT;
		sk->rx_state = RX_STATE_INIT;
		ss->slots[ss->nr++] = sk;
	}

unlock:
	pthread_mutex_unlock(&ss->lock);
	return sk;
}
/*
 * One-time tracer setup: open the log file named by $GWNET_HTTP_LOG_FILE in
 * append mode, initialize the socket table and make the log line-buffered.
 *
 * The setup runs at most once per process. Its outcome is recorded and
 * returned by every later call, and "initialized" is published only after
 * the setup has finished, so no thread can see the tracer as ready while
 * tctx is still being set up.
 *
 * On any failure stop_tracer is set so that all hooks become no-ops.
 *
 * Returns 0 when the tracer is usable, a negative value otherwise.
 */
static int init_tracer(void)
{
	static atomic_bool initialized = false;
	static int init_result;
	const char *log_file_path;
	int r;

	/* Fast path: pairs with the release store below. */
	if (atomic_load_explicit(&initialized, memory_order_acquire))
		return init_result;

	pthread_mutex_lock(&x_init_lock);
	if (atomic_load_explicit(&initialized, memory_order_relaxed)) {
		/* Another thread completed the setup while we waited. */
		r = init_result;
		goto out_unlock;
	}

	r = -1;
	log_file_path = getenv("GWNET_HTTP_LOG_FILE");
	if (!log_file_path)
		goto out_fail;

	tctx.log_file = fopen(log_file_path, "ab");
	if (!tctx.log_file)
		goto out_fail;

	r = sock_slots_init(&tctx.ss);
	if (r < 0) {
		fclose(tctx.log_file);
		tctx.log_file = NULL;
		goto out_fail;
	}

	setvbuf(tctx.log_file, NULL, _IOLBF, 0);
	r = 0;
	goto out_publish;

out_fail:
	stop_tracer = true;
out_publish:
	/* Record the outcome first, then publish completion. */
	init_result = r;
	atomic_store_explicit(&initialized, true, memory_order_release);
out_unlock:
	pthread_mutex_unlock(&x_init_lock);
	return r;
}
/*
 * Append @req to the tail of the socket's request queue.
 *
 * With an empty queue @req becomes both head and tail; otherwise it is
 * linked behind the current tail.
 */
static void http_req_plug_to_sock(struct sock *sk, struct http_req *req)
{
	if (sk->req_head != NULL)
		sk->req_tail->next = req;
	else
		sk->req_head = req;

	sk->req_tail = req;
}
/*
 * Remove the oldest request from the socket's queue and free it.
 *
 * Does nothing on an empty queue. When the last element is removed the tail
 * pointer is cleared as well.
 */
static void pop_http_req_head(struct sock *sk)
{
	struct http_req *oldest = sk->req_head;

	if (oldest == NULL)
		return;

	sk->req_head = oldest->next;
	if (sk->req_head == NULL)
		sk->req_tail = NULL;

	http_req_free(oldest);
}
/*
 * Decide how the message body is delimited, based on the parsed header
 * fields.
 *
 * @ff:         Parsed header fields.
 * @is_chunked: Set to true when the "transfer-encoding" value contains the
 *              substring "chunked" (case-sensitive match); false otherwise.
 * @is_invalid: Set to true when "content-length" is present but is not a
 *              plain decimal number or exceeds MAX_BODY_SIZE; false
 *              otherwise.
 *
 * Transfer-encoding takes precedence over content-length.
 *
 * Returns the content length in bytes, or 0 for a chunked body, an invalid
 * length, or a message with neither header.
 */
static uint64_t probe_body_len(struct gwnet_http_hdr_fields *ff,
			       bool *is_chunked, bool *is_invalid)
{
	const char *enc, *cl_text;
	uint64_t parsed;
	char *end;

	*is_invalid = false;
	*is_chunked = false;

	enc = gwnet_http_hdr_fields_get(ff, "transfer-encoding");
	if (enc != NULL && strstr(enc, "chunked") != NULL) {
		*is_chunked = true;
		return 0;
	}

	cl_text = gwnet_http_hdr_fields_get(ff, "content-length");
	if (cl_text == NULL)
		return 0;

	errno = 0;
	parsed = strtoull(cl_text, &end, 10);
	if (errno == 0 && end != cl_text && *end == '\0' &&
	    parsed <= MAX_BODY_SIZE)
		return parsed;

	*is_invalid = true;
	return 0;
}
static const char *translate_method(int m)
{
switch (m) {
case GWNET_HTTP_METHOD_GET: return "GET";
case GWNET_HTTP_METHOD_POST: return "POST";
case GWNET_HTTP_METHOD_PUT: return "PUT";
case GWNET_HTTP_METHOD_DELETE: return "DELETE";
case GWNET_HTTP_METHOD_HEAD: return "HEAD";
case GWNET_HTTP_METHOD_OPTIONS: return "OPTIONS";
case GWNET_HTTP_METHOD_PATCH: return "PATCH";
case GWNET_HTTP_METHOD_TRACE: return "TRACE";
case GWNET_HTTP_METHOD_CONNECT: return "CONNECT";
default: return "UNKNOWN";
}
}
/*
 * Write one log line for a completed request/response pair.
 *
 * @l:    Output stream.
 * @addr: Destination address text to print.
 * @req:  Request whose header, timestamp and paired response are reported.
 *
 * The line carries the local time of the request, the destination, the
 * response code, the "host" header ("-" when absent), the method name and
 * the request URI. Nothing is written when the timestamp cannot be converted
 * to local time.
 */
static void log_req(FILE *l, const char *addr, struct http_req *req)
{
	const char *verb = translate_method(req->hdr.method);
	const char *host_hdr;
	struct tm when;
	char stamp[32];

	if (localtime_r(&req->time, &when) == NULL)
		return;

	host_hdr = gwnet_http_hdr_fields_get(&req->hdr.fields, "host");
	if (host_hdr == NULL)
		host_hdr = "-";

	strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", &when);
	fprintf(l, "%s: dst=%s; res_code=%d; host=%s; method=%s; uri=%s;\n",
		stamp, addr, req->res.hdr.code, host_hdr, verb, req->hdr.uri);
}
/*
 * Consume message-body bytes from the front of @b.
 *
 * @c:          Chunked parser context (used only when @is_chunked).
 * @b:          Stream buffer; consumed bytes are removed from its front.
 * @con_len:    Remaining body length for a non-chunked body; decremented by
 *              the number of bytes consumed.
 * @is_chunked: Selects chunked decoding over plain length counting.
 *
 * For a chunked body the whole buffer is handed to the chunked parser with
 * the payload discarded and max_len set to MAX_BODY_SIZE.
 *
 * Returns 0 when the body is complete, -EAGAIN when more data is needed, or
 * another negative value reported by the chunked parser.
 */
static int consume_body(struct gwnet_http_body_pctx *c, struct buf *b,
			uint64_t *con_len, bool is_chunked)
{
	int ret;

	if (!is_chunked) {
		uint64_t take = min_st(*con_len, b->len);

		buf_advance(b, take);
		*con_len -= take;
		return (*con_len != 0) ? -EAGAIN : 0;
	}

	c->buf = b->buf;
	c->len = b->len;
	c->off = 0;
	c->max_len = MAX_BODY_SIZE;

	ret = gwnet_http_body_parse_chunked(c, NULL, 0);
	if (c->off)
		buf_advance(b, c->off);

	return ret;
}
/*
 * Parse the header of the request at the tail of the queue from the
 * socket's TX buffer.
 *
 * Bytes consumed by the parser are removed from the buffer whether or not
 * parsing succeeded. After a successful parse the body framing is probed
 * and the TX state advances to TX_STATE_BODY.
 *
 * Returns 0 on success, the parser's non-zero result otherwise, or -EINVAL
 * when the content length is invalid.
 */
static int handle_tx_hdr(struct sock *sk)
{
	struct http_req *req = sk->req_tail;
	struct gwnet_http_hdr_pctx *pc = &req->hdr_ctx;
	struct buf *txb = &sk->tx_buf;
	bool bad_len;
	int ret;

	pc->buf = txb->buf;
	pc->len = txb->len;
	pc->off = 0;
	pc->max_len = MAX_HEADER_SIZE;

	ret = gwnet_http_req_hdr_parse(pc, &req->hdr);
	if (pc->off)
		buf_advance(txb, pc->off);
	if (ret)
		return ret;

	req->con_len = probe_body_len(&req->hdr.fields, &req->is_chunked,
				      &bad_len);
	if (bad_len)
		return -EINVAL;

	sk->tx_state = TX_STATE_BODY;
	return 0;
}
/*
 * Consume body bytes of the request at the tail of the queue from the TX
 * buffer.
 *
 * Advances the TX state to TX_STATE_DONE once the body is complete.
 * Returns the result of consume_body().
 */
static int handle_tx_body(struct sock *sk)
{
	struct http_req *req = sk->req_tail;
	int ret;

	ret = consume_body(&req->body_ctx, &sk->tx_buf, &req->con_len,
			   req->is_chunked);
	if (ret == 0)
		sk->tx_state = TX_STATE_DONE;

	return ret;
}
/*
 * Parse the header of the response belonging to the request at the head of
 * the queue, reading from the socket's RX buffer.
 *
 * Bytes consumed by the parser are removed from the buffer whether or not
 * parsing succeeded. Only a negative parser result aborts; after that the
 * body framing is probed and the RX state advances to RX_STATE_BODY.
 *
 * Returns 0 on success, the parser's negative result otherwise, or -EINVAL
 * when the content length is invalid.
 */
static int handle_rx_hdr(struct sock *sk)
{
	struct http_res *res = &sk->req_head->res;
	struct gwnet_http_hdr_pctx *pc = &res->hdr_ctx;
	struct buf *rxb = &sk->rx_buf;
	bool bad_len;
	int ret;

	pc->buf = rxb->buf;
	pc->len = rxb->len;
	pc->off = 0;
	pc->max_len = MAX_HEADER_SIZE;

	ret = gwnet_http_res_hdr_parse(pc, &res->hdr);
	if (pc->off)
		buf_advance(rxb, pc->off);
	if (ret < 0)
		return ret;

	res->con_len = probe_body_len(&res->hdr.fields, &res->is_chunked,
				      &bad_len);
	if (bad_len)
		return -EINVAL;

	sk->rx_state = RX_STATE_BODY;
	return 0;
}
/*
 * Consume body bytes of the response belonging to the request at the head
 * of the queue from the RX buffer.
 *
 * Advances the RX state to RX_STATE_DONE once the body is complete.
 * Returns the result of consume_body().
 */
static int handle_rx_body(struct sock *sk)
{
	struct http_res *res = &sk->req_head->res;
	int ret;

	ret = consume_body(&res->body_ctx, &sk->rx_buf, &res->con_len,
			   res->is_chunked);
	if (ret == 0)
		sk->rx_state = RX_STATE_DONE;

	return ret;
}
/*
 * Run the request-side state machine over the socket's TX buffer.
 *
 * In TX_STATE_INIT a new request is allocated and queued at the tail. The
 * header and then the body are consumed as far as the buffered data allows.
 * When a request completes and bytes remain in the buffer, the machine
 * starts over so that pipelined requests are handled in one call.
 *
 * Returns 0 when all buffered data was consumed up to a request boundary,
 * -ENOMEM when a request cannot be allocated, or the first non-zero result
 * of a parsing step (for example -EAGAIN when more data is needed).
 */
static int handle_tx(struct sock *sk)
{
	struct http_req *req;
	int r = 0;

	do {
		if (sk->tx_state == TX_STATE_INIT) {
			req = http_req_alloc();
			if (req == NULL)
				return -ENOMEM;

			http_req_plug_to_sock(sk, req);
			r = gwnet_http_hdr_pctx_init(&req->hdr_ctx);
			if (r)
				return r;

			sk->tx_state = TX_STATE_HDR;
		}

		/* The request being sent is always the newest one queued. */
		req = sk->req_tail;

		if (sk->tx_state == TX_STATE_HDR) {
			r = handle_tx_hdr(sk);
			if (r)
				return r;

			r = gwnet_http_body_pctx_init(&req->body_ctx);
			if (r)
				return r;
		}

		if (sk->tx_state == TX_STATE_BODY) {
			r = handle_tx_body(sk);
			if (r)
				return r;
		}

		if (sk->tx_state != TX_STATE_DONE)
			break;

		sk->tx_state = TX_STATE_INIT;
	} while (sk->tx_buf.len);

	return r;
}
/*
 * Run the response-side state machine over the socket's RX buffer.
 *
 * Each response is matched with the request at the head of the queue. The
 * header and then the body are consumed as far as the buffered data allows.
 * A completed response is logged, its request is popped from the queue, and
 * the machine starts over while bytes remain in the buffer.
 *
 * Returns 0 when all buffered data was consumed up to a response boundary,
 * -EINVAL when data arrives while no request is queued, or the first
 * non-zero result of a parsing step (for example -EAGAIN when more data is
 * needed).
 */
static int handle_rx(struct sock *sk)
{
	struct http_req *req;
	struct http_res *res;
	int r = 0;

	do {
		/*
		 * A response needs a request to belong to. Check this before
		 * deriving any pointer from the queue head, and in every
		 * state, so a NULL head can never be dereferenced.
		 */
		req = sk->req_head;
		if (!req)
			return -EINVAL;
		res = &req->res;

		if (sk->rx_state == RX_STATE_INIT) {
			r = gwnet_http_hdr_pctx_init(&res->hdr_ctx);
			if (r)
				return r;

			sk->rx_state = RX_STATE_HDR;
		}

		if (sk->rx_state == RX_STATE_HDR) {
			r = handle_rx_hdr(sk);
			if (r)
				return r;

			r = gwnet_http_body_pctx_init(&res->body_ctx);
			if (r)
				return r;
		}

		if (sk->rx_state == RX_STATE_BODY) {
			r = handle_rx_body(sk);
			if (r)
				return r;
		}

		if (sk->rx_state != RX_STATE_DONE)
			break;

		log_req(tctx.log_file, sk->addr, req);
		pop_http_req_head(sk);
		sk->rx_state = RX_STATE_INIT;
	} while (sk->rx_buf.len);

	return r;
}
/*
 * Hook called after a successful socket(2): start tracking @fd when it is
 * an IPv4/IPv6 stream socket.
 *
 * @fd:     Newly created descriptor.
 * @domain: Address family passed to socket().
 * @type:   Socket type passed to socket(), possibly ORed with
 *          SOCK_NONBLOCK and/or SOCK_CLOEXEC.
 *
 * The tracer is initialized lazily here. A freshly allocated record gets its
 * first reference, which trace_close() later drops.
 */
static void trace_socket(int fd, int domain, int type)
{
	struct sock *sk;

	if (stop_tracer)
		return;

	if (init_tracer() < 0)
		return;

	if (domain != AF_INET && domain != AF_INET6)
		return;

	/*
	 * The socket type is a small integer, not a bit flag: testing
	 * "type & SOCK_STREAM" would also accept SOCK_RAW (3) and
	 * SOCK_SEQPACKET (5). Strip the creation flags and compare exactly.
	 */
	if ((type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC)) != SOCK_STREAM)
		return;

	sk = sock_alloc(&tctx.ss, fd);
	if (sk)
		sock_get(&tctx.ss, sk);
}
/*
 * Format the peer address of a connection into sk->addr.
 *
 * @sk:   Socket record that receives the text.
 * @r:    Address family; AF_INET selects the IPv4 form, any other value is
 *        formatted as IPv6.
 * @addr: Socket address to format.
 * @len:  Size of @addr in bytes.
 *
 * The result is "a.b.c.d:port" for IPv4 and "[v6-address]:port" for IPv6.
 *
 * Returns 0 on success or -EINVAL when @len is smaller than the address
 * structure of the selected family.
 */
static int store_ip_addr(struct sock *sk, int r, const struct sockaddr *addr,
			 socklen_t len)
{
	uint16_t port;

	if (r == AF_INET) {
		struct sockaddr_in *in4 = (void *)addr;

		if (len < sizeof(*in4))
			return -EINVAL;

		inet_ntop(AF_INET, &in4->sin_addr, sk->addr, sizeof(sk->addr));
		port = ntohs(in4->sin_port);
		len = strlen(sk->addr);
	} else {
		struct sockaddr_in6 *in6 = (void *)addr;

		if (len < sizeof(*in6))
			return -EINVAL;

		/* Reserve the first byte for '[' and format right behind it. */
		sk->addr[0] = '[';
		len = sizeof(sk->addr) - 1;
		inet_ntop(AF_INET6, &in6->sin6_addr, sk->addr + 1, len);
		port = ntohs(in6->sin6_port);

		len = strlen(sk->addr);
		sk->addr[len++] = ']';
		sk->addr[len] = '\0';
	}

	/* From here on @len is the length of the text written so far. */
	snprintf(&sk->addr[len], sizeof(sk->addr) - len, ":%hu", port);
	return 0;
}
/*
 * Hook called from connect(): record the destination address of a tracked
 * socket.
 *
 * @sockfd: Descriptor being connected.
 * @addr:   Destination address as given by the application.
 * @len:    Size of @addr in bytes as given by the application.
 *
 * Only AF_INET and AF_INET6 destinations are recorded. A tracked socket that
 * is not in its initial TX/RX state, or whose address cannot be stored, is
 * dropped from the table.
 */
static void trace_connect(int sockfd, const struct sockaddr *addr,
			  socklen_t len)
{
	struct sock *sk;
	int r;

	if (stop_tracer)
		return;

	/*
	 * This hook also runs when the connect() call itself failed, so the
	 * arguments may be ones the kernel rejected. Never read the family
	 * field unless the pointer is set and the length covers it.
	 */
	if (!addr || len < sizeof(addr->sa_family))
		return;

	r = addr->sa_family;
	if (r != AF_INET && r != AF_INET6)
		return;

	pthread_mutex_lock(&tctx.ss.lock);
	sk = __sock_find(&tctx.ss, sockfd);
	if (!sk)
		goto out_unlock;

	if (sk->rx_state != RX_STATE_INIT || sk->tx_state != TX_STATE_INIT) {
		/*
		 * Bad state on connect(), don't trace this, sus.
		 */
		__sock_del(&tctx.ss, sockfd);
		goto out_unlock;
	}

	r = store_ip_addr(sk, r, addr, len);
	if (r < 0)
		__sock_del(&tctx.ss, sockfd);

out_unlock:
	pthread_mutex_unlock(&tctx.ss.lock);
}
/*
 * Hook called after data was received on @fd: feed the bytes to the
 * response parser of a tracked socket.
 *
 * @fd:  Descriptor the data was read from.
 * @buf: Received bytes.
 * @len: Number of bytes in @buf.
 *
 * Untracked descriptors are ignored. On an allocation failure or a parse
 * error other than -EAGAIN the socket is dropped from the table.
 */
static void trace_recv(int fd, const void *buf, size_t len)
{
	struct sock *sk;
	int r;

	if (stop_tracer)
		return;

	sk = sock_find_and_get(&tctx.ss, fd);
	if (!sk)
		return;

	r = buf_append(&sk->rx_buf, buf, len);
	if (r)
		goto out_del;

	r = handle_rx(sk);
	if (r && r != -EAGAIN)
		goto out_del;

	/*
	 * Drop the reference taken by sock_find_and_get(), exactly as
	 * trace_send() does. Without this the count only ever grows and the
	 * record is never released when the descriptor is closed.
	 */
	sock_put(&tctx.ss, sk);
	return;

out_del:
	sock_del(&tctx.ss, sk->fd);
}
/*
 * Hook called after data was sent on @fd: feed the bytes to the request
 * parser of a tracked socket.
 *
 * @fd:  Descriptor the data was written to.
 * @buf: Sent bytes.
 * @len: Number of bytes in @buf.
 *
 * Untracked descriptors are ignored. The reference taken for the duration
 * of the call is dropped on the normal path; on an allocation failure or a
 * parse error other than -EAGAIN the socket is dropped from the table
 * instead.
 */
static void trace_send(int fd, const void *buf, size_t len)
{
	struct sock *sk;
	int ret;

	if (stop_tracer)
		return;

	sk = sock_find_and_get(&tctx.ss, fd);
	if (sk == NULL)
		return;

	if (buf_append(&sk->tx_buf, buf, len) != 0)
		goto drop;

	ret = handle_tx(sk);
	if (ret == 0 || ret == -EAGAIN) {
		sock_put(&tctx.ss, sk);
		return;
	}

drop:
	sock_del(&tctx.ss, sk->fd);
}
/*
 * Hook called before close(2): drop one reference on the record of a
 * tracked descriptor. The record is destroyed when that was the last
 * reference.
 */
static void trace_close(int fd)
{
	struct sock *sk;

	if (stop_tracer)
		return;

	sk = sock_find(&tctx.ss, fd);
	if (sk == NULL)
		return;

	sock_put(&tctx.ss, sk);
}
/*
 * Interposed socket(2).
 *
 * Issues the system call directly (x86-64 only) and, on success, reports
 * the new descriptor to trace_socket().
 *
 * Returns the descriptor, or -1 with errno set from the kernel's result.
 */
int socket(int domain, int type, int protocol)
{
	int fd;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * may access memory; declare all of that so the compiler keeps no
	 * live value in those registers across the call.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (fd)
		: "0" (__NR_socket), "D" (domain), "S" (type), "d" (protocol)
		: "rcx", "r11", "memory"
	);

	if (fd < 0) {
		errno = -fd;
		return -1;
	}

	trace_socket(fd, domain, type);
	return fd;
}
/*
 * Interposed connect(2).
 *
 * Issues the system call directly (x86-64 only). trace_connect() is called
 * regardless of the outcome, so a connect that did not complete immediately
 * is recorded as well.
 *
 * Returns the kernel's result, or -1 with errno set on failure.
 */
int connect(int sockfd, const struct sockaddr *addr, socklen_t len)
{
	int ret;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * reads the address from memory; declare all of that to the compiler.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_connect), "D" (sockfd), "S" (addr), "d" (len)
		: "rcx", "r11", "memory"
	);

	trace_connect(sockfd, addr, len);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	return ret;
}
/*
 * Interposed close(2).
 *
 * trace_close() runs first, while the descriptor number still identifies
 * the socket, then the system call is issued directly (x86-64 only).
 *
 * Returns the kernel's result, or -1 with errno set on failure.
 */
int close(int fd)
{
	int ret;

	trace_close(fd);

	/*
	 * The `syscall` instruction overwrites %rcx and %r11; declare the
	 * clobbers so the compiler keeps no live value in those registers.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_close), "D" (fd)
		: "rcx", "r11", "memory"
	);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	return ret;
}
/*
 * Interposed recvfrom(2).
 *
 * Issues the system call directly (x86-64 only) and, on success, passes the
 * received bytes to trace_recv().
 *
 * Returns the number of bytes received, or -1 with errno set on failure.
 */
ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags,
		 struct sockaddr *src_addr, socklen_t *addrlen)
{
	/* Arguments 4-6 travel in %r10, %r8 and %r9. */
	register int __flags __asm__ ("%r10") = flags;
	register struct sockaddr *__src_addr __asm__ ("%r8") = src_addr;
	register socklen_t *__addrlen __asm__ ("%r9") = addrlen;
	ssize_t ret;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * writes into @buf, @src_addr and @addrlen. The "memory" clobber
	 * makes those writes visible before the buffer is read below.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_recvfrom), "D" (sockfd), "S" (buf), "d" (len),
		  "r" (__flags), "r" (__src_addr), "r" (__addrlen)
		: "rcx", "r11", "memory"
	);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	trace_recv(sockfd, buf, (size_t)ret);
	return ret;
}
/*
 * Interposed sendto(2).
 *
 * Issues the system call directly (x86-64 only) and, on success, passes the
 * bytes that were actually sent to trace_send().
 *
 * Returns the number of bytes sent, or -1 with errno set on failure.
 */
ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
	       const struct sockaddr *dest_addr, socklen_t addrlen)
{
	/* Arguments 4-6 travel in %r10, %r8 and %r9. */
	register int __flags __asm__ ("%r10") = flags;
	register const struct sockaddr *__dest_addr __asm__ ("%r8") = dest_addr;
	register socklen_t __addrlen __asm__ ("%r9") = addrlen;
	ssize_t ret;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * reads @buf and @dest_addr from memory; declare all of that to the
	 * compiler.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_sendto), "D" (sockfd), "S" (buf), "d" (len),
		  "r" (__flags), "r" (__dest_addr), "r" (__addrlen)
		: "rcx", "r11", "memory"
	);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	trace_send(sockfd, buf, (size_t)ret);
	return ret;
}
/*
 * Interposed recv(2): forwarded to the interposed recvfrom() with no source
 * address requested, so the received data is traced there.
 */
ssize_t recv(int sockfd, void *buf, size_t len, int flags)
{
	struct sockaddr *const no_src = NULL;
	socklen_t *const no_src_len = NULL;

	return recvfrom(sockfd, buf, len, flags, no_src, no_src_len);
}
/*
 * Interposed send(2): forwarded to the interposed sendto() with no
 * destination address, so the sent data is traced there.
 */
ssize_t send(int sockfd, const void *buf, size_t len, int flags)
{
	const struct sockaddr *const no_dest = NULL;
	const socklen_t no_dest_len = 0;

	return sendto(sockfd, buf, len, flags, no_dest, no_dest_len);
}
/*
 * Interposed write(2).
 *
 * Issues the system call directly (x86-64 only) and, on success, passes the
 * bytes that were actually written to trace_send(), which ignores
 * descriptors that are not tracked sockets.
 *
 * Returns the number of bytes written, or -1 with errno set on failure.
 */
ssize_t write(int fd, const void *buf, size_t count)
{
	ssize_t ret;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * reads @buf from memory; declare all of that to the compiler.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_write), "D" (fd), "S" (buf), "d" (count)
		: "rcx", "r11", "memory"
	);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	trace_send(fd, buf, (size_t)ret);
	return ret;
}
/*
 * Interposed read(2).
 *
 * Issues the system call directly (x86-64 only) and, on success, passes the
 * bytes that were read to trace_recv(), which ignores descriptors that are
 * not tracked sockets.
 *
 * Returns the number of bytes read, or -1 with errno set on failure.
 */
ssize_t read(int fd, void *buf, size_t count)
{
	ssize_t ret;

	/*
	 * The `syscall` instruction overwrites %rcx and %r11, and the kernel
	 * writes into @buf. The "memory" clobber makes that write visible
	 * before the buffer is read below.
	 */
	__asm__ volatile (
		"syscall"
		: "=a" (ret)
		: "0" (__NR_read), "D" (fd), "S" (buf), "d" (count)
		: "rcx", "r11", "memory"
	);

	if (ret < 0) {
		errno = -ret;
		return -1;
	}

	trace_recv(fd, buf, (size_t)ret);
	return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment