Created
October 1, 2013 19:40
-
-
Save luisbebop/6783933 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <errno.h> | |
#include <ctype.h> | |
#include "bencode.h" | |
#define MAX_ALLOC (((size_t) -1) / sizeof(struct bencode *) / 2) | |
struct decode { | |
const char *data; | |
const size_t len; | |
size_t off; | |
int error; | |
int level; | |
}; | |
/* | |
* Buffer size for fitting all unsigned long long and long long integers, | |
* assuming it is at most 64 bits. If long long is larger than 64 bits, | |
* an error is produced when too large an integer is converted. | |
*/ | |
#define LONGLONGSIZE 21 | |
struct bencode_keyvalue { | |
struct bencode *key; | |
struct bencode *value; | |
}; | |
static struct bencode *decode(struct decode *ctx); | |
static size_t find(const char *data, size_t len, size_t off, char c) | |
{ | |
for (; off < len; off++) { | |
if (data[off] == c) | |
return off; | |
} | |
return -1; | |
} | |
static size_t type_size(int type) | |
{ | |
switch (type) { | |
case BENCODE_BOOL: | |
return sizeof(struct bencode_bool); | |
case BENCODE_DICT: | |
return sizeof(struct bencode_dict); | |
case BENCODE_INT: | |
return sizeof(struct bencode_int); | |
case BENCODE_LIST: | |
return sizeof(struct bencode_list); | |
case BENCODE_STR: | |
return sizeof(struct bencode_str); | |
default: | |
fprintf(stderr, "Unknown bencode type: %d\n", type); | |
abort(); | |
} | |
} | |
static void *alloc(int type) | |
{ | |
struct bencode *b = calloc(1, type_size(type)); | |
if (b == NULL) | |
return NULL; | |
b->type = type; | |
return b; | |
} | |
static int insufficient(struct decode *ctx) | |
{ | |
ctx->error = BEN_INSUFFICIENT; | |
return -1; | |
} | |
static int invalid(struct decode *ctx) | |
{ | |
ctx->error = BEN_INVALID; | |
return -1; | |
} | |
static void *insufficient_ptr(struct decode *ctx) | |
{ | |
ctx->error = BEN_INSUFFICIENT; | |
return NULL; | |
} | |
static void *invalid_ptr(struct decode *ctx) | |
{ | |
ctx->error = BEN_INVALID; | |
return NULL; | |
} | |
static void *oom_ptr(struct decode *ctx) | |
{ | |
ctx->error = BEN_NO_MEMORY; | |
return NULL; | |
} | |
static struct bencode *decode_bool(struct decode *ctx) | |
{ | |
struct bencode_bool *b; | |
char value; | |
char c; | |
if ((ctx->off + 2) > ctx->len) | |
return insufficient_ptr(ctx); | |
c = ctx->data[ctx->off + 1]; | |
if (c != '0' && c != '1') | |
return invalid_ptr(ctx); | |
value = (c == '1'); | |
b = alloc(BENCODE_BOOL); | |
if (b == NULL) | |
return oom_ptr(ctx); | |
b->b = value; | |
ctx->off += 2; | |
return (struct bencode *) b; | |
} | |
static int resize_dict(struct bencode_dict *d) | |
{ | |
struct bencode **newkeys; | |
struct bencode **newvalues; | |
size_t newsize; | |
if (d->alloc >= MAX_ALLOC) | |
return -1; | |
if (d->alloc == 0) | |
d->alloc = 4; | |
else | |
d->alloc *= 2; | |
newsize = sizeof(d->values[0]) * d->alloc; | |
newkeys = realloc(d->keys, newsize); | |
newvalues = realloc(d->values, newsize); | |
if (newkeys == NULL || newvalues == NULL) { | |
free(newkeys); | |
free(newvalues); | |
return -1; | |
} | |
d->keys = newkeys; | |
d->values = newvalues; | |
return 0; | |
} | |
int ben_cmp(const struct bencode *a, const struct bencode *b) | |
{ | |
size_t cmplen; | |
int ret; | |
const struct bencode_str *sa; | |
const struct bencode_str *sb; | |
if (a->type != b->type) | |
return (a->type == BENCODE_INT) ? -1 : 1; | |
if (a->type == BENCODE_INT) { | |
const struct bencode_int *ia = ben_int_const_cast(a); | |
const struct bencode_int *ib = ben_int_const_cast(b); | |
if (ia->ll < ib->ll) | |
return -1; | |
if (ib->ll < ia->ll) | |
return 1; | |
return 0; | |
} | |
sa = ben_str_const_cast(a); | |
sb = ben_str_const_cast(b); | |
cmplen = (sa->len <= sb->len) ? sa->len : sb->len; | |
ret = memcmp(sa->s, sb->s, cmplen); | |
if (sa->len == sb->len) | |
return ret; | |
if (ret) | |
return ret; | |
return (sa->len < sb->len) ? -1 : 1; | |
} | |
int ben_cmp_qsort(const void *a, const void *b) | |
{ | |
const struct bencode *akey = ((const struct bencode_keyvalue *) a)->key; | |
const struct bencode *bkey = ((const struct bencode_keyvalue *) b)->key; | |
return ben_cmp(akey, bkey); | |
} | |
static struct bencode *decode_dict(struct decode *ctx) | |
{ | |
struct bencode *key; | |
struct bencode *value; | |
struct bencode_dict *d; | |
d = alloc(BENCODE_DICT); | |
if (d == NULL) { | |
fprintf(stderr, "bencode: Not enough memory for dict\n"); | |
return oom_ptr(ctx); | |
} | |
ctx->off += 1; | |
while (ctx->off < ctx->len && ctx->data[ctx->off] != 'e') { | |
if (d->n == d->alloc && resize_dict(d)) { | |
fprintf(stderr, "bencode: Can not resize dict\n"); | |
ctx->error = BEN_NO_MEMORY; | |
goto error; | |
} | |
key = decode(ctx); | |
if (key == NULL) | |
goto error; | |
if (key->type != BENCODE_INT && key->type != BENCODE_STR) { | |
ben_free(key); | |
key = NULL; | |
ctx->error = BEN_INVALID; | |
fprintf(stderr, "bencode: Invalid dict key type\n"); | |
goto error; | |
} | |
if (d->n > 0 && ben_cmp(d->keys[d->n - 1], key) >= 0) { | |
ben_free(key); | |
key = NULL; | |
ctx->error = BEN_INVALID; | |
goto error; | |
} | |
value = decode(ctx); | |
if (value == NULL) { | |
ben_free(key); | |
key = NULL; | |
goto error; | |
} | |
d->keys[d->n] = key; | |
d->values[d->n] = value; | |
d->n++; | |
} | |
if (ctx->off >= ctx->len) { | |
ctx->error = BEN_INSUFFICIENT; | |
goto error; | |
} | |
ctx->off += 1; | |
return (struct bencode *) d; | |
error: | |
ben_free((struct bencode *) d); | |
return NULL; | |
} | |
/* off is the position of first number in */ | |
static int read_long_long(long long *ll, struct decode *ctx, int c) | |
{ | |
char buf[LONGLONGSIZE]; /* fits all 64 bit integers */ | |
size_t pos; | |
char *endptr; | |
size_t slen; | |
pos = find(ctx->data, ctx->len, ctx->off, c); | |
if (pos == -1) | |
return insufficient(ctx); | |
slen = pos - ctx->off; | |
if (slen == 0 || slen >= sizeof buf) | |
return invalid(ctx); | |
assert(slen < sizeof buf); | |
memcpy(buf, ctx->data + ctx->off, slen); | |
buf[slen] = 0; | |
if (buf[0] != '-' && !isdigit(buf[0])) | |
return invalid(ctx); | |
errno = 0; | |
*ll = strtoll(buf, &endptr, 10); | |
if (errno == ERANGE || *endptr != 0) | |
return invalid(ctx); | |
/* | |
* Demand a unique encoding for all integers. | |
* Zero may not begin with a (minus) sign. | |
* Non-zero integers may not have leading zeros in the encoding. | |
*/ | |
if (buf[0] == '-' && buf[1] == '0') | |
return invalid(ctx); | |
if (buf[0] == '0' && pos != (ctx->off + 1)) | |
return invalid(ctx); | |
ctx->off = pos + 1; | |
return 0; | |
} | |
static struct bencode *decode_int(struct decode *ctx) | |
{ | |
struct bencode_int *b; | |
long long ll; | |
ctx->off += 1; | |
if (read_long_long(&ll, ctx, 'e')) | |
return NULL; | |
b = alloc(BENCODE_INT); | |
if (b == NULL) | |
return oom_ptr(ctx); | |
b->ll = ll; | |
return (struct bencode *) b; | |
} | |
static int resize_list(struct bencode_list *list) | |
{ | |
struct bencode **newvalues; | |
size_t newsize; | |
if (list->alloc >= MAX_ALLOC) | |
return -1; | |
if (list->alloc == 0) | |
list->alloc = 4; | |
else | |
list->alloc *= 2; | |
newsize = sizeof(list->values[0]) * list->alloc; | |
newvalues = realloc(list->values, newsize); | |
if (newvalues == NULL) | |
return -1; | |
list->values = newvalues; | |
return 0; | |
} | |
static struct bencode *decode_list(struct decode *ctx) | |
{ | |
struct bencode_list *l = alloc(BENCODE_LIST); | |
if (l == NULL) | |
return oom_ptr(ctx); | |
ctx->off += 1; | |
while (ctx->off < ctx->len && ctx->data[ctx->off] != 'e') { | |
struct bencode *b = decode(ctx); | |
if (b == NULL) | |
goto error; | |
if (ben_list_append((struct bencode *) l, b)) { | |
ben_free(b); | |
ctx->error = BEN_NO_MEMORY; | |
goto error; | |
} | |
} | |
if (ctx->off >= ctx->len) { | |
ctx->error = BEN_INSUFFICIENT; | |
goto error; | |
} | |
ctx->off += 1; | |
return (struct bencode *) l; | |
error: | |
ben_free((struct bencode *) l); | |
return NULL; | |
} | |
static size_t read_size_t(struct decode *ctx, int c) | |
{ | |
long long ll; | |
size_t s; | |
if (read_long_long(&ll, ctx, c)) | |
return -1; | |
if (ll < 0) | |
return invalid(ctx); | |
/* | |
* Test that information is not lost when converting from long long | |
* to size_t | |
*/ | |
s = (size_t) ll; | |
if (ll != (long long) s) | |
return invalid(ctx); | |
return s; | |
} | |
static struct bencode *decode_str(struct decode *ctx) | |
{ | |
struct bencode *b; | |
size_t datalen = read_size_t(ctx, ':'); /* Read the string length */ | |
if (datalen == -1) | |
return NULL; | |
if ((ctx->off + datalen) > ctx->len) | |
return insufficient_ptr(ctx); | |
/* Allocate string structure and copy data into it */ | |
b = ben_blob(ctx->data + ctx->off, datalen); | |
ctx->off += datalen; | |
return b; | |
} | |
static struct bencode *decode(struct decode *ctx) | |
{ | |
ctx->level++; | |
if (ctx->level > 256) | |
return invalid_ptr(ctx); | |
if (ctx->off == ctx->len) | |
return insufficient_ptr(ctx); | |
assert (ctx->off < ctx->len); | |
switch (ctx->data[ctx->off]) { | |
case '0': | |
case '1': | |
case '2': | |
case '3': | |
case '4': | |
case '5': | |
case '6': | |
case '7': | |
case '8': | |
case '9': | |
return decode_str(ctx); | |
case 'b': | |
return decode_bool(ctx); | |
case 'd': | |
return decode_dict(ctx); | |
case 'i': | |
return decode_int(ctx); | |
case 'l': | |
return decode_list(ctx); | |
default: | |
return invalid_ptr(ctx); | |
} | |
} | |
struct bencode *ben_decode(const void *data, size_t len) | |
{ | |
struct decode ctx = {.data = data, .len = len}; | |
struct bencode *b = decode(&ctx); | |
if (b != NULL && ctx.off != len) { | |
ben_free(b); | |
return NULL; | |
} | |
return b; | |
} | |
struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error) | |
{ | |
struct decode ctx = {.data = data, .len = len, .off = *off}; | |
struct bencode *b = decode(&ctx); | |
*off = ctx.off; | |
if (error != NULL) { | |
assert((b != NULL) ^ (ctx.error != 0)); | |
*error = ctx.error; | |
} | |
return b; | |
} | |
static void free_dict(struct bencode_dict *d) | |
{ | |
size_t pos; | |
for (pos = 0; pos < d->n; pos++) { | |
ben_free(d->keys[pos]); | |
d->keys[pos] = NULL; | |
ben_free(d->values[pos]); | |
d->values[pos] = NULL; | |
} | |
} | |
static void free_list(struct bencode_list *list) | |
{ | |
size_t pos; | |
for (pos = 0; pos < list->n; pos++) { | |
ben_free(list->values[pos]); | |
list->values[pos] = NULL; | |
} | |
} | |
static int putonechar(char *data, size_t size, size_t *pos, char c) | |
{ | |
if (*pos >= size) | |
return -1; | |
data[*pos] = c; | |
*pos += 1; | |
return 0; | |
} | |
static int puthexchar(char *data, size_t size, size_t *pos, unsigned char hex) | |
{ | |
char buf[5]; | |
int len = snprintf(buf, sizeof buf, "\\x%.2x", hex); | |
assert(len == 4); | |
if ((*pos + len) > size) | |
return -1; | |
memcpy(data + *pos, buf, len); | |
*pos += len; | |
return 0; | |
} | |
static int putlonglong(char *data, size_t size, size_t *pos, long long ll) | |
{ | |
char buf[LONGLONGSIZE]; | |
int len = snprintf(buf, sizeof buf, "%lld", ll); | |
assert(len > 0); | |
if ((*pos + len) > size) | |
return -1; | |
memcpy(data + *pos, buf, len); | |
*pos += len; | |
return 0; | |
} | |
static int putunsignedlonglong(char *data, size_t size, size_t *pos, | |
unsigned long long llu) | |
{ | |
char buf[LONGLONGSIZE]; | |
int len = snprintf(buf, sizeof buf, "%llu", llu); | |
assert(len > 0); | |
if ((*pos + len) > size) | |
return -1; | |
memcpy(data + *pos, buf, len); | |
*pos += len; | |
return 0; | |
} | |
static int putstr(char *data, size_t size, size_t *pos, char *s) | |
{ | |
size_t len = strlen(s); | |
if (*pos + len > size) | |
return -1; | |
memcpy(data + *pos, s, len); | |
*pos += len; | |
return 0; | |
} | |
static int print(char *data, size_t size, size_t *pos, const struct bencode *b) | |
{ | |
const struct bencode_bool *boolean; | |
const struct bencode_dict *dict; | |
const struct bencode_int *integer; | |
const struct bencode_list *list; | |
const struct bencode_str *s; | |
size_t i; | |
int len; | |
struct bencode_keyvalue *pairs; | |
switch (b->type) { | |
case BENCODE_BOOL: | |
boolean = ben_bool_const_cast(b); | |
len = boolean->b ? 4 : 5; | |
if (*pos + len > size) | |
return -1; | |
memcpy(data + *pos, (len == 4) ? "True" : "False", len); | |
*pos += len; | |
return 0; | |
case BENCODE_DICT: | |
if (putonechar(data, size, pos, '{')) | |
return -1; | |
dict = ben_dict_const_cast(b); | |
pairs = malloc(dict->n * sizeof(pairs[0])); | |
if (pairs == NULL) { | |
fprintf(stderr, "bencode: No memory for dict serialization\n"); | |
return -1; | |
} | |
for (i = 0; i < dict->n; i++) { | |
pairs[i].key = dict->keys[i]; | |
pairs[i].value = dict->values[i]; | |
} | |
qsort(pairs, dict->n, sizeof(pairs[0]), ben_cmp_qsort); | |
for (i = 0; i < dict->n; i++) { | |
if (print(data, size, pos, pairs[i].key)) | |
break; | |
if (putstr(data, size, pos, ": ")) | |
break; | |
if (print(data, size, pos, pairs[i].value)) | |
break; | |
if (i < (dict->n - 1)) { | |
if (putstr(data, size, pos, ", ")) | |
break; | |
} | |
} | |
free(pairs); | |
pairs = NULL; | |
if (i < dict->n) | |
return -1; | |
return putonechar(data, size, pos, '}'); | |
case BENCODE_INT: | |
integer = ben_int_const_cast(b); | |
if (putlonglong(data, size, pos, integer->ll)) | |
return -1; | |
return 0; | |
case BENCODE_LIST: | |
if (putonechar(data, size, pos, '[')) | |
return -1; | |
list = ben_list_const_cast(b); | |
for (i = 0; i < list->n; i++) { | |
if (print(data, size, pos, list->values[i])) | |
return -1; | |
if (i < (list->n - 1) && putstr(data, size, pos, ", ")) | |
return -1; | |
} | |
return putonechar(data, size, pos, ']'); | |
case BENCODE_STR: | |
s = ben_str_const_cast(b); | |
if (putonechar(data, size, pos, '\'')) | |
return -1; | |
for (i = 0; i < s->len; i++) { | |
if (!isprint(s->s[i])) { | |
if (puthexchar(data, size, pos, s->s[i])) | |
return -1; | |
continue; | |
} | |
switch (s->s[i]) { | |
case '\'': | |
case '\\': | |
/* Need escape character */ | |
if (putonechar(data, size, pos, '\\')) | |
return -1; | |
default: | |
if (putonechar(data, size, pos, s->s[i])) | |
return -1; | |
break; | |
} | |
} | |
return putonechar(data, size, pos, '\''); | |
default: | |
fprintf(stderr, "bencode: serialization type %d not implemented\n", b->type); | |
abort(); | |
} | |
} | |
static size_t get_printed_size(const struct bencode *b) | |
{ | |
size_t pos; | |
const struct bencode_bool *boolean; | |
const struct bencode_dict *d; | |
const struct bencode_int *i; | |
const struct bencode_list *l; | |
const struct bencode_str *s; | |
size_t size = 0; | |
char buf[1]; | |
switch (b->type) { | |
case BENCODE_BOOL: | |
boolean = ben_bool_const_cast(b); | |
return boolean->b ? 4 : 5; /* "True" and "False" */ | |
case BENCODE_DICT: | |
size += 1; /* "{" */ | |
d = ben_dict_const_cast(b); | |
for (pos = 0; pos < d->n; pos++) { | |
size += get_printed_size(d->keys[pos]); | |
size += 2; /* ": " */ | |
size += get_printed_size(d->values[pos]); | |
if (pos < (d->n - 1)) | |
size += 2; /* ", " */ | |
} | |
size += 1; /* "}" */ | |
return size; | |
case BENCODE_INT: | |
i = ben_int_const_cast(b); | |
return snprintf(buf, 0, "%lld", i->ll); | |
case BENCODE_LIST: | |
size += 1; /* "[" */ | |
l = ben_list_const_cast(b); | |
for (pos = 0; pos < l->n; pos++) { | |
size += get_printed_size(l->values[pos]); | |
if (pos < (l->n - 1)) | |
size += 2; /* ", " */ | |
} | |
size += 1; /* "]" */ | |
return size; | |
case BENCODE_STR: | |
s = ben_str_const_cast(b); | |
size += 1; /* ' */ | |
for (pos = 0; pos < s->len; pos++) { | |
if (!isprint(s->s[pos])) { | |
size += 4; /* "\xDD" */ | |
continue; | |
} | |
switch (s->s[pos]) { | |
case '\'': | |
case '\\': | |
size += 2; /* escaped characters */ | |
break; | |
default: | |
size += 1; | |
break; | |
} | |
} | |
size += 1; /* ' */ | |
return size; | |
default: | |
fprintf(stderr, "bencode: invalid bencode type: %c\n", b->type); | |
abort(); | |
} | |
} | |
static int serialize(char *data, size_t size, size_t *pos, | |
const struct bencode *b) | |
{ | |
const struct bencode_dict *dict; | |
const struct bencode_int *integer; | |
const struct bencode_list *list; | |
const struct bencode_str *s; | |
size_t i; | |
struct bencode_keyvalue *pairs; | |
switch (b->type) { | |
case BENCODE_BOOL: | |
if ((*pos + 2) > size) | |
return -1; | |
data[*pos] = 'b'; | |
data[*pos + 1] = ben_bool_const_cast(b)->b ? '1' : '0'; | |
*pos += 2; | |
return 0; | |
case BENCODE_DICT: | |
if (putonechar(data, size, pos, 'd')) | |
return -1; | |
dict = ben_dict_const_cast(b); | |
pairs = malloc(dict->n * sizeof(pairs[0])); | |
if (pairs == NULL) { | |
fprintf(stderr, "bencode: No memory for dict serialization\n"); | |
return -1; | |
} | |
for (i = 0; i < dict->n; i++) { | |
pairs[i].key = dict->keys[i]; | |
pairs[i].value = dict->values[i]; | |
} | |
qsort(pairs, dict->n, sizeof(pairs[0]), ben_cmp_qsort); | |
for (i = 0; i < dict->n; i++) { | |
if (serialize(data, size, pos, pairs[i].key)) | |
break; | |
if (serialize(data, size, pos, pairs[i].value)) | |
break; | |
} | |
free(pairs); | |
pairs = NULL; | |
if (i < dict->n) | |
return -1; | |
return putonechar(data, size, pos, 'e'); | |
case BENCODE_INT: | |
if (putonechar(data, size, pos, 'i')) | |
return -1; | |
integer = ben_int_const_cast(b); | |
if (putlonglong(data, size, pos, integer->ll)) | |
return -1; | |
return putonechar(data, size, pos, 'e'); | |
case BENCODE_LIST: | |
if (putonechar(data, size, pos, 'l')) | |
return -1; | |
list = ben_list_const_cast(b); | |
for (i = 0; i < list->n; i++) { | |
if (serialize(data, size, pos, list->values[i])) | |
return -1; | |
} | |
return putonechar(data, size, pos, 'e'); | |
case BENCODE_STR: | |
s = ben_str_const_cast(b); | |
if (putunsignedlonglong(data, size, pos, ((long long) s->len))) | |
return -1; | |
if (putonechar(data, size, pos, ':')) | |
return -1; | |
if ((*pos + s->len) > size) | |
return -1; | |
memcpy(data + *pos, s->s, s->len); | |
*pos += s->len; | |
return 0; | |
default: | |
fprintf(stderr, "bencode: serialization type %d not implemented\n", b->type); | |
abort(); | |
} | |
} | |
static size_t get_size(const struct bencode *b) | |
{ | |
size_t pos; | |
const struct bencode_dict *d; | |
const struct bencode_int *i; | |
const struct bencode_list *l; | |
const struct bencode_str *s; | |
size_t size = 0; | |
char buf[1]; | |
switch (b->type) { | |
case BENCODE_BOOL: | |
return 2; | |
case BENCODE_DICT: | |
d = ben_dict_const_cast(b); | |
for (pos = 0; pos < d->n; pos++) { | |
size += get_size(d->keys[pos]); | |
size += get_size(d->values[pos]); | |
} | |
return size + 2; | |
case BENCODE_INT: | |
i = ben_int_const_cast(b); | |
return 2 + snprintf(buf, 0, "%lld", i->ll); | |
case BENCODE_LIST: | |
l = ben_list_const_cast(b); | |
for (pos = 0; pos < l->n; pos++) | |
size += get_size(l->values[pos]); | |
return size + 2; | |
case BENCODE_STR: | |
s = ben_str_const_cast(b); | |
return snprintf(buf, 0, "%zu", s->len) + 1 + s->len; | |
default: | |
fprintf(stderr, "bencode: invalid bencode type: %c\n", b->type); | |
abort(); | |
} | |
} | |
size_t ben_encoded_size(const struct bencode *b) | |
{ | |
return get_size(b); | |
} | |
void *ben_encode(size_t *len, const struct bencode *b) | |
{ | |
size_t size = get_size(b); | |
void *data = malloc(size); | |
if (data == NULL) { | |
fprintf(stderr, "bencode: No memory to encode\n"); | |
return NULL; | |
} | |
*len = 0; | |
if (serialize(data, size, len, b)) { | |
free(data); | |
return NULL; | |
} | |
assert(*len == size); | |
return data; | |
} | |
size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b) | |
{ | |
size_t pos = 0; | |
if (serialize(data, maxlen, &pos, b)) | |
return -1; | |
return pos; | |
} | |
void ben_free(struct bencode *b) | |
{ | |
if (b == NULL) | |
return; | |
switch (b->type) { | |
case BENCODE_BOOL: | |
break; | |
case BENCODE_DICT: | |
free_dict((struct bencode_dict *) b); | |
break; | |
case BENCODE_INT: | |
break; | |
case BENCODE_LIST: | |
free_list((struct bencode_list *) b); | |
break; | |
case BENCODE_STR: | |
free(((struct bencode_str *) b)->s); | |
break; | |
default: | |
fprintf(stderr, "bencode: invalid type: %d\n", b->type); | |
abort(); | |
} | |
memset(b, -1, type_size(b->type)); /* data poison */ | |
free(b); | |
} | |
struct bencode *ben_blob(const void *data, size_t len) | |
{ | |
struct bencode_str *b = alloc(BENCODE_STR); | |
if (b == NULL) | |
return NULL; | |
/* Allocate one extra byte for zero termination for convenient use */ | |
b->s = malloc(len + 1); | |
if (b->s == NULL) { | |
free(b); | |
return NULL; | |
} | |
memcpy(b->s, data, len); | |
b->len = len; | |
b->s[len] = 0; | |
return (struct bencode *) b; | |
} | |
struct bencode *ben_bool(int boolean) | |
{ | |
struct bencode_bool *b = alloc(BENCODE_BOOL); | |
if (b == NULL) | |
return NULL; | |
b->b = boolean ? 1 : 0; | |
return (struct bencode *) b; | |
} | |
struct bencode *ben_dict(void) | |
{ | |
return alloc(BENCODE_DICT); | |
} | |
struct bencode *ben_dict_get(const struct bencode *dict, const struct bencode *key) | |
{ | |
const struct bencode_dict *d = ben_dict_const_cast(dict); | |
size_t pos; | |
for (pos = 0; pos < d->n; pos++) { | |
if (ben_cmp(d->keys[pos], key) == 0) | |
return d->values[pos]; | |
} | |
return NULL; | |
} | |
struct bencode *ben_dict_get_by_str(const struct bencode *dict, const char *key) | |
{ | |
const struct bencode_dict *d = ben_dict_const_cast(dict); | |
size_t keylen = strlen(key); | |
struct bencode_str *dkey; | |
size_t pos; | |
for (pos = 0; pos < d->n; pos++) { | |
dkey = ben_str_cast(d->keys[pos]); | |
if (dkey == NULL) | |
continue; | |
if (dkey->len != keylen) | |
continue; | |
if (strcmp(dkey->s, key) == 0) | |
return d->values[pos]; | |
} | |
return NULL; | |
} | |
static void replacewithlast(struct bencode **arr, size_t i, size_t n) | |
{ | |
arr[i] = arr[n - 1]; | |
arr[n - 1] = NULL; | |
} | |
struct bencode *ben_dict_pop(struct bencode *dict, const struct bencode *key) | |
{ | |
struct bencode_dict *d = ben_dict_cast(dict); | |
size_t pos; | |
for (pos = 0; pos < d->n; pos++) { | |
if (ben_cmp(d->keys[pos], key) == 0) { | |
struct bencode *value = d->values[pos]; | |
ben_free(d->keys[pos]); | |
replacewithlast(d->keys, pos, d->n); | |
replacewithlast(d->values, pos, d->n); | |
d->n -= 1; | |
return value; | |
} | |
} | |
return NULL; | |
} | |
int ben_dict_set(struct bencode *dict, struct bencode *key, struct bencode *value) | |
{ | |
struct bencode_dict *d = ben_dict_cast(dict); | |
assert(d->n <= d->alloc); | |
if (d->n == d->alloc && resize_dict(d)) | |
return -1; | |
ben_free(ben_dict_pop(dict, key)); | |
d->keys[d->n] = key; | |
d->values[d->n] = value; | |
d->n++; | |
return 0; | |
} | |
int ben_dict_set_by_str(struct bencode *dict, const char *key, struct bencode *value) | |
{ | |
struct bencode *bkey = ben_str(key); | |
if (bkey == NULL) | |
return -1; | |
if (ben_dict_set(dict, bkey, value)) { | |
ben_free(bkey); | |
return -1; | |
} | |
return 0; | |
} | |
int ben_dict_set_str_by_str(struct bencode *dict, const char *key, const char *value) | |
{ | |
struct bencode *bkey = ben_str(key); | |
struct bencode *bvalue = ben_str(value); | |
if (bkey == NULL || bvalue == NULL) { | |
ben_free(bkey); | |
ben_free(bvalue); | |
return -1; | |
} | |
if (ben_dict_set(dict, bkey, bvalue)) { | |
ben_free(bkey); | |
ben_free(bvalue); | |
return -1; | |
} | |
return 0; | |
} | |
struct bencode *ben_int(long long ll) | |
{ | |
struct bencode_int *b = alloc(BENCODE_INT); | |
if (b == NULL) | |
return NULL; | |
b->ll = ll; | |
return (struct bencode *) b; | |
} | |
struct bencode *ben_list(void) | |
{ | |
return alloc(BENCODE_LIST); | |
} | |
int ben_list_append(struct bencode *list, struct bencode *b) | |
{ | |
struct bencode_list *l = ben_list_cast(list); | |
/* NULL pointer de-reference if the cast fails */ | |
assert(l->n <= l->alloc); | |
if (l->n == l->alloc && resize_list(l)) | |
return -1; | |
l->values[l->n] = b; | |
l->n += 1; | |
return 0; | |
} | |
void ben_list_set(struct bencode *list, size_t i, struct bencode *b) | |
{ | |
struct bencode_list *l = ben_list_cast(list); | |
if (i >= l->n) { | |
fprintf(stderr, "bencode: ben_list_set() out of bounds: %zu\n", i); | |
abort(); | |
} | |
ben_free(l->values[i]); | |
l->values[i] = b; | |
} | |
char *ben_print(const struct bencode *b) | |
{ | |
size_t size = get_printed_size(b); | |
char *data = malloc(size + 1); | |
size_t len = 0; | |
if (data == NULL) { | |
fprintf(stderr, "bencode: No memory to print\n"); | |
return NULL; | |
} | |
if (print(data, size, &len, b)) { | |
free(data); | |
return NULL; | |
} | |
assert(len == size); | |
data[size] = 0; | |
return data; | |
} | |
struct bencode *ben_str(const char *s) | |
{ | |
return ben_blob(s, strlen(s)); | |
} | |
const char *ben_strerror(int error) | |
{ | |
switch (error) { | |
case BEN_OK: | |
return "OK (no error)"; | |
case BEN_INVALID: | |
return "Invalid data"; | |
case BEN_INSUFFICIENT: | |
return "Insufficient amount of data (need more data)"; | |
case BEN_NO_MEMORY: | |
return "Out of memory"; | |
default: | |
fprintf(stderr, "Unknown error code: %d\n", error); | |
return NULL; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _BENCODE_H | |
#define _BENCODE_H | |
#include <stdio.h> | |
enum { | |
BENCODE_BOOL = 1, | |
BENCODE_DICT, | |
BENCODE_INT, | |
BENCODE_LIST, | |
BENCODE_STR, | |
}; | |
enum { | |
BEN_OK = 0, /* No errors. Set to zero. Non-zero implies an error. */ | |
BEN_INVALID, /* Invalid data was given to decoder */ | |
BEN_INSUFFICIENT, /* Insufficient amount of data for decoding */ | |
BEN_NO_MEMORY, /* Memory allocation failed */ | |
}; | |
struct bencode { | |
char type; | |
}; | |
struct bencode_bool { | |
char type; | |
char b; | |
}; | |
struct bencode_dict { | |
char type; | |
size_t n; | |
size_t alloc; | |
/* keys and values can be put into a same array, later */ | |
struct bencode **keys; | |
struct bencode **values; | |
}; | |
struct bencode_int { | |
char type; | |
long long ll; | |
}; | |
struct bencode_list { | |
char type; | |
size_t n; | |
size_t alloc; | |
struct bencode **values; | |
}; | |
struct bencode_str { | |
char type; | |
size_t len; | |
char *s; | |
}; | |
/* | |
* Decode 'data' with 'len' bytes of data. Returns NULL on error. | |
* The encoded data must be exactly 'len' bytes (not less), otherwise NULL | |
* is returned. ben_decode2() function supports partial decoding ('len' is | |
* larger than actual decoded message) and gives more accurate error reports. | |
*/ | |
struct bencode *ben_decode(const void *data, size_t len); | |
/* | |
* Same as ben_decode(), but allows one to set start offset for decoding with | |
* 'off' and reports errors more accurately. | |
* | |
* '*off' must point to decoding start offset inside 'data'. | |
* If decoding is successful, '*off' is updated to point to the next byte | |
* after the decoded message. | |
* | |
* If 'error != NULL', it is updated according to the success and error of | |
* the decoding. BEN_OK is success, BEN_INVALID means invalid data. | |
* BEN_INSUFFICIENT means data is invalid but could be valid if more data | |
* was given for decoding. BEN_NO_MEMORY means decoding ran out of memory. | |
*/ | |
struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error); | |
/* | |
* ben_cmp() is similar to strcmp(), but compares both integers and strings. | |
* An integer is always less than a string. | |
* | |
* ben_cmp(a, b) returns -1 if a < b, 0 if a == b, and 1 if a > b. | |
*/ | |
int ben_cmp(const struct bencode *a, const struct bencode *b); | |
/* | |
* Comparison function suitable for qsort(). Uses ben_cmp(), so this can be | |
* used to order both integer and string arrays. | |
*/ | |
int ben_cmp_qsort(const void *a, const void *b); | |
/* Get the serialization size of bencode structure 'b' */ | |
size_t ben_encoded_size(const struct bencode *b); | |
/* encode 'b'. Return encoded data with a pointer, and length in '*len' */ | |
void *ben_encode(size_t *len, const struct bencode *b); | |
/* | |
* encode 'b' into 'data' buffer with at most 'maxlen' bytes. | |
* Returns the size of encoded data. | |
*/ | |
size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b); | |
/* You must use ben_free() for all allocated bencode structures after use */ | |
void ben_free(struct bencode *b); | |
/* Create a string from binary data with len bytes */ | |
struct bencode *ben_blob(const void *data, size_t len); | |
/* Create a boolean from integer */ | |
struct bencode *ben_bool(int b); | |
/* Create an empty dictionary */ | |
struct bencode *ben_dict(void); | |
/* | |
* Try to locate 'key' in dictionary. Returns the associated value, if found. | |
* Returns NULL if the key does not exist. | |
*/ | |
struct bencode *ben_dict_get(const struct bencode *d, const struct bencode *key); | |
struct bencode *ben_dict_get_by_str(const struct bencode *d, const char *key); | |
/* | |
* Try to locate 'key' in dictionary. Returns the associated value, if found. | |
* The value must be later freed with ben_free(). Returns NULL if the key | |
* does not exist. | |
*/ | |
struct bencode *ben_dict_pop(struct bencode *d, const struct bencode *key); | |
/* | |
* Set 'key' in dictionary to be 'value'. An old value exists for the key | |
* is freed if it exists. 'key' and 'value' are owned by the dictionary | |
* after a successful call (one may not call ben_free() for 'key' or | |
* 'value'). One may free 'key' and 'value' if the call is unsuccessful. | |
* | |
* Returns 0 on success, -1 on failure (no memory). | |
*/ | |
int ben_dict_set(struct bencode *d, struct bencode *key, struct bencode *value); | |
/* Same as ben_dict_set(), but the key is a C string */ | |
int ben_dict_set_by_str(struct bencode *d, const char *key, struct bencode *value); | |
/* Same as ben_dict_set(), but the key and value are C strings */ | |
int ben_dict_set_str_by_str(struct bencode *d, const char *key, const char *value); | |
struct bencode *ben_int(long long ll); | |
/* Create an empty list */ | |
struct bencode *ben_list(void); | |
/* | |
* Append 'b' to 'list'. Returns 0 on success, -1 on failure (no memory). | |
* One may not call ben_free(b) after a successful call, because the list owns | |
* the object 'b'. | |
*/ | |
int ben_list_append(struct bencode *list, struct bencode *b); | |
/* | |
* Returns a Python formatted C string representation of 'b' on success, | |
* NULL on failure. The returned string should be freed with free(). | |
* | |
* Note: The string is terminated with '\0'. All instances of '\0' bytes in | |
* the bencoded data are escaped so that there is only one '\0' byte | |
* in the generated string at the end. | |
*/ | |
char *ben_print(const struct bencode *b); | |
/* Create a string from C string (note bencode string may contain '\0'. */ | |
struct bencode *ben_str(const char *s); | |
/* Return a human readable explanation of error returned with ben_decode2() */ | |
const char *ben_strerror(int error); | |
/* ben_is_bool() returns 1 iff b is a boolean, 0 otherwise */ | |
static inline int ben_is_bool(struct bencode *b) | |
{ | |
return b->type == BENCODE_BOOL; | |
} | |
static inline int ben_is_dict(struct bencode *b) | |
{ | |
return b->type == BENCODE_DICT; | |
} | |
static inline int ben_is_int(struct bencode *b) | |
{ | |
return b->type == BENCODE_INT; | |
} | |
static inline int ben_is_list(struct bencode *b) | |
{ | |
return b->type == BENCODE_LIST; | |
} | |
static inline int ben_is_str(struct bencode *b) | |
{ | |
return b->type == BENCODE_STR; | |
} | |
/* | |
* ben_bool_const_cast(b) returns "(const struct bencode_bool *) b" if the | |
* underlying object is a boolean, NULL otherwise. | |
*/ | |
static inline const struct bencode_bool *ben_bool_const_cast(const struct bencode *b) | |
{ | |
return b->type == BENCODE_BOOL ? ((const struct bencode_bool *) b) : NULL; | |
} | |
/* | |
* ben_bool_cast(b) returns "(struct bencode_bool *) b" if the | |
* underlying object is a boolean, NULL otherwise. | |
*/ | |
static inline struct bencode_bool *ben_bool_cast(struct bencode *b) | |
{ | |
return b->type == BENCODE_BOOL ? ((struct bencode_bool *) b) : NULL; | |
} | |
static inline const struct bencode_dict *ben_dict_const_cast(const struct bencode *b) | |
{ | |
return b->type == BENCODE_DICT ? ((const struct bencode_dict *) b) : NULL; | |
} | |
static inline struct bencode_dict *ben_dict_cast(struct bencode *b) | |
{ | |
return b->type == BENCODE_DICT ? ((struct bencode_dict *) b) : NULL; | |
} | |
static inline const struct bencode_int *ben_int_const_cast(const struct bencode *i) | |
{ | |
return i->type == BENCODE_INT ? ((const struct bencode_int *) i) : NULL; | |
} | |
static inline struct bencode_int *ben_int_cast(struct bencode *i) | |
{ | |
return i->type == BENCODE_INT ? ((struct bencode_int *) i) : NULL; | |
} | |
static inline const struct bencode_list *ben_list_const_cast(const struct bencode *list) | |
{ | |
return list->type == BENCODE_LIST ? ((const struct bencode_list *) list) : NULL; | |
} | |
static inline struct bencode_list *ben_list_cast(struct bencode *list) | |
{ | |
return list->type == BENCODE_LIST ? ((struct bencode_list *) list) : NULL; | |
} | |
static inline const struct bencode_str *ben_str_const_cast(const struct bencode *str) | |
{ | |
return str->type == BENCODE_STR ? ((const struct bencode_str *) str) : NULL; | |
} | |
static inline struct bencode_str *ben_str_cast(struct bencode *str) | |
{ | |
return str->type == BENCODE_STR ? ((struct bencode_str *) str) : NULL; | |
} | |
/* Return the number of keys in a dictionary 'b' */ | |
static inline size_t ben_dict_len(const struct bencode *b) | |
{ | |
return ben_dict_const_cast(b)->n; | |
} | |
/* Return the number of items in a list 'b' */ | |
static inline size_t ben_list_len(const struct bencode *b) | |
{ | |
return ben_list_const_cast(b)->n; | |
} | |
/* | |
* ben_list_get(list, i) returns object at position i in list, | |
* or NULL if position i is out of bounds. | |
*/ | |
static inline struct bencode *ben_list_get(const struct bencode *list, size_t i) | |
{ | |
const struct bencode_list *l = ben_list_const_cast(list); | |
return i < l->n ? l->values[i] : NULL; | |
} | |
/* | |
* ben_list_set(list, i, b) sets object b to list at position i. | |
* The old value at position i is freed. | |
* The program aborts if position i is out of bounds. | |
*/ | |
void ben_list_set(struct bencode *list, size_t i, struct bencode *b); | |
/* Return the number of bytes in a string 'b' */ | |
static inline size_t ben_str_len(const struct bencode *b) | |
{ | |
return ben_str_const_cast(b)->len; | |
} | |
/* Return boolean value (0 or 1) of 'b' */ | |
static inline int ben_bool_val(const struct bencode *b) | |
{ | |
return ben_bool_const_cast(b)->b ? 1 : 0; | |
} | |
/* Return integer value of 'b' */ | |
static inline long long ben_int_val(const struct bencode *b) | |
{ | |
return ben_int_const_cast(b)->ll; | |
} | |
/* | |
* Note: the string is always zero terminated. Also, the string may | |
* contain more than one zero. | |
* bencode strings are not compatible with C strings. | |
*/ | |
static inline const char *ben_str_val(const struct bencode *b) | |
{ | |
return ben_str_const_cast(b)->s; | |
} | |
/* | |
* ben_list_for_each() is an iterator macro for bencoded lists. | |
* | |
* pos is a size_t. | |
*/ | |
#define ben_list_for_each(b, pos, l) \ | |
for ((pos) = 0; (b) = ((const struct bencode_list *) (l))->values[(pos)], (pos) < ((const struct bencode_list *) (l))->n; (pos)++) | |
/* | |
* ben_dict_for_each() is an iterator macro for bencoded dictionaries. | |
* | |
* pos is a size_t. | |
*/ | |
#define ben_dict_for_each(key, value, pos, d) \ | |
for ((pos) = 0; (key) = ((const struct bencode_dict *) (d))->keys[(pos)], (value) = ((const struct bencode_dict *) (d))->values[(pos)], (pos) < ((const struct bencode_dict *) (d))->n; (pos)++) | |
#endif /* _BENCODE_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string.h> | |
#include <stdlib.h> | |
#include "bencode.h" | |
// reads a binary file with the flag 'rb' on fopen | |
// return -1 if fail or the number of bytes read | |
int readbinaryfile(unsigned char * buffer, char * filename) { | |
FILE * h; | |
int i = 0; | |
char tmp; | |
h = fopen(filename, "rb"); | |
if (!h) { | |
return -1; | |
} | |
while (!feof(h)) { | |
fread(&tmp,1,1,h); | |
buffer[i] = tmp; | |
i++; | |
} | |
fclose(h); | |
return i-1; | |
} | |
// read a torrent file. Support torrent with size less than 12k | |
// 0 = read or parser error | |
// 1 = file read ok | |
int parse_torrent(char *torrent_filename) { | |
struct bencode *torrent; | |
struct bencode *info; | |
struct bencode_list *files; | |
struct bencode_str *filename; | |
int piece_length = 0; | |
unsigned char buf[12 * 1024]; | |
unsigned char *sha1_pieces; | |
int sha1_pieces_len; | |
int len = 0; | |
int file_length = 0; | |
// read a torrent file | |
memset(buf, 0, sizeof(buf)); | |
len = readbinaryfile(buf, torrent_filename); | |
// read fail | |
if (len <= 0) { | |
return 0; | |
} | |
// decode the bencode buffer | |
torrent = (struct bencode*)ben_decode(buf,len); | |
// decode fail | |
if(!torrent) { | |
return 0; | |
} | |
// pull out the .info part, which has stuff about the file we're downloading | |
info = (struct bencode*)ben_dict_get_by_str((struct bencode*)torrent,"info"); | |
// decode fail | |
if(!info) { | |
// clean memory and return | |
ben_free(torrent); | |
return 0; | |
} | |
// get the piece length | |
piece_length = ((struct bencode_int*)ben_dict_get_by_str(info,"piece length"))->ll; | |
Pprintf(false,"parse_torrent piece_length=%d\n", piece_length); | |
// get the concatened SHA1 from all pieces | |
sha1_pieces = ((struct bencode_str*)ben_dict_get_by_str(info,"pieces"))->s; | |
sha1_pieces_len = ((struct bencode_str*)ben_dict_get_by_str(info,"pieces"))->len; | |
//hexdump(sha1_pieces, sha1_pieces_len, "sha1_pieces"); | |
// get the files dict from multiple file torrent. otherwise is a single file torrent | |
files = (struct bencode_list*)ben_dict_get_by_str(info,"files"); | |
if (files) { | |
Pprintf(false, "parse_torrent multiple files\n"); | |
} | |
else { | |
filename = (struct bencode_str*)ben_dict_get_by_str(info,"name"); | |
file_length = ((struct bencode_int*)ben_dict_get_by_str(info,"length"))->ll; | |
Pprintf(false, "parse_torrent filename=%s\n", filename->s); | |
Pprintf(false, "parse_torrent file_length=%d\n", file_length); | |
Pprintf(false, "parse_torrent check_next_piece=%d\n", check_next_piece(filename->s, file_length, piece_length)); | |
} | |
// clean memory and return | |
ben_free(torrent); | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment