Skip to content

Instantly share code, notes, and snippets.

@mxswd
Created January 27, 2013 10:08
Show Gist options
  • Save mxswd/4647675 to your computer and use it in GitHub Desktop.
Save mxswd/4647675 to your computer and use it in GitHub Desktop.
/* array.c - automatic dynamic array for pointers */
/*
* Copyright (c) 2008, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "array.h"
#include <string.h>
/***************************
* STATIC HELPER FUNCTIONS *
***************************/
/* arr_realloc • resizes the storage of a struct array to neosz elements
 *	returns 1 on success, 0 on failure (the array is then left untouched);
 *	arr->size is clamped when the new capacity is smaller than it */
static int
arr_realloc(struct array* arr, int neosz)
{
    void *neo;
    size_t bytes;

    if (neosz < 0)
        return 0;
    /* refuse element counts whose byte size does not fit in a size_t */
    if (arr->unit != 0 && (size_t)neosz > (size_t)-1 / arr->unit)
        return 0;
    bytes = (size_t)neosz * arr->unit;
    if (bytes == 0) {
        /* realloc(ptr, 0) may free ptr and return NULL, which would be
         * reported as a failure while leaving arr->base dangling */
        free(arr->base);
        neo = 0;
    } else {
        neo = realloc(arr->base, bytes);
        if (neo == 0)
            return 0;
    }
    arr->base = neo;
    arr->asize = neosz;
    if (arr->size > neosz)
        arr->size = neosz;
    return 1;
}
/* parr_realloc • resizes the storage of a struct parray to neosz pointers
 *	returns 1 on success, 0 on failure (the array is then left untouched);
 *	arr->size is clamped when the new capacity is smaller than it */
static int
parr_realloc(struct parray* arr, int neosz)
{
    void *neo;

    if (neosz < 0)
        return 0;
    /* refuse pointer counts whose byte size does not fit in a size_t */
    if ((size_t)neosz > (size_t)-1 / sizeof (void *))
        return 0;
    if (neosz == 0) {
        /* realloc(ptr, 0) may free ptr and return NULL, which would be
         * reported as a failure while leaving arr->item dangling */
        free(arr->item);
        neo = 0;
    } else {
        neo = realloc(arr->item, (size_t)neosz * sizeof (void *));
        if (neo == 0)
            return 0;
    }
    arr->item = neo;
    arr->asize = neosz;
    if (arr->size > neosz)
        arr->size = neosz;
    return 1;
}
/***************************
* GENERIC ARRAY FUNCTIONS *
***************************/
/* arr_adjust • trims the allocation down to the elements actually in use
 *	returns the reallocation status (1 on success, 0 on failure) */
int
arr_adjust(struct array *arr)
{
    const int used = arr->size;

    return arr_realloc(arr, used);
}
/* arr_free • releases the element storage (but NOT the struct itself)
 *	a null arr is ignored; the struct is left empty and reusable */
void
arr_free(struct array *arr)
{
    if (arr == 0)
        return;
    free(arr->base);
    arr->asize = 0;
    arr->size = 0;
    arr->base = 0;
}
/* arr_grow • makes sure at least `need` elements are allocated
 *	returns 1 when the capacity is sufficient, 0 if reallocation failed */
int
arr_grow(struct array *arr, int need)
{
    if (need > arr->asize)
        return arr_realloc(arr, need);
    return 1;
}
/* arr_init • sets up an empty array whose elements are `unit` bytes each
 *	nothing is allocated until the array grows */
void
arr_init(struct array *arr, size_t unit)
{
    arr->unit = unit;
    arr->asize = 0;
    arr->size = 0;
    arr->base = 0;
}
/* arr_insert • opens a gap of nb elements before the n-th one
 *	the gap contents are left unspecified; when n is at or past the end
 *	the array is simply extended by nb elements
 *	returns 1 on success, 0 on invalid arguments or allocation failure */
int
arr_insert(struct array *arr, int nb, int n)
{
    if (arr == 0 || nb <= 0 || n < 0)
        return 0;
    if (!arr_grow(arr, arr->size + nb))
        return 0;

    if (n < arr->size) {
        /* shift the tail [n, size) up by nb elements */
        char *from = (char *)arr->base + n * arr->unit;
        char *to = from + nb * arr->unit;
        size_t tail_bytes = (arr->size - n) * arr->unit;

        memmove(to, from, tail_bytes);
    }
    arr->size += nb;
    return 1;
}
/* arr_item • address of element number `no`
 *	returns a null pointer for a null array or an out-of-range index */
void *
arr_item(struct array *arr, int no)
{
    char *first;

    if (arr == 0)
        return 0;
    if (no < 0 || no >= arr->size)
        return 0;
    first = arr->base;
    return first + no * arr->unit;
}
/* arr_newitem • appends one (uninitialized) element to the array
 *	returns its index, or -1 when the array could not grow */
int
arr_newitem(struct array *arr)
{
    int idx;

    if (!arr_grow(arr, arr->size + 1))
        return -1;
    idx = arr->size;
    arr->size = idx + 1;
    return idx;
}
/* arr_remove • deletes the element at index idx, closing the gap
 *	a null array or an out-of-range index is silently ignored */
void
arr_remove(struct array *arr, int idx)
{
    char *slot;
    int after;

    if (arr == 0 || idx < 0 || idx >= arr->size)
        return;

    arr->size -= 1;
    after = arr->size - idx;    /* elements that followed the removed one */
    if (after > 0) {
        slot = (char *)arr->base + idx * arr->unit;
        memmove(slot, slot + arr->unit, after * arr->unit);
    }
}
/* arr_sorted_find • binary search in a sorted array
 *	cmp(key, entry) must be negative, zero or positive when key sorts
 *	before, equal to or after entry
 *	returns the matching entry, or a null pointer when there is none */
void *
arr_sorted_find(struct array *arr, void *key, array_cmp_fn cmp)
{
    char *first = arr->base;
    int lo = -1;            /* highest index known to sort before key */
    int hi = arr->size;     /* lowest index known to sort after key */

    while (lo < hi - 1) {
        int mid = lo + (hi - lo) / 2;
        char *entry = first + mid * arr->unit;
        int order = cmp(key, entry);

        if (order == 0)
            return entry;
        if (order < 0)
            hi = mid;
        else
            lo = mid;
    }
    return 0;
}
/* arr_sorted_find_i • O(log n) search in a sorted array,
 *	returning index of the smallest element larger than the key
 *	(arr->size when every element is smaller than or equal to the key) */
int
arr_sorted_find_i(struct array *arr, void *key, array_cmp_fn cmp)
{
    int mi, ma, cu, ret;
    char *ptr = arr->base;

    mi = -1;
    ma = arr->size;
    while (mi < ma - 1) {
        cu = mi + (ma - mi) / 2;
        ret = cmp(key, ptr + cu * arr->unit);
        if (ret == 0) {
            /* skip the run of equal entries; the index is checked
             * before each comparison so that cmp is never handed
             * the one-past-the-end element */
            do {
                cu += 1;
            } while (cu < arr->size
                && cmp(key, ptr + cu * arr->unit) == 0);
            return cu;
        }
        else if (ret < 0) ma = cu;
        else /* if (ret > 0) */ mi = cu;
    }
    return ma;
}
/***************************
* POINTER ARRAY FUNCTIONS *
***************************/
/* parr_adjust • trims the allocation down to the pointers actually in use
 *	returns the reallocation status (1 on success, 0 on failure) */
int
parr_adjust(struct parray* arr)
{
    const int used = arr->size;

    return parr_realloc(arr, used);
}
/* parr_free • releases the pointer storage (but NOT the struct itself)
 *	the pointed-to objects are not touched; a null arr is ignored */
void
parr_free(struct parray *arr)
{
    if (arr == 0)
        return;
    free(arr->item);
    arr->asize = 0;
    arr->size = 0;
    arr->item = 0;
}
/* parr_grow • makes sure at least `need` pointers are allocated
 *	returns 1 when the capacity is sufficient, 0 if reallocation failed */
int
parr_grow(struct parray *arr, int need)
{
    if (need > arr->asize)
        return parr_realloc(arr, need);
    return 1;
}
/* parr_init • sets up an empty pointer array (every member becomes zero) */
void
parr_init(struct parray *arr)
{
    arr->asize = 0;
    arr->size = 0;
    arr->item = 0;
}
/* parr_insert • inserting nb null pointers before the n-th one
 *	when n is at or past the end, the nb slots are appended
 *	returns 1 on success, 0 on invalid arguments or allocation failure */
int
parr_insert(struct parray *parr, int nb, int n)
{
    int first, i;

    if (!parr || nb <= 0 || n < 0
    || !parr_grow(parr, parr->size + nb))
        return 0;

    /* an index past the end means appending */
    first = (n < parr->size) ? n : parr->size;
    if (first < parr->size)
        memmove(parr->item + first + nb, parr->item + first,
            (parr->size - first) * sizeof (void *));
    /* the new slots are cleared in every case, so that an append never
     * exposes uninitialized pointers */
    for (i = 0; i < nb; ++i)
        parr->item[first + i] = 0;
    parr->size += nb;
    return 1;
}
/* parr_pop • removes the last pointer of the array and returns it
 *	returns a null pointer when the array is empty */
void *
parr_pop(struct parray *arr)
{
    int last;

    if (arr->size <= 0)
        return 0;
    last = arr->size - 1;
    arr->size = last;
    return arr->item[last];
}
/* parr_push • appends a pointer at the end of the array
 *	returns 1 on success, 0 when the array could not grow */
int
parr_push(struct parray *arr, void *i)
{
    const int slot = arr->size;

    if (!parr_grow(arr, slot + 1))
        return 0;
    arr->item[slot] = i;
    arr->size = slot + 1;
    return 1;
}
/* parr_remove • takes the pointer at index idx out of the array
 *	the following pointers move down by one slot
 *	returns the removed pointer, or a null pointer for a null array or
 *	an out-of-range index */
void *
parr_remove(struct parray *arr, int idx)
{
    void *removed;
    int after;

    if (arr == 0 || idx < 0 || idx >= arr->size)
        return 0;

    removed = arr->item[idx];
    after = arr->size - idx - 1;    /* pointers following the removed one */
    if (after > 0)
        memmove(arr->item + idx, arr->item + idx + 1,
            after * sizeof (void *));
    arr->size -= 1;
    return removed;
}
/* parr_sorted_find • binary search in a sorted pointer array
 *	cmp(key, item) must be negative, zero or positive when key sorts
 *	before, equal to or after item
 *	returns the matching pointer, or a null pointer when there is none */
void *
parr_sorted_find(struct parray *arr, void *key, array_cmp_fn cmp)
{
    int lo = -1;            /* highest index known to sort before key */
    int hi = arr->size;     /* lowest index known to sort after key */

    while (lo < hi - 1) {
        int mid = lo + (hi - lo) / 2;
        int order = cmp(key, arr->item[mid]);

        if (order == 0)
            return arr->item[mid];
        if (order < 0)
            hi = mid;
        else
            lo = mid;
    }
    return 0;
}
/* parr_sorted_find_i • O(log n) search in a sorted array,
 *	returning index of the smallest element larger than the key
 *	(arr->size when every element is smaller than or equal to the key) */
int
parr_sorted_find_i(struct parray *arr, void *key, array_cmp_fn cmp)
{
    int mi, ma, cu, ret;

    mi = -1;
    ma = arr->size;
    while (mi < ma - 1) {
        cu = mi + (ma - mi) / 2;
        ret = cmp(key, arr->item[cu]);
        if (ret == 0) {
            /* skip the run of equal entries; the index is checked
             * before each access so that item[size] is never read */
            do {
                cu += 1;
            } while (cu < arr->size && cmp(key, arr->item[cu]) == 0);
            return cu;
        }
        else if (ret < 0) ma = cu;
        else /* if (ret > 0) */ mi = cu;
    }
    return ma;
}
/* parr_top • returns the top of the stack (i.e. the last pointer of the
 *	array) without removing it; a null pointer when there is none */
void *
parr_top(struct parray *arr)
{
    if (arr != 0 && arr->size > 0)
        return arr->item[arr->size - 1];
    return 0;
}
/* vim: set filetype=c: */
/* array.h - automatic dynamic array for pointers */
/*
* Copyright (c) 2008, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LITHIUM_ARRAY_H
#define LITHIUM_ARRAY_H
#include <stdlib.h>
/********************
* TYPE DEFINITIONS *
********************/
/* struct array • generic linear array of fixed-size elements */
struct array {
void* base; /* element storage, resized with realloc() */
int size; /* number of elements currently in use */
int asize; /* number of elements the storage can hold */
size_t unit; }; /* size in bytes of one element */
/* struct parray • array of pointers */
struct parray {
void ** item; /* pointer storage, resized with realloc() */
int size; /* number of pointers currently in use */
int asize; }; /* number of pointers the storage can hold */
/* array_cmp_fn • comparison functions for sorted arrays
 *	called as cmp(key, entry); the search functions treat a negative
 *	result as "key sorts before entry", zero as a match, and a positive
 *	result as "key sorts after entry" */
typedef int (*array_cmp_fn)(void *key, void *array_entry);
/***************************
* GENERIC ARRAY FUNCTIONS *
***************************/
/* arr_adjust • shrink the allocated memory to fit exactly the needs */
int
arr_adjust(struct array *);
/* arr_free • frees the structure contents (but NOT the struct itself) */
void
arr_free(struct array *);
/* arr_grow • increases the array size to fit the given number of elements */
int
arr_grow(struct array *, int);
/* arr_init • initialization of the contents of the struct */
void
arr_init(struct array *, size_t);
/* arr_insert • inserting nb elements before the nth one */
int
arr_insert(struct array *, int nb, int n);
/* arr_item • returns a pointer to the n-th element */
void *
arr_item(struct array *, int);
/* arr_newitem • returns the index of a new element appended to the array */
int
arr_newitem(struct array *);
/* arr_remove • removes the n-th elements of the array */
void
arr_remove(struct array *, int);
/* arr_sorted_find • O(log n) search in a sorted array, returning entry */
/* equivalent to bsearch(key, arr->base, arr->size, arr->unit, cmp) */
void *
arr_sorted_find(struct array *, void *key, array_cmp_fn cmp);
/* arr_sorted_find_i • O(log n) search in a sorted array,
* returning index of the smallest element larger than the key */
int
arr_sorted_find_i(struct array *, void *key, array_cmp_fn cmp);
/***************************
* POINTER ARRAY FUNCTIONS *
***************************/
/* parr_adjust • shrinks the allocated memory to fit exactly the needs */
int
parr_adjust(struct parray *);
/* parr_free • frees the structure contents (but NOT the struct itself) */
void
parr_free(struct parray *);
/* parr_grow • increases the array size to fit the given number of elements */
int
parr_grow(struct parray *, int);
/* parr_init • initialization of the struct (which is equivalent to zero) */
void
parr_init(struct parray *);
/* parr_insert • inserting nb elements before the nth one */
int
parr_insert(struct parray *, int nb, int n);
/* parr_pop • pops the last item of the array and returns it */
void *
parr_pop(struct parray *);
/* parr_push • pushes a pointer at the end of the array (= append) */
int
parr_push(struct parray *, void *);
/* parr_remove • removes the n-th element of the array and returns it */
void *
parr_remove(struct parray *, int);
/* parr_sorted_find • O(log n) search in a sorted array, returning entry */
void *
parr_sorted_find(struct parray *, void *key, array_cmp_fn cmp);
/* parr_sorted_find_i • O(log n) search in a sorted array,
* returning index of the smallest element larger than the key */
int
parr_sorted_find_i(struct parray *, void *key, array_cmp_fn cmp);
/* parr_top • returns the top of the stack (i.e. the last element of the array) */
void *
parr_top(struct parray *);
#endif /* ndef LITHIUM_ARRAY_H */
/* vim: set filetype=c: */
/* benchmark.c - main function for markdown module benchmarking */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "markdown.h"
#include "renderers.h"
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#define READ_UNIT 1024
#define OUTPUT_UNIT 64
/* buffer statistics, to track some memleaks */
extern long buffer_stat_nb;
extern size_t buffer_stat_alloc_bytes;
/* benchmark • reads the whole stream into memory, then runs the markdown
 *	parser on it nb times (at least once), discarding each output
 *	a null stream is ignored; allocation failures are reported on stderr */
static void
benchmark(FILE *in, int nb)
{
    struct buf *ib, *ob;
    size_t ret, i, n;

    if (!in) return;
    n = (nb <= 1) ? 1 : nb;

    /* reading everything */
    ib = bufnew(READ_UNIT);
    if (!ib || !bufgrow(ib, READ_UNIT)) {
        fprintf(stderr, "Unable to allocate the input buffer\n");
        bufrelease(ib);
        return;
    }
    while ((ret = fread(ib->data + ib->size, 1,
            ib->asize - ib->size, in)) > 0) {
        ib->size += ret;
        /* when growing fails, the next fread is asked for whatever room
         * is left and the loop stops once the buffer is full */
        bufgrow(ib, ib->size + READ_UNIT);
    }

    /* performing markdown parsing */
    for (i = 0; i < n; i += 1) {
        ob = bufnew(OUTPUT_UNIT);   /* starts out empty */
        if (!ob) {
            fprintf(stderr, "Unable to allocate the output buffer\n");
            break;
        }
        markdown(ob, ib, &mkd_xhtml);
        bufrelease(ob);
    }

    /* cleanup */
    bufrelease(ib);
}
/* main • command-line entry point of the benchmark
 *	every "--<number>" argument sets the repetition count (the last one
 *	wins); every other argument is a file to process; without any file,
 *	standard input is used
 *	returns 2 after printing the usage on a count below 1, 0 otherwise */
int
main(int argc, char **argv)
{
    FILE *in = 0;
    int count = 1;
    int nfiles = 0;
    int arg, done;

    /* first pass: pick up the count and tally the file names */
    for (arg = 1; arg < argc; ++arg) {
        if (argv[arg][0] == '-' && argv[arg][1] == '-')
            count = atoi(argv[arg] + 2);
        else
            ++nfiles;
    }
    if (count < 1) {
        fprintf(stderr, "Usage: %s [--<number>] "
                "[file] [file] ...\n", argv[0]);
        return 2;
    }

    /* if no file is given, using stdin as the only file */
    if (nfiles <= 0) {
        in = stdin;
        nfiles = 1;
    }

    /* second pass: run the benchmark on each input */
    arg = 0;
    for (done = 0; done < nfiles; ++done) {
        if (in != stdin) {
            /* move on to the next argument that is not an option */
            do {
                ++arg;
            } while (arg < argc
                && argv[arg][0] == '-' && argv[arg][1] == '-');
            if (arg >= argc)
                break;
            in = fopen(argv[arg], "r");
            if (in == 0) {
                fprintf(stderr, "Unable to open \"%s\": %s\n",
                        argv[arg], strerror(errno));
                continue;
            }
        }
        benchmark(in, count);
        if (in != stdin)
            fclose(in);
    }

    /* memory checks */
    if (buffer_stat_nb != 0)
        fprintf(stderr, "Warning: %ld buffers still active\n",
                buffer_stat_nb);
    if (buffer_stat_alloc_bytes != 0)
        fprintf(stderr, "Warning: %zu bytes still allocated\n",
                buffer_stat_alloc_bytes);
    return 0;
}
/* vim: set filetype=c: */
/* buffer.c - automatic buffer structure */
/*
* Copyright (c) 2008, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* COMPILE TIME OPTIONS
*
* BUFFER_STATS • if defined, stats are kept about memory usage
*/
#define BUFFER_STATS
#define BUFFER_STDARG
#include "buffer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/********************
* GLOBAL VARIABLES *
********************/
#ifdef BUFFER_STATS
long buffer_stat_nb = 0;
size_t buffer_stat_alloc_bytes = 0;
#endif
/***************************
* STATIC HELPER FUNCTIONS *
***************************/
/* lower • returns the lower-case variant of an ASCII upper-case letter,
 *	and any other char unchanged */
static char
lower(char c)
{
    if (c < 'A' || c > 'Z')
        return c;
    return c - 'A' + 'a';
}
/********************
* BUFFER FUNCTIONS *
********************/
/* bufcasecmp • case-insensitive (ASCII letters only) buffer comparison
 *	a null buffer sorts before any other; when one buffer is a prefix
 *	of the other, the shorter one sorts first
 *	returns a negative, zero or positive value like strcmp */
int
bufcasecmp(const struct buf *a, const struct buf *b)
{
    size_t pos, common;

    if (a == b)
        return 0;
    if (a == 0)
        return -1;
    if (b == 0)
        return 1;

    common = (b->size > a->size) ? a->size : b->size;
    for (pos = 0; pos < common; ++pos) {
        int diff = lower(a->data[pos]) - lower(b->data[pos]);

        if (diff != 0)
            return diff;
    }
    /* equal over the common length: the longer buffer is the greater */
    if (a->size > common)
        return 1;
    if (b->size > common)
        return -1;
    return 0;
}
/* bufcmp • case-sensitive buffer comparison
 *	a null buffer sorts before any other; when one buffer is a prefix
 *	of the other, the shorter one sorts first
 *	returns a negative, zero or positive value like strcmp */
int
bufcmp(const struct buf *a, const struct buf *b)
{
    size_t pos, common;

    if (a == b)
        return 0;
    if (a == 0)
        return -1;
    if (b == 0)
        return 1;

    common = (b->size > a->size) ? a->size : b->size;
    for (pos = 0; pos < common; ++pos) {
        if (a->data[pos] != b->data[pos])
            return a->data[pos] - b->data[pos];
    }
    /* equal over the common length: the longer buffer is the greater */
    if (a->size > common)
        return 1;
    if (b->size > common)
        return -1;
    return 0;
}
/* bufcmps • case-sensitive comparison of a string to a buffer
 *	a null buffer is treated as an empty buffer and a null string as an
 *	empty string
 *	returns a negative, zero or positive value like strcmp */
int
bufcmps(const struct buf *a, const char *b)
{
    size_t alen, len, cmplen;
    int r;

    /* b is tested before strlen() ever sees it */
    len = b ? strlen(b) : 0;
    alen = a ? a->size : 0;

    /* only the common prefix may be compared: a->data is not
     * NUL-terminated, so reading past a->size would be out of bounds */
    cmplen = (alen < len) ? alen : len;
    if (cmplen) {
        r = strncmp(a->data, b, cmplen);
        if (r) return r;
    }
    /* equal prefixes: the shorter operand sorts first */
    if (alen == len) return 0;
    return (alen < len) ? -1 : 1;
}
/* bufdup • buffer duplication
 *	creates a new buffer with reference count 1 and reallocation unit
 *	dupunit, holding a copy of the src contents; the allocated size is
 *	src->size rounded up to a multiple of dupunit
 *	returns a null pointer when src is null, when dupunit is zero or
 *	when an allocation fails */
struct buf *
bufdup(const struct buf *src, size_t dupunit)
{
    size_t blocks;
    struct buf *ret;

    /* a zero unit would divide by zero below, and would produce a
     * buffer that bufrelease refuses to free */
    if (src == 0 || dupunit == 0) return 0;
    ret = malloc(sizeof (struct buf));
    if (ret == 0) return 0;
    ret->unit = dupunit;
    ret->size = src->size;
    ret->ref = 1;
    if (!src->size) {
        ret->asize = 0;
        ret->data = 0;
    } else {
        /* round up without risking an overflow in the addition */
        blocks = src->size / dupunit + (src->size % dupunit != 0);
        if (blocks > (size_t)-1 / dupunit) {
            free(ret);
            return 0;
        }
        ret->asize = blocks * dupunit;
        ret->data = malloc(ret->asize);
        if (ret->data == 0) {
            free(ret);
            return 0;
        }
        memcpy(ret->data, src->data, src->size);
    }
#ifdef BUFFER_STATS
    /* counted for empty duplicates too, since bufrelease decrements the
     * counter for every buffer it frees */
    buffer_stat_nb += 1;
    buffer_stat_alloc_bytes += ret->asize;
#endif
    return ret;
}
/* bufgrow • increasing the allocated size to the given value
 *	the allocation grows by whole multiples of buf->unit until it can
 *	hold at least neosz bytes
 *	returns 1 when the buffer is large enough, 0 for a null or read-only
 *	(unit == 0) buffer, on size overflow or on allocation failure */
int
bufgrow(struct buf *buf, size_t neosz)
{
    size_t missing, steps, neoasz;
    void *neodata;

    if (!buf || !buf->unit) return 0;
    if (buf->asize >= neosz) return 1;

    /* number of units to add, computed directly instead of looping so
     * that a huge request cannot wrap around size_t */
    missing = neosz - buf->asize;
    steps = missing / buf->unit + (missing % buf->unit != 0);
    if (steps > ((size_t)-1 - buf->asize) / buf->unit) return 0;
    neoasz = buf->asize + steps * buf->unit;

    neodata = realloc(buf->data, neoasz);
    if (!neodata) return 0;
#ifdef BUFFER_STATS
    buffer_stat_alloc_bytes += (neoasz - buf->asize);
#endif
    buf->data = neodata;
    buf->asize = neoasz;
    return 1;
}
/* bufnew • creates an empty buffer with the given reallocation unit
 *	no data is allocated yet and the reference count starts at 1
 *	returns a null pointer when the allocation fails */
struct buf *
bufnew(size_t unit)
{
    struct buf *fresh = malloc(sizeof (struct buf));

    if (fresh == 0)
        return 0;
#ifdef BUFFER_STATS
    buffer_stat_nb += 1;
#endif
    fresh->unit = unit;
    fresh->ref = 1;
    fresh->asize = 0;
    fresh->size = 0;
    fresh->data = 0;
    return fresh;
}
/* bufnullterm • makes the data usable as a C string
 *	a NUL byte is stored right after the contents, without changing
 *	buf->size; the buffer grows when there is no spare byte
 *	null or read-only (unit == 0) buffers are left alone */
void
bufnullterm(struct buf *buf)
{
    if (buf == 0 || buf->unit == 0)
        return;

    if (buf->size < buf->asize) {
        /* a spare byte exists: write only when it is not NUL already */
        if (buf->data[buf->size] != 0)
            buf->data[buf->size] = 0;
        return;
    }
    if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1))
        buf->data[buf->size] = 0;
}
/* bufprintf • printf-style formatted append to a buffer
 *	null or read-only (unit == 0) buffers are left alone */
void
bufprintf(struct buf *buf, const char *fmt, ...)
{
    va_list args;

    if (buf != 0 && buf->unit != 0) {
        va_start(args, fmt);
        vbufprintf(buf, fmt, args);
        va_end(args);
    }
}
/* bufput • appends raw data to a buffer
 *	nothing happens for a null buffer, for a zero length, or when the
 *	buffer cannot be grown to hold the extra bytes */
void
bufput(struct buf *buf, const void *data, size_t len)
{
    /* an empty append must not reach memcpy: buf->data may still be a
     * null pointer, and memcpy on null pointers is undefined even for a
     * zero length */
    if (!buf || len == 0) return;
    /* reject lengths that would wrap the size computation */
    if (len > (size_t)-1 - buf->size) return;
    if (buf->size + len > buf->asize && !bufgrow(buf, buf->size + len))
        return;
    memcpy(buf->data + buf->size, data, len);
    buf->size += len;
}
/* bufputs • appends a NUL-terminated string (without its NUL) to a buffer */
void
bufputs(struct buf *buf, const char *str)
{
    const size_t len = strlen(str);

    bufput(buf, str, len);
}
/* bufputc • appends a single char to a buffer
 *	nothing happens for a null buffer or when the buffer cannot grow */
void
bufputc(struct buf *buf, char c)
{
    size_t wanted;

    if (buf == 0)
        return;
    wanted = buf->size + 1;
    if (wanted > buf->asize && !bufgrow(buf, wanted))
        return;
    buf->data[buf->size] = c;
    buf->size += 1;
}
/* bufrelease • drops one reference to the buffer
 *	the data and the struct are freed when the count reaches zero
 *	null or read-only (unit == 0) buffers are left alone */
void
bufrelease(struct buf *buf)
{
    if (buf == 0 || buf->unit == 0)
        return;

    buf->ref -= 1;
    if (buf->ref != 0)
        return;     /* still referenced elsewhere */
#ifdef BUFFER_STATS
    buffer_stat_nb -= 1;
    buffer_stat_alloc_bytes -= buf->asize;
#endif
    free(buf->data);
    free(buf);
}
/* bufreset • frees the data of the buffer, leaving it empty
 *	the struct itself and its reference count are kept
 *	null, read-only (unit == 0) or unallocated buffers are left alone */
void
bufreset(struct buf *buf)
{
    if (buf == 0)
        return;
    if (buf->unit == 0 || buf->asize == 0)
        return;
#ifdef BUFFER_STATS
    buffer_stat_alloc_bytes -= buf->asize;
#endif
    free(buf->data);
    buf->asize = 0;
    buf->size = 0;
    buf->data = 0;
}
/* bufset • makes *dest refer to src, releasing what *dest held before
 *	a src without allocation (asize == 0) is duplicated, any other src
 *	gets one more reference; the new reference is taken before the old
 *	one is released */
void
bufset(struct buf **dest, struct buf *src)
{
    struct buf *incoming = src;

    if (incoming != 0) {
        if (incoming->asize != 0)
            incoming->ref += 1;
        else
            incoming = bufdup(incoming, 1);
    }
    bufrelease(*dest);
    *dest = incoming;
}
/* bufslurp • drops the first len bytes of the buffer contents
 *	the buffer becomes empty when len covers everything; the allocation
 *	is kept; null or read-only (unit == 0) buffers are left alone */
void
bufslurp(struct buf *buf, size_t len)
{
    if (buf == 0 || buf->unit == 0 || len == 0)
        return;

    if (len < buf->size) {
        buf->size -= len;
        memmove(buf->data, buf->data + len, buf->size);
    } else {
        buf->size = 0;
    }
}
/* buftoi • converts the decimal number found at offset_i into an int
 *	an optional '+' or '-' sign is accepted before the digits
 *	when offset_o is not null, it receives the offset of the first
 *	char after the number (it is left untouched for a null or empty
 *	buffer)
 *	returns the value, or 0 when there is no digit; values that do not
 *	fit in an int are not detected */
int
buftoi(struct buf *buf, size_t offset_i, size_t *offset_o)
{
    int r = 0, neg = 0;
    size_t i = offset_i;

    if (!buf || !buf->size) return 0;
    /* the sign is looked up only when the offset lies inside the
     * contents, so an offset at or past the end reads nothing */
    if (i < buf->size) {
        if (buf->data[i] == '+') i += 1;
        else if (buf->data[i] == '-') {
            neg = 1;
            i += 1;
        }
    }
    while (i < buf->size && buf->data[i] >= '0' && buf->data[i] <= '9') {
        r = (r * 10) + buf->data[i] - '0';
        i += 1;
    }
    if (offset_o) *offset_o = i;
    return neg ? -r : r;
}
/* vbufprintf • stdarg variant of formatted printing into a buffer
 *	the formatted text is appended after the current contents; when it
 *	does not fit, the buffer is grown and the formatting is done again
 *	nothing is appended for a null buffer, when the buffer cannot grow
 *	or when vsnprintf reports an error */
void
vbufprintf(struct buf *buf, const char *fmt, va_list ap)
{
    int n;
    size_t room;
    va_list ap_save;

    if (buf == 0
    || (buf->size >= buf->asize && !bufgrow (buf, buf->size + 1)))
        return;

    /* ap is consumed by the first attempt; keep a copy for a retry */
    va_copy(ap_save, ap);
    room = buf->asize - buf->size;
    n = vsnprintf(buf->data + buf->size, room, fmt, ap);
    /* n is tested for errors before being compared as an unsigned */
    if (n >= 0 && (size_t)n >= room) {
        /* truncated: make room for n chars plus the NUL, then redo */
        if (!bufgrow (buf, buf->size + (size_t)n + 1)) {
            va_end(ap_save);
            return;
        }
        n = vsnprintf (buf->data + buf->size,
                buf->asize - buf->size, fmt, ap_save);
    }
    va_end(ap_save);
    if (n < 0) return;
    buf->size += n;
}
/* vim: set filetype=c: */
/* buffer.h - automatic buffer structure */
/*
* Copyright (c) 2008, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LITHIUM_BUFFER_H
#define LITHIUM_BUFFER_H
#include <stddef.h>
/********************
* TYPE DEFINITIONS *
********************/
/* struct buf • character array buffer
 *	the data is not necessarily NUL-terminated: size gives its length
 *	(bufnullterm adds a terminator when a C string is needed) */
struct buf {
char * data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
int ref; }; /* reference count */
/**********
* MACROS *
**********/
/* CONST_BUF • global buffer from a string literal
 *	size excludes the terminating NUL while asize includes it; the
 *	members left out (unit, ref) are zero-initialized, and a unit of 0
 *	makes the buffer read-only */
#define CONST_BUF(name, string) \
static struct buf name = { string, sizeof string -1, sizeof string }
/* VOLATILE_BUF • macro for creating a volatile buffer on the stack
 *	asize is left at 0, which marks the buffer as volatile; strlen()
 *	requires <string.h> at the point of use */
#define VOLATILE_BUF(name, strname) \
struct buf name = { strname, strlen(strname) }
/* BUFPUTSL • optimized bufputs of a string literal
 *	the length comes from sizeof instead of a call to strlen() */
#define BUFPUTSL(output, litteral) \
bufput(output, litteral, sizeof litteral - 1)
/***********************
* FUNCTION ATTRIBUTES *
***********************/
/* BUF_ALLOCATOR • the function returns a completely new pointer
 *	expands to the GNU "malloc" function attribute when __GNUC__ is
 *	defined, and to nothing otherwise */
#ifdef __GNUC__
#define BUF_ALLOCATOR \
__attribute__ ((malloc))
#else
#define BUF_ALLOCATOR
#endif
/* BUF_PRINTF_LIKE • marks the function as behaving like printf
 *	format_index is the 1-based position of the format string and
 *	first_variadic_index the position of the first formatted argument
 *	expands to the GNU "format" function attribute when __GNUC__ is
 *	defined, and to nothing otherwise; neither expansion carries a
 *	semicolon, the declaration using the macro provides it */
#ifdef __GNUC__
#define BUF_PRINTF_LIKE(format_index, first_variadic_index) \
__attribute__ ((format (printf, format_index, first_variadic_index)))
#else
#define BUF_PRINTF_LIKE(format_index, first_variadic_index)
#endif
/********************
* BUFFER FUNCTIONS *
********************/
/* bufcasecmp • case-insensitive buffer comparison */
int
bufcasecmp(const struct buf *, const struct buf *);
/* bufcmp • case-sensitive buffer comparison */
int
bufcmp(const struct buf *, const struct buf *);
/* bufcmps • case-sensitive comparison of a string to a buffer */
int
bufcmps(const struct buf *, const char *);
/* bufdup • buffer duplication */
struct buf *
bufdup(const struct buf *, size_t)
BUF_ALLOCATOR;
/* bufgrow • increasing the allocated size to the given value */
int
bufgrow(struct buf *, size_t);
/* bufnew • allocation of a new buffer */
struct buf *
bufnew(size_t)
BUF_ALLOCATOR;
/* bufnullterm • NUL-termination of the string array (making a C-string) */
void
bufnullterm(struct buf *);
/* bufprintf • formatted printing to a buffer */
void
bufprintf(struct buf *, const char *, ...)
BUF_PRINTF_LIKE(2, 3);
/* bufput • appends raw data to a buffer */
void
bufput(struct buf *, const void*, size_t);
/* bufputs • appends a NUL-terminated string to a buffer */
void
bufputs(struct buf *, const char*);
/* bufputc • appends a single char to a buffer */
void
bufputc(struct buf *, char);
/* bufrelease • decrease the reference count and free the buffer if needed */
void
bufrelease(struct buf *);
/* bufreset • frees internal data of the buffer */
void
bufreset(struct buf *);
/* bufset • safely assigns a buffer to another */
void
bufset(struct buf **, struct buf *);
/* bufslurp • removes a given number of bytes from the head of the array */
void
bufslurp(struct buf *, size_t);
/* buftoi • converts the numbers at the beginning of the buf into an int */
int
buftoi(struct buf *, size_t, size_t *);
#ifdef BUFFER_STDARG
#include <stdarg.h>
/* vbufprintf • stdarg variant of formatted printing into a buffer */
void
vbufprintf(struct buf *, const char*, va_list);
#endif /* def BUFFER_STDARG */
#endif /* ndef LITHIUM_BUFFER_H */
/* vim: set filetype=c: */
Copyright (c) 2009, Natacha Porté
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# Makefile
# Copyright (c) 2009, Natacha Porté
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# directory receiving the generated dependency files
DEPDIR=depends
CFLAGS=-c -g -O3 -Wall -Werror -fPIC
LDFLAGS=-g -O3 -Wall -Werror
CC=gcc

all: libsoldout.a mkd2html mkd2latex mkd2man

.PHONY: all clean

# libraries

libsoldout.a: markdown.o array.o buffer.o renderers.o
	ar rcs $@ $^

# executables

mkd2html: mkd2html.o libsoldout.a
	$(CC) $(LDFLAGS) $^ -o $@

mkd2latex: mkd2latex.o libsoldout.a
	$(CC) $(LDFLAGS) $^ -o $@

mkd2man: mkd2man.o libsoldout.a
	$(CC) $(LDFLAGS) $^ -o $@

# housekeeping

benchmark: benchmark.o libsoldout.a
	$(CC) $(LDFLAGS) $^ -o $@

# the static library is removed too, since it is what "all" builds
clean:
	rm -f *.o
	rm -f libsoldout.a libsoldout.so libsoldout.so.*
	rm -f mkd2html mkd2latex mkd2man benchmark
	rm -rf $(DEPDIR)

# dependencies

include $(wildcard $(DEPDIR)/*.d)

# generic object compilations
# each rule first records the header dependencies of the source file in
# $(DEPDIR), then compiles it

%.o: %.c
	@mkdir -p $(DEPDIR)
	@$(CC) -MM $< > $(DEPDIR)/$*.d
	$(CC) $(CFLAGS) -o $@ $<

%.o: %.m
	@mkdir -p $(DEPDIR)
	@$(CC) -MM $< > $(DEPDIR)/$*.d
	$(CC) $(CFLAGS) -o $@ $<
/* markdown.c - generic markdown parser */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "markdown.h"
#include "array.h"
#include <assert.h>
#include <string.h>
#include <strings.h> /* for strncasecmp */
#define TEXT_UNIT 64 /* unit for the copy of the input buffer */
#define WORK_UNIT 64 /* block-level working buffer */
#define MKD_LI_END 8 /* internal list flag */
/***************
* LOCAL TYPES *
***************/
/* link_ref • reference to a link
 *	entries are compared on their id, case-insensitively (see
 *	cmp_link_ref and cmp_link_ref_sort) */
struct link_ref {
struct buf * id; /* identifier used as the comparison key */
struct buf * link; /* presumably the link target — TODO confirm */
struct buf * title; }; /* presumably the optional title — TODO confirm */
/* char_trigger • function pointer to render active chars */
/* returns the number of chars taken care of */
/* ob is the output buffer and rndr the render in progress */
/* data is the pointer of the beginning of the span */
/* offset is the number of valid chars before data */
/* size is presumably the number of valid chars from data on — TODO confirm */
struct render;
typedef size_t
(*char_trigger)(struct buf *ob, struct render *rndr,
char *data, size_t offset, size_t size);
/* render • structure containing one particular render */
struct render {
struct mkd_renderer make; /* renderer description (type declared in markdown.h) */
struct array refs; /* presumably the struct link_ref entries — TODO confirm */
char_trigger active_char[256]; /* one optional handler per byte value */
struct parray work; }; /* stack of working buffers (see new_work_buffer) */
/* html_tag • structure for quick HTML tag search (inspired from discount)
 *	tags are ordered by length first, then case-insensitively by text
 *	(see cmp_html_tag) */
struct html_tag {
char * text; /* tag name; only the first `size` chars are compared */
int size; }; /* length of the tag name */
/********************
* GLOBAL VARIABLES *
********************/
/* block_tags • recognised block tags, sorted by cmp_html_tag
 *	the order (shorter names first, then alphabetical) must be kept when
 *	adding a tag, because find_block_tag uses bsearch() on this table;
 *	the numbered comments give the indexes that INS_TAG and DEL_TAG
 *	below rely on */
static struct html_tag block_tags[] = {
/*0*/ { "p", 1 },
{ "dl", 2 },
{ "h1", 2 },
{ "h2", 2 },
{ "h3", 2 },
{ "h4", 2 },
{ "h5", 2 },
{ "h6", 2 },
{ "ol", 2 },
{ "ul", 2 },
/*10*/ { "del", 3 },
{ "div", 3 },
/*12*/ { "ins", 3 },
{ "pre", 3 },
{ "form", 4 },
{ "math", 4 },
{ "table", 5 },
{ "iframe", 6 },
{ "script", 6 },
{ "fieldset", 8 },
{ "noscript", 8 },
{ "blockquote", 10 } };
/* pointers to the "ins" and "del" entries of block_tags */
#define INS_TAG (block_tags + 12)
#define DEL_TAG (block_tags + 10)
/***************************
* STATIC HELPER FUNCTIONS *
***************************/
/* build_ref_id • turns input text into a reference id
 *	leading and trailing whitespace (space, tab, newline) is dropped and
 *	every inner run of whitespace becomes a single space; the result
 *	replaces the previous contents of id
 *	returns 0 on success, -1 when the text holds nothing but whitespace
 *	(id is then left untouched) */
static int
build_ref_id(struct buf *id, const char *data, size_t size)
{
    size_t start, pos;

    /* trim leading whitespace */
    for (; size > 0; data += 1, size -= 1) {
        if (data[0] != ' ' && data[0] != '\t' && data[0] != '\n')
            break;
    }
    /* trim trailing whitespace */
    for (; size > 0; size -= 1) {
        if (data[size - 1] != ' ' && data[size - 1] != '\t'
        && data[size - 1] != '\n')
            break;
    }
    if (size == 0)
        return -1;

    /* copy word by word, separated by exactly one space */
    id->size = 0;
    pos = 0;
    while (pos < size) {
        start = pos;
        while (pos < size
        && data[pos] != ' ' && data[pos] != '\t' && data[pos] != '\n')
            pos += 1;
        bufput(id, data + start, pos - start);
        if (pos >= size)
            break;
        bufputc(id, ' ');
        while (pos < size
        && (data[pos] == ' ' || data[pos] == '\t' || data[pos] == '\n'))
            pos += 1;
    }
    return 0;
}
/* cmp_link_ref • lookup comparison between a key and a link_ref entry */
/* `key` is passed to bufcasecmp() unchanged and compared with the id of the
 * link_ref pointed to by `array_entry`; the bufcasecmp() result is returned
 * as is. */
static int
cmp_link_ref(void *key, void *array_entry) {
	return bufcasecmp(key, ((struct link_ref *)array_entry)->id);
}
/* cmp_link_ref_sort • qsort()-style comparison of two link_ref entries */
/* Orders the entries by their ids, as compared by bufcasecmp(). */
static int
cmp_link_ref_sort(const void *a, const void *b) {
	return bufcasecmp(((const struct link_ref *)a)->id,
	                  ((const struct link_ref *)b)->id);
}
/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
/* Shorter tags sort first; tags of equal size are ordered by a
 * case-insensitive comparison of their first `size` chars. */
static int
cmp_html_tag(const void *a, const void *b) {
	const struct html_tag *lhs = a;
	const struct html_tag *rhs = b;
	int delta = lhs->size - rhs->size;

	if (delta == 0)
		delta = strncasecmp(lhs->text, rhs->text, lhs->size);
	return delta;
}
/* find_block_tag • looks up the tag name starting at data in block_tags */
/* The name is the leading run of ASCII letters and digits. Returns the
 * matching block_tags entry, or 0 when the name is unknown or when it runs
 * up to the very end of the input (no char follows the name). */
static struct html_tag *
find_block_tag(char *data, size_t size) {
	struct html_tag key;
	size_t len;

	/* measure the leading alphanumeric word */
	for (len = 0; len < size; len++) {
		char ch = data[len];
		int alnum = (ch >= '0' && ch <= '9')
		         || (ch >= 'A' && ch <= 'Z')
		         || (ch >= 'a' && ch <= 'z');

		if (!alnum)
			break;
	}
	if (len >= size)
		return 0;

	/* binary search of the tag */
	key.text = data;
	key.size = len;
	return bsearch(&key, block_tags,
	               sizeof block_tags / sizeof block_tags[0],
	               sizeof block_tags[0], cmp_html_tag);
}
/* new_work_buffer • get a new working buffer from the stack or create one */
static struct buf *
new_work_buffer(struct render *rndr) {
struct buf *ret = 0;
if (rndr->work.size < rndr->work.asize) {
ret = rndr->work.item[rndr->work.size ++];
ret->size = 0; }
else {
ret = bufnew(WORK_UNIT);
parr_push(&rndr->work, ret); }
return ret; }
/* release_work_buffer • gives the topmost working buffer back to the stack */
/* `buf` must be the buffer most recently obtained from new_work_buffer();
 * this is only checked by an assertion. The buffer itself is not freed: it
 * stays in its slot so that new_work_buffer() can reuse it. */
static void
release_work_buffer(struct render *rndr, struct buf *buf) {
	assert(rndr->work.size > 0
	       && rndr->work.item[rndr->work.size - 1] == buf);
	rndr->work.size--;
}
/****************************
* INLINE PARSING FUNCTIONS *
****************************/
/* is_mail_autolink • recognises the address part of a mail autolink */
/* The address is taken to be [-@._a-zA-Z0-9]+ holding exactly one '@' and
 * immediately followed by '>'; this is less strict than the original
 * markdown e-mail matching.
 * Returns the number of chars consumed, closing '>' included, or 0 when the
 * input does not match. */
static size_t
is_mail_autolink(char *data, size_t size) {
	size_t pos;
	size_t at_count = 0;

	for (pos = 0; pos < size; pos++) {
		char ch = data[pos];

		if (ch == '@')
			at_count++;
		else if (!(ch == '-' || ch == '.' || ch == '_'
		           || (ch >= 'a' && ch <= 'z')
		           || (ch >= 'A' && ch <= 'Z')
		           || (ch >= '0' && ch <= '9')))
			break;
	}

	if (pos < size && data[pos] == '>' && at_count == 1)
		return pos + 1;
	return 0;
}
/* tag_length • returns the length of the given tag, or 0 if it's not valid */
/* data must start at the '<'. On a match the returned length includes the
 * closing '>'. *autolink is set to the kind of autolink recognised:
 * MKDA_NORMAL for http:, https: and ftp: links, MKDA_EXPLICIT_EMAIL for a
 * "mailto:" address, MKDA_IMPLICIT_EMAIL for a bare address, and
 * MKDA_NOT_AUTOLINK for anything else that merely looks like a tag.
 * *autolink is left untouched when the input is rejected before the scheme
 * test (too short, no '<', or no letter after the optional '/'). */
static size_t
tag_length(char *data, size_t size, enum mkd_autolink *autolink) {
	size_t i, j;

	/* a valid tag can't be shorter than 3 chars */
	if (size < 3) return 0;

	/* begins with a '<' optionally followed by '/', followed by letter */
	if (data[0] != '<') return 0;
	i = (data[1] == '/') ? 2 : 1;
	if ((data[i] < 'a' || data[i] > 'z')
	    && (data[i] < 'A' || data[i] > 'Z'))
		return 0;

	/* scheme test */
	*autolink = MKDA_NOT_AUTOLINK;
	if (size > 6 && strncasecmp(data + 1, "http", 4) == 0
	    && (data[5] == ':'
	        || ((data[5] == 's' || data[5] == 'S') && data[6] == ':'))) {
		i = data[5] == ':' ? 6 : 7;
		*autolink = MKDA_NORMAL;
	}
	else if (size > 5 && strncasecmp(data + 1, "ftp:", 4) == 0) {
		i = 5;
		*autolink = MKDA_NORMAL;
	}
	else if (size > 7 && strncasecmp(data + 1, "mailto:", 7) == 0) {
		i = 8;
		/* not changing *autolink to go to the address test */
	}

	/* completing autolink test: no whitespace or ' or " */
	/* a scheme directly followed by '>' has no target, so it is demoted to
	 * a plain tag (the test is on the character, not on the index) */
	if (i >= size || data[i] == '>')
		*autolink = MKDA_NOT_AUTOLINK;
	else if (*autolink) {
		j = i;
		while (i < size && data[i] != '>' && data[i] != '\''
		       && data[i] != '"' && data[i] != ' ' && data[i] != '\t'
		       && data[i] != '\n')
			i += 1;
		if (i >= size) return 0;
		if (i > j && data[i] == '>') return i + 1;
		/* one of the forbidden chars has been found */
		*autolink = MKDA_NOT_AUTOLINK;
	}
	else if ((j = is_mail_autolink(data + i, size - i)) != 0) {
		/* i is 8 only when the "mailto:" prefix was recognised */
		*autolink = (i == 8)
		            ? MKDA_EXPLICIT_EMAIL : MKDA_IMPLICIT_EMAIL;
		return i + j;
	}

	/* looking for something looking like a tag end */
	while (i < size && data[i] != '>') i += 1;
	if (i >= size) return 0;
	return i + 1;
}
/* parse_inline • parses inline markdown elements */
/* Renders data[0..size) into ob. Runs of inactive chars go through the
 * normal_text callback when it is set and are copied verbatim otherwise;
 * every active char is handed to its trigger from rndr->active_char. */
static void
parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size) {
size_t i = 0, end = 0;
char_trigger action = 0;
struct buf work = { 0, 0, 0, 0, 0 };
/* nesting guard: once the working buffer stack is deeper than
 * max_work_stack the input is copied through without any parsing */
if (rndr->work.size > rndr->make.max_work_stack) {
if (size) bufput(ob, data, size);
return; }
/* i is the start of the pending plain text, end the scanning position */
while (i < size) {
/* copying inactive chars into the output */
while (end < size
&& (action = rndr->active_char[(unsigned char)data[end]]) == 0)
end += 1;
if (rndr->make.normal_text) {
work.data = data + i;
work.size = end - i;
rndr->make.normal_text(ob, &work, rndr->make.opaque); }
else
bufput(ob, data + i, end - i);
if (end >= size) break;
i = end;
/* calling the trigger */
/* its return value is the number of input chars it consumed; on 0 the
 * active char is kept as plain text, to be flushed with the next run */
end = action(ob, rndr, data + i, i, size - i);
if (!end) /* no action from the callback */
end = i + 1;
else {
i += end;
end = i; } } }
/* find_emph_char • looks for the next emph char, skipping other constructs */
/* Scans data[1..size) for `c` (index 0 is never examined), jumping over
 * code spans and links so that a `c` inside them is not taken as a closer.
 * Returns the index of the emph char, or 0 when none is found. When the
 * input ends inside a code span or a link, the first `c` seen inside that
 * construct is returned as a fallback (0 if there was none). */
static size_t
find_emph_char(char *data, size_t size, char c) {
	size_t i = 1;

	while (i < size) {
		while (i < size && data[i] != c
		       && data[i] != '`' && data[i] != '[')
			i += 1;
		if (i >= size) return 0;
		if (data[i] == c) return i;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') { i += 1; continue; }

		/* skipping a code span */
		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i += 1;
				span_nb += 1;
			}
			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt += 1;
				else bt = 0;
				i += 1;
			}
			if (i >= size) return tmp_i;
			/* i already points just past the closing backticks: it must
			 * not be advanced again, or an emph char directly following
			 * the code span would be skipped */
		}

		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			char cc;

			/* link text, up to the first ']' */
			i += 1;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1;
			}
			i += 1;
			while (i < size && (data[i] == ' '
			       || data[i] == '\t' || data[i] == '\n'))
				i += 1;
			if (i >= size) return tmp_i;

			/* the target is closed by the delimiter matching its opener */
			if (data[i] == '[') cc = ']';
			else if (data[i] == '(') cc = ')';
			else { /* not a link */
				if (tmp_i) return tmp_i;
				else continue;
			}
			i += 1;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1;
			}
			if (i >= size) return tmp_i;
			i += 1;
		}
	}
	return 0;
}
/* parse_emph1 • parsing single emphasis */
/* data starts right after the opening symbol. The emphasis is closed by a
 * symbol that is not preceded by whitespace and not followed by another
 * symbol. Returns the number of chars consumed, closing symbol included, or
 * 0 when there is no emphasis callback, no closer, or the callback
 * declined. */
static size_t
parse_emph1(struct buf *ob, struct render *rndr,
            char *data, size_t size, char c) {
	size_t pos, step;
	struct buf *inner;
	int ok;

	if (!rndr->make.emphasis)
		return 0;

	/* skipping one symbol if coming from emph3 */
	pos = (size > 1 && data[0] == c && data[1] == c) ? 1 : 0;

	while (pos < size) {
		step = find_emph_char(data + pos, size - pos, c);
		if (step == 0)
			return 0;
		pos += step;
		if (pos >= size)
			return 0;

		/* a doubled symbol cannot close a single emphasis */
		if (pos + 1 < size && data[pos + 1] == c) {
			pos++;
			continue;
		}

		/* the closer must not follow whitespace */
		if (data[pos] != c || data[pos - 1] == ' '
		    || data[pos - 1] == '\t' || data[pos - 1] == '\n')
			continue;

		inner = new_work_buffer(rndr);
		parse_inline(inner, rndr, data, pos);
		ok = rndr->make.emphasis(ob, inner, c, rndr->make.opaque);
		release_work_buffer(rndr, inner);
		return ok ? pos + 1 : 0;
	}
	return 0;
}
/* parse_emph2 • parsing double emphasis */
/* data starts right after the two opening symbols. The emphasis is closed
 * by two consecutive symbols that are not preceded by whitespace. Returns
 * the number of chars consumed, both closing symbols included, or 0 when
 * there is no double_emphasis callback, no closer, or the callback
 * declined. */
static size_t
parse_emph2(struct buf *ob, struct render *rndr,
            char *data, size_t size, char c) {
	size_t pos = 0, step;
	struct buf *inner;
	int ok;

	if (!rndr->make.double_emphasis)
		return 0;

	while (pos < size) {
		step = find_emph_char(data + pos, size - pos, c);
		if (step == 0)
			return 0;
		pos += step;

		/* not a valid closing pair: keep looking after this symbol */
		if (pos + 1 >= size || data[pos] != c || data[pos + 1] != c
		    || pos == 0 || data[pos - 1] == ' '
		    || data[pos - 1] == '\t' || data[pos - 1] == '\n') {
			pos++;
			continue;
		}

		inner = new_work_buffer(rndr);
		parse_inline(inner, rndr, data, pos);
		ok = rndr->make.double_emphasis(ob, inner, c,
		                                rndr->make.opaque);
		release_work_buffer(rndr, inner);
		return ok ? pos + 2 : 0;
	}
	return 0;
}
/* parse_emph3 • parsing triple emphasis */
/* data starts right after the three opening symbols. The first closing
 * symbol found decides what happens: a triple closer is rendered here (when
 * a triple_emphasis callback exists), anything shorter is delegated to
 * parse_emph1/parse_emph2 after stepping back over part of the opening
 * symbols. Returns the number of chars consumed relative to data, or 0 on
 * failure. */
static size_t
parse_emph3(struct buf *ob, struct render *rndr,
            char *data, size_t size, char c) {
	size_t pos = 0, step;
	int ok;

	while (pos < size) {
		step = find_emph_char(data + pos, size - pos, c);
		if (step == 0)
			return 0;
		pos += step;

		/* skip whitespace preceded symbols */
		if (data[pos] != c || data[pos - 1] == ' '
		    || data[pos - 1] == '\t' || data[pos - 1] == '\n')
			continue;

		if (pos + 2 < size && data[pos + 1] == c && data[pos + 2] == c
		    && rndr->make.triple_emphasis) {
			/* triple symbol found */
			struct buf *inner = new_work_buffer(rndr);

			parse_inline(inner, rndr, data, pos);
			ok = rndr->make.triple_emphasis(ob, inner, c,
			                                rndr->make.opaque);
			release_work_buffer(rndr, inner);
			return ok ? pos + 3 : 0;
		}

		if (pos + 1 < size && data[pos + 1] == c) {
			/* double symbol found, handing over to emph1 */
			step = parse_emph1(ob, rndr, data - 2, size + 2, c);
			return step ? step - 2 : 0;
		}

		/* single symbol found, handing over to emph2 */
		step = parse_emph2(ob, rndr, data - 1, size + 1, c);
		return step ? step - 1 : 0;
	}
	return 0;
}
/* char_emphasis • single, double and triple emphasis parsing */
/* Trigger for an emphasis symbol: data[0] is the symbol. The opening run of
 * one, two or three symbols selects parse_emph1, parse_emph2 or parse_emph3;
 * longer runs, runs followed by whitespace, and inputs too short to hold
 * content plus a closer are rejected. Returns the number of chars consumed,
 * opening symbols included, or 0. `offset` is unused. */
static size_t
char_emphasis(struct buf *ob, struct render *rndr,
              char *data, size_t offset, size_t size) {
	char c = data[0];
	size_t run = 1, used;

	/* length of the opening run, looking no further than four symbols */
	while (run < 4 && run < size && data[run] == c)
		run++;

	/* four or more symbols, or not enough input after the opening run */
	if (run > 3 || size <= run + 1)
		return 0;

	/* whitespace cannot follow an opening emphasis */
	if (data[run] == ' ' || data[run] == '\t' || data[run] == '\n')
		return 0;

	switch (run) {
	case 1:
		used = parse_emph1(ob, rndr, data + 1, size - 1, c);
		break;
	case 2:
		used = parse_emph2(ob, rndr, data + 2, size - 2, c);
		break;
	default:
		used = parse_emph3(ob, rndr, data + 3, size - 3, c);
		break;
	}
	return used ? used + run : 0;
}
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
/* Looks behind data, which is why `offset` (the number of chars available
 * before data) must be at least 2. One trailing space already written to ob
 * is removed before the linebreak callback runs. Returns 1 when a line
 * break was rendered, 0 otherwise. `size` is unused. */
static size_t
char_linebreak(struct buf *ob, struct render *rndr,
               char *data, size_t offset, size_t size) {
	if (offset < 2)
		return 0;
	if (data[-1] != ' ' || data[-2] != ' ')
		return 0;

	/* removing the last space from ob and rendering */
	if (ob->size > 0 && ob->data[ob->size - 1] == ' ')
		ob->size--;
	if (rndr->make.linebreak(ob, rndr->make.opaque))
		return 1;
	return 0;
}
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
/* The span is delimited by the same number of backticks on both sides.
 * Spaces and tabs just inside the delimiters are trimmed; an empty span is
 * reported to the callback with a null text buffer. Returns the number of
 * chars consumed, both delimiters included, or 0 when no closing delimiter
 * exists or the callback declined. `offset` is unused. */
static size_t
char_codespan(struct buf *ob, struct render *rndr,
              char *data, size_t offset, size_t size) {
	size_t nb = 0, run = 0;
	size_t end, first, last;

	/* counting the number of backticks in the delimiter */
	while (nb < size && data[nb] == '`')
		nb++;

	/* finding the next delimiter: nb consecutive backticks */
	end = nb;
	while (end < size && run < nb) {
		run = (data[end] == '`') ? run + 1 : 0;
		end++;
	}
	if (run < nb && end >= size)
		return 0; /* no matching delimiter */

	/* trimming outside whitespaces */
	first = nb;
	while (first < end && (data[first] == ' ' || data[first] == '\t'))
		first++;
	last = end - nb;
	while (last > nb && (data[last - 1] == ' ' || data[last - 1] == '\t'))
		last--;

	if (first < last) {
		/* real code span */
		struct buf work = { data + first, last - first, 0, 0, 0 };

		if (!rndr->make.codespan(ob, &work, rndr->make.opaque))
			return 0;
	}
	else if (!rndr->make.codespan(ob, 0, rndr->make.opaque))
		return 0;
	return end;
}
/* char_escape • '\\' backslash escape */
/* Emits the char following the backslash as plain text, through the
 * normal_text callback when it is set. Always reports 2 chars consumed,
 * even when the backslash is the last char of the input, in which case
 * nothing is emitted. `offset` is unused. */
static size_t
char_escape(struct buf *ob, struct render *rndr,
            char *data, size_t offset, size_t size) {
	if (size > 1) {
		if (!rndr->make.normal_text)
			bufputc(ob, data[1]);
		else {
			struct buf work = { data + 1, 1, 0, 0, 0 };

			rndr->make.normal_text(ob, &work, rndr->make.opaque);
		}
	}
	return 2;
}
/* char_entity • '&' escaped when it doesn't belong to an entity */
/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
static size_t
char_entity(struct buf *ob, struct render *rndr,
char *data, size_t offset, size_t size) {
size_t end = 1;
struct buf work;
if (end < size && data[end] == '#') end += 1;
while (end < size
&& ((data[end] >= '0' && data[end] <= '9')
|| (data[end] >= 'a' && data[end] <= 'z')
|| (data[end] >= 'A' && data[end] <= 'Z')))
end += 1;
if (end < size && data[end] == ';') {
/* real entity */
end += 1; }
else {
/* lone '&' */
return 0; }
if (rndr->make.entity) {
work.data = data;
work.size = end;
rndr->make.entity(ob, &work, rndr->make.opaque); }
else bufput(ob, data, end);
return end; }
/* char_langle_tag • '<' when tags or autolinks are allowed */
/* Uses tag_length() to measure the tag and classify it. Autolinks go to the
 * autolink callback with the text between the angle brackets; anything else
 * goes to raw_html_tag with the whole tag. Returns the tag length when a
 * callback accepted it, 0 otherwise. `offset` is unused. */
static size_t
char_langle_tag(struct buf *ob, struct render *rndr,
                char *data, size_t offset, size_t size) {
	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
	size_t end = tag_length(data, size, &altype);
	struct buf work = { data, end, 0, 0, 0 };
	int ret;

	if (!end)
		return 0;

	if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
		/* strip the surrounding '<' and '>' */
		work.data = data + 1;
		work.size = end - 2;
		ret = rndr->make.autolink(ob, &work, altype,
		                          rndr->make.opaque);
	}
	else if (rndr->make.raw_html_tag)
		ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque);
	else
		ret = 0;

	return ret ? end : 0;
}
/* get_link_inline • extract inline-style link and title from parenthesised data */
/* data[0..size) is the text found between the parentheses. The link is
 * everything before the first quote (' or "), trimmed of blanks and of
 * optional surrounding angle brackets, with backslashes removed. The title
 * is what lies between that first quote and a quote ending the data (blanks
 * after it are ignored); without such an ending quote there is no title and
 * the opening quote stays part of the link.
 * Both output buffers are overwritten. Always returns 0. */
static int
get_link_inline(struct buf *link, struct buf *title, char *data, size_t size) {
	size_t i = 0, mark;
	size_t link_b, link_e;
	size_t title_b = 0, title_e = 0;

	link->size = title->size = 0;

	/* skipping initial whitespace */
	while (i < size
	       && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
		i += 1;
	link_b = i;

	/* looking for link end: ' " */
	while (i < size && data[i] != '\'' && data[i] != '"')
		i += 1;
	link_e = i;

	/* looking for title end if present */
	/* i < size means the loop above stopped on a quote; testing the index
	 * rather than data[i] avoids reading one past the end of the span */
	if (i < size) {
		i += 1;
		title_b = i;

		/* skipping whitespaces after title */
		title_e = size - 1;
		while (title_e > title_b && (data[title_e] == ' '
		       || data[title_e] == '\t' || data[title_e] == '\n'))
			title_e -= 1;

		/* checking for closing quote presence */
		if (data[title_e] != '\'' && data[title_e] != '"') {
			title_b = title_e = 0;
			link_e = i;
		}
	}

	/* remove whitespace at the end of the link */
	while (link_e > link_b
	       && (data[link_e - 1] == ' ' || data[link_e - 1] == '\t'
	           || data[link_e - 1] == '\n'))
		link_e -= 1;

	/* remove optional angle brackets around the link */
	/* only a non-empty link is inspected, so neither test can read outside
	 * data[0..size) */
	if (link_b < link_e && data[link_b] == '<') link_b += 1;
	if (link_b < link_e && data[link_e - 1] == '>') link_e -= 1;

	/* escape backslashed character from link */
	link->size = 0;
	i = link_b;
	while (i < link_e) {
		mark = i;
		while (i < link_e && data[i] != '\\') i += 1;
		bufput(link, data + mark, i - mark);
		while (i < link_e && data[i] == '\\') i += 1;
	}

	/* handing back title */
	title->size = 0;
	if (title_e > title_b)
		bufput(title, data + title_b, title_e - title_b);

	/* this function always succeeds */
	return 0;
}
/* get_link_ref • extract referenced link and title from id */
/* The raw id text is normalised with build_ref_id() (using `link` as
 * scratch space) and looked up among rndr->refs. On success `link` and
 * `title` receive copies of the reference's link and title, left empty when
 * the reference has none, and 0 is returned. Returns -1 when the id is
 * blank or unknown; `link` may have been clobbered by then. */
static int
get_link_ref(struct render *rndr, struct buf *link, struct buf *title,
             char * data, size_t size) {
	struct link_ref *found;

	/* find the link from its id (stored temporarily in link) */
	link->size = 0;
	if (build_ref_id(link, data, size) < 0)
		return -1;
	found = arr_sorted_find(&rndr->refs, link, cmp_link_ref);
	if (found == 0)
		return -1;

	/* fill the output buffers */
	link->size = 0;
	if (found->link != 0)
		bufput(link, found->link->data, found->link->size);
	title->size = 0;
	if (found->title != 0)
		bufput(title, found->title->data, found->title->size);
	return 0;
}
/* char_link • '[': parsing a link or an image */
/* data[0] is the '['; the construct is an image when the char just before
 * it is '!'. Three forms are recognised after the closing bracket: an
 * inline "(link title)", a reference "[id]" (an empty id meaning the link
 * text itself), and a shortcut reference where the text alone is the id.
 * Returns the number of chars consumed from data, or 0 when no link was
 * rendered (missing callback, unmatched bracket, unknown reference, or the
 * callback declined). */
static size_t
char_link(struct buf *ob, struct render *rndr,
char *data, size_t offset, size_t size) {
/* offset is the number of chars available before data */
int is_img = (offset && data[-1] == '!'), level;
size_t i = 1, txt_e;
struct buf *content = 0;
struct buf *link = 0;
struct buf *title = 0;
int ret;
/* checking whether the correct renderer exists */
if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
return 0;
/* looking for the matching closing bracket */
/* nested brackets are counted; a bracket preceded by '\' is ignored */
for (level = 1; i < size; i += 1)
if (data[i - 1] == '\\') continue;
else if (data[i] == '[') level += 1;
else if (data[i] == ']') {
level -= 1;
if (level <= 0) break; }
if (i >= size) return 0;
/* txt_e is the index of the ']' closing the link text */
txt_e = i;
i += 1;
/* skip any amount of whitespace or newline */
/* (this is much more laxist than original markdown syntax) */
while (i < size
&& (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
i += 1;
/* allocate temporary buffers to store content, link and title */
/* they are released in reverse order at char_link_cleanup */
content = new_work_buffer(rndr);
link = new_work_buffer(rndr);
title = new_work_buffer(rndr);
ret = 0; /* error if we don't get to the callback */
/* inline style link */
if (i < size && data[i] == '(') {
/* looking for the first ')' that is not preceded by '\' */
size_t span_end = i;
while (span_end < size
&& !(data[span_end] == ')'
&& (span_end == i || data[span_end - 1] != '\\')))
span_end += 1;
if (span_end >= size
|| get_link_inline(link, title,
data + i+1, span_end - (i+1)) < 0)
goto char_link_cleanup;
i = span_end + 1; }
/* reference style link */
else if (i < size && data[i] == '[') {
char *id_data;
size_t id_size, id_end = i;
while (id_end < size && data[id_end] != ']')
id_end += 1;
if (id_end >= size)
goto char_link_cleanup;
if (i + 1 == id_end) {
/* implicit id - use the contents */
id_data = data + 1;
id_size = txt_e - 1; }
else {
/* explicit id - between brackets */
id_data = data + i + 1;
id_size = id_end - (i + 1); }
if (get_link_ref(rndr, link, title, id_data, id_size) < 0)
goto char_link_cleanup;
i = id_end + 1; }
/* shortcut reference style link */
else {
if (get_link_ref(rndr, link, title, data + 1, txt_e - 1) < 0)
goto char_link_cleanup;
/* rewinding the whitespace */
i = txt_e + 1; }
/* building content: img alt is copied verbatim, link content is parsed */
if (txt_e > 1) {
if (is_img) bufput(content, data + 1, txt_e - 1);
else parse_inline(content, rndr, data + 1, txt_e - 1); }
/* calling the relevant rendering function */
if (is_img) {
/* the '!' was already emitted as plain text: take it back */
if (ob->size && ob->data[ob->size - 1] == '!') ob->size -= 1;
ret = rndr->make.image(ob, link, title, content,
rndr->make.opaque); }
else ret = rndr->make.link(ob, link, title, content, rndr->make.opaque);
/* cleanup */
char_link_cleanup:
release_work_buffer(rndr, title);
release_work_buffer(rndr, link);
release_work_buffer(rndr, content);
return ret ? i : 0; }
/*********************************
* BLOCK-LEVEL PARSING FUNCTIONS *
*********************************/
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* A line is empty when it holds only spaces and tabs up to its newline or
 * up to the end of the input. The returned length counts the newline; when
 * the input ends without one the result is still size + 1. */
static size_t
is_empty(char *data, size_t size) {
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
		pos++;
	}
	return pos + 1;
}
/* is_hrule • returns whether a line is a horizontal rule */
static int
is_hrule(char *data, size_t size) {
size_t i = 0, n = 0;
char c;
/* skipping initial spaces */
if (size < 3) return 0;
if (data[0] == ' ') { i += 1;
if (data[1] == ' ') { i += 1;
if (data[2] == ' ') { i += 1; } } }
/* looking at the hrule char */
if (i + 2 >= size
|| (data[i] != '*' && data[i] != '-' && data[i] != '_'))
return 0;
c = data[i];
/* the whole line must be the char or whitespace */
while (i < size && data[i] != '\n') {
if (data[i] == c) n += 1;
else if (data[i] != ' ' && data[i] != '\t')
return 0;
i += 1; }
return n >= 3; }
/* is_headerline • returns whether the line is a setext-style hdr underline */
/* Returns 1 for a line of '=' and 2 for a line of '-', optionally followed
 * by spaces and tabs, and 0 for anything else. data[0] is read
 * unconditionally, so the caller must provide at least one char. */
static int
is_headerline(char *data, size_t size) {
	char mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1; /* level 1 header */
	else if (mark == '-')
		level = 2; /* level 2 header */
	else
		return 0;

	while (pos < size && data[pos] == mark)
		pos++;
	while (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;
	return (pos >= size || data[pos] == '\n') ? level : 0;
}
/* is_table_sep • returns whether there is a table separator at the given pos */
/* A separator is a '|' that is not directly preceded by a backslash. */
static int
is_table_sep(char *data, size_t pos) {
	if (data[pos] != '|')
		return 0;
	if (pos > 0 && data[pos - 1] == '\\')
		return 0;
	return 1;
}
/* is_tableline • returns the number of table columns in the given line */
/* Counts the unescaped '|' of the first line of data. A pipe opening the
 * line (after blanks) or closing it (before blanks) is an outer border and
 * does not separate two columns. Returns 0 when the line holds no pipe at
 * all. */
static int
is_tableline(char *data, size_t size) {
	size_t pos = 0;
	int pipes = 0, borders = 0;

	/* skip initial blanks */
	while (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;

	/* check for initial '|' */
	if (pos < size && data[pos] == '|')
		borders++;

	/* count the number of pipes in the line */
	while (pos < size && data[pos] != '\n') {
		if (is_table_sep(data, pos))
			pipes++;
		pos++;
	}

	/* march back to check for optional last '|' before blanks and EOL */
	while (pos > 0
	       && (data[pos - 1] == ' ' || data[pos - 1] == '\t'
	           || data[pos - 1] == '\n'))
		pos--;
	if (pos > 0 && is_table_sep(data, pos - 1))
		borders++;

	/* return the number of columns, or 0 if it's not a table line */
	if (pipes <= 0)
		return 0;
	return pipes - borders + 1;
}
/* prefix_quote • returns blockquote prefix length */
/* The prefix is up to three spaces, a '>' and one optional space or tab.
 * Returns 0 when the line does not start with such a prefix. */
static size_t
prefix_quote(char *data, size_t size) {
	size_t pos = 0;

	/* at most three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;
	pos++;

	/* one blank right after the '>' belongs to the prefix */
	if (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;
	return pos;
}
/* prefix_code • returns prefix length for block code */
/* The prefix is either one tab (length 1) or four spaces (length 4).
 * Returns 0 when the line starts with neither. */
static size_t
prefix_code(char *data, size_t size) {
	size_t k;

	if (size == 0)
		return 0;
	if (data[0] == '\t')
		return 1;
	if (size < 4)
		return 0;
	for (k = 0; k < 4; k++)
		if (data[k] != ' ')
			return 0;
	return 4;
}
/* prefix_oli • returns ordered list item prefix */
/* The prefix is up to three spaces, one or more digits, a '.', and at least
 * one space or tab; all the blanks following the '.' are part of it.
 * Returns the prefix length, or 0 when the line is not an ordered item. */
static size_t
prefix_oli(char *data, size_t size) {
	size_t pos = 0;

	/* at most three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the number: at least one digit */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;
	do
		pos++;
	while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* a '.' and a blank must follow */
	if (pos + 1 >= size)
		return 0;
	if (data[pos] != '.')
		return 0;
	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;
	pos += 2;

	while (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;
	return pos;
}
/* prefix_uli • returns unordered list item prefix */
/* The prefix is up to three spaces, one bullet among '*', '+' and '-', and
 * at least one space or tab; all the blanks following the bullet are part
 * of it. Returns the prefix length, or 0 when the line is not an unordered
 * item. */
static size_t
prefix_uli(char *data, size_t size) {
	size_t pos = 0;

	/* at most three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* a bullet and a blank must follow */
	if (pos + 1 >= size)
		return 0;
	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;
	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;
	pos += 2;

	while (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;
	return pos;
}
/* parse_block • parsing of block-level content from data into ob */
/* Forward declaration: the block-level parsers below call it on the text
 * they have extracted. It returns nothing. */
static void parse_block(struct buf *ob, struct render *rndr,
char *data, size_t size);
/* parse_blockquote • handles parsing of a blockquote fragment */
/* Strips the quote prefix from each line, compacting the remaining text in
 * place inside data (the input is therefore modified), then parses that
 * text as blocks and hands the result to the blockquote callback.
 * Returns the number of chars consumed from data. */
static size_t
parse_blockquote(struct buf *ob, struct render *rndr,
char *data, size_t size) {
size_t beg, end = 0, pre, work_size = 0;
char *work_data = 0;
struct buf *out = new_work_buffer(rndr);
beg = 0;
while (beg < size) {
/* end is set just past the newline ending the line starting at beg */
for (end = beg + 1; end < size && data[end - 1] != '\n';
end += 1);
pre = prefix_quote(data + beg, end - beg);
if (pre) beg += pre; /* skipping prefix */
else if (is_empty(data + beg, end - beg)
&& (end >= size || (prefix_quote(data + end, size - end) == 0
&& !is_empty(data + end, size - end))))
/* empty line followed by non-quote line */
break;
if (beg < end) { /* copy into the in-place working buffer */
/* bufput(work, data + beg, end - beg); */
/* the first chunk marks the start of the compacted text; later chunks
 * are moved down only when a removed prefix left a gap before them */
if (!work_data)
work_data = data + beg;
else if (data + beg != work_data + work_size)
memmove(work_data + work_size, data + beg,
end - beg);
work_size += end - beg; }
beg = end; }
parse_block(out, rndr, work_data, work_size);
if (rndr->make.blockquote)
rndr->make.blockquote(ob, out, rndr->make.opaque);
release_work_buffer(rndr, out);
return end; }
/* parse_paragraph • handles parsing of a regular paragraph */
/* Collects lines until an empty line, a setext header underline, a
 * horizontal rule, or a line starting with '#' (other than the first one).
 * When the paragraph is ended by a setext underline, its last line is
 * rendered as a header and the lines before it, if any, as a paragraph.
 * Returns the number of chars consumed from data. */
static size_t
parse_paragraph(struct buf *ob, struct render *rndr,
char *data, size_t size) {
size_t i = 0, end = 0;
int level = 0;
struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
/* i is the start of the current line, end the start of the next one */
while (i < size) {
for (end = i + 1; end < size && data[end - 1] != '\n';
end += 1);
/* these two lines are consumed together with the paragraph */
if (is_empty(data + i, size - i)
|| (level = is_headerline(data + i, size - i)) != 0)
break;
/* these two are left for the caller: end is rewound to the line start */
if ((i && data[i] == '#')
|| is_hrule(data + i, size - i)) {
end = i;
break; }
i = end; }
/* the paragraph text is data[0..i), without its trailing newlines */
work.size = i;
while (work.size && data[work.size - 1] == '\n')
work.size -= 1;
if (!level) {
struct buf *tmp = new_work_buffer(rndr);
parse_inline(tmp, rndr, work.data, work.size);
if (rndr->make.paragraph)
rndr->make.paragraph(ob, tmp, rndr->make.opaque);
release_work_buffer(rndr, tmp); }
else {
/* setext header: split the text at the start of its last line */
if (work.size) {
size_t beg;
i = work.size;
work.size -= 1;
while (work.size && data[work.size] != '\n')
work.size -= 1;
/* beg is the first char of the last line, i.e. of the header text */
beg = work.size + 1;
while (work.size && data[work.size - 1] == '\n')
work.size -= 1;
if (work.size) {
/* lines before the header text form a paragraph of their own */
struct buf *tmp = new_work_buffer(rndr);
parse_inline(tmp, rndr, work.data, work.size);
if (rndr->make.paragraph)
rndr->make.paragraph(ob, tmp,
rndr->make.opaque);
release_work_buffer(rndr, tmp);
work.data += beg;
work.size = i - beg; }
else work.size = i; }
if (rndr->make.header) {
struct buf *span = new_work_buffer(rndr);
parse_inline(span, rndr, work.data, work.size);
rndr->make.header(ob, span, level,rndr->make.opaque);
release_work_buffer(rndr, span); } }
return end; }
/* parse_blockcode • handles parsing of a block-level code fragment */
/* Collects the lines carrying a code prefix (one tab or four spaces) along
 * with the empty lines between them, strips the prefix, and passes the text
 * to the blockcode callback. Trailing newlines are reduced to exactly one.
 * Returns the number of chars consumed from data. */
static size_t
parse_blockcode(struct buf *ob, struct render *rndr,
                char *data, size_t size) {
	size_t line = 0, next, indent;
	struct buf *code = new_work_buffer(rndr);

	while (line < size) {
		/* next: just past the newline ending the current line */
		next = line + 1;
		while (next < size && data[next - 1] != '\n')
			next++;

		indent = prefix_code(data + line, next - line);
		if (indent)
			line += indent; /* skipping prefix */
		else if (!is_empty(data + line, next - line))
			break; /* non-empty non-prefixed line breaks the pre */

		if (line < next) {
			/* verbatim copy to the working buffer; blank lines are
			 * reduced to a bare newline */
			if (is_empty(data + line, next - line))
				bufputc(code, '\n');
			else
				bufput(code, data + line, next - line);
		}
		line = next;
	}

	/* exactly one newline at the end of the code */
	while (code->size && code->data[code->size - 1] == '\n')
		code->size--;
	bufputc(code, '\n');

	if (rndr->make.blockcode)
		rndr->make.blockcode(ob, code, rndr->make.opaque);
	release_work_buffer(rndr, code);
	return line;
}
/* parse_listitem • parsing of a single list item */
/* data must start at the item's own prefix (bullet or number), which is
 * recognised and skipped here. The item extends until a new item with the
 * same indentation, or until a non-indented line following an empty line.
 * *flags is read and updated: MKD_LI_BLOCK is added when the item holds an
 * empty line, MKD_LI_END when the item is found to end its list.
 * Returns the number of chars consumed, or 0 when data does not start with
 * a list item prefix. */
static size_t
parse_listitem(struct buf *ob, struct render *rndr,
char *data, size_t size, int *flags) {
struct buf *work = 0, *inter = 0;
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
int in_empty = 0, has_inside_empty = 0;
/* keeping book of the first indentation prefix */
if (size > 1 && data[0] == ' ') { orgpre = 1;
if (size > 2 && data[1] == ' ') { orgpre = 2;
if (size > 3 && data[2] == ' ') { orgpre = 3; } } }
beg = prefix_uli(data, size);
if (!beg) beg = prefix_oli(data, size);
if (!beg) return 0;
/* skipping to the beginning of the following line */
end = beg;
while (end < size && data[end - 1] != '\n') end += 1;
/* getting working buffers */
/* work collects the item's raw text, inter its rendered contents */
work = new_work_buffer(rndr);
inter = new_work_buffer(rndr);
/* putting the first line into the working buffer */
bufput(work, data + beg, end - beg);
beg = end;
/* process the following lines */
while (beg < size) {
end += 1;
while (end < size && data[end - 1] != '\n') end += 1;
/* process an empty line */
if (is_empty(data + beg, end - beg)) {
in_empty = 1;
beg = end;
continue; }
/* calculating the indentation */
/* i is the number of indentation chars to strip (at most 4 spaces or one
 * tab), pre the indentation width compared against orgpre */
i = 0;
if (end - beg > 1 && data[beg] == ' ') { i = 1;
if (end - beg > 2 && data[beg + 1] == ' ') { i = 2;
if (end - beg > 3 && data[beg + 2] == ' ') { i = 3;
if (end - beg > 3 && data[beg + 3] == ' ') { i = 4; } } } }
pre = i;
if (data[beg] == '\t') { i = 1; pre = 8; }
/* checking for a new item */
if ((prefix_uli(data + beg + i, end - beg - i)
&& !is_hrule(data + beg + i, end - beg - i))
|| prefix_oli(data + beg + i, end - beg - i)) {
if (in_empty) has_inside_empty = 1;
if (pre == orgpre) /* the following item must have */
break; /* the same indentation */
/* a differently indented item opens a sublist: remember where it starts
 * within the working buffer */
if (!sublist) sublist = work->size; }
/* joining only indented stuff after empty lines */
else if (in_empty && i < 4 && data[beg] != '\t') {
*flags |= MKD_LI_END;
break; }
else if (in_empty) {
bufputc(work, '\n');
has_inside_empty = 1; }
in_empty = 0;
/* adding the line without prefix into the working buffer */
bufput(work, data + beg + i, end - beg - i);
beg = end; }
/* render of li contents */
if (has_inside_empty) *flags |= MKD_LI_BLOCK;
if (*flags & MKD_LI_BLOCK) {
/* intermediate render of block li */
if (sublist && sublist < work->size) {
parse_block(inter, rndr, work->data, sublist);
parse_block(inter, rndr, work->data + sublist,
work->size - sublist); }
else
parse_block(inter, rndr, work->data, work->size); }
else {
/* intermediate render of inline li */
/* the text before the sublist is inline, the sublist itself is a block */
if (sublist && sublist < work->size) {
parse_inline(inter, rndr, work->data, sublist);
parse_block(inter, rndr, work->data + sublist,
work->size - sublist); }
else
parse_inline(inter, rndr, work->data, work->size); }
/* render of li itself */
if (rndr->make.listitem)
rndr->make.listitem(ob, inter, *flags, rndr->make.opaque);
release_work_buffer(rndr, inter);
release_work_buffer(rndr, work);
return beg; }
/* parse_list • parsing ordered or unordered list block */
/* Parses consecutive list items until one of them consumes nothing or
 * reports the end of the list (MKD_LI_END), then hands the rendered items
 * to the list callback together with the accumulated flags.
 * Returns the number of chars consumed from data. */
static size_t
parse_list(struct buf *ob, struct render *rndr,
           char *data, size_t size, int flags) {
	struct buf *items = new_work_buffer(rndr);
	size_t pos = 0, used;

	while (pos < size) {
		used = parse_listitem(items, rndr, data + pos, size - pos,
		                      &flags);
		pos += used;
		if (used == 0)
			break;
		if (flags & MKD_LI_END)
			break;
	}

	if (rndr->make.list)
		rndr->make.list(ob, items, flags, rndr->make.opaque);
	release_work_buffer(rndr, items);
	return pos;
}
/* parse_atxheader • parsing of atx-style headers */
/* The level is the number of leading '#', capped at 6. The title is the
 * rest of the line without surrounding blanks and without trailing '#'.
 * A header with an empty title is handed over to parse_paragraph().
 * Returns the number of chars consumed (the line without its newline), or
 * 0 when data does not start with '#'. */
static size_t
parse_atxheader(struct buf *ob, struct render *rndr,
                char *data, size_t size) {
	int level = 0;
	size_t text_b, text_e, line_e;

	if (size == 0 || data[0] != '#')
		return 0;

	while (level < size && level < 6 && data[level] == '#')
		level++;

	/* title start: first non-blank char after the '#' run */
	text_b = level;
	while (text_b < size && (data[text_b] == ' ' || data[text_b] == '\t'))
		text_b++;

	/* end of the line, newline excluded */
	line_e = text_b;
	while (line_e < size && data[line_e] != '\n')
		line_e++;
	if (line_e <= text_b)
		return parse_paragraph(ob, rndr, data, size);

	/* title end: drop closing '#' and the blanks before them */
	text_e = line_e;
	while (text_e && data[text_e - 1] == '#')
		text_e--;
	while (text_e && (data[text_e - 1] == ' ' || data[text_e - 1] == '\t'))
		text_e--;
	if (text_e <= text_b)
		return parse_paragraph(ob, rndr, data, size);

	if (rndr->make.header) {
		struct buf *span = new_work_buffer(rndr);

		parse_inline(span, rndr, data + text_b, text_e - text_b);
		rndr->make.header(ob, span, level, rndr->make.opaque);
		release_work_buffer(rndr, span);
	}
	return line_e;
}
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t]*\n */
/* data must start at the "</" of the candidate closing tag. The tag name is
 * compared case-insensitively, and both the rest of its line and the whole
 * next line must be blank (running out of input counts as blank).
 * Returns the length of the match, 0 otherwise. */
static size_t
htmlblock_end(struct html_tag *tag, char *data, size_t size) {
	size_t pos, blank;
	int pass;

	/* assuming data[0] == '<' && data[1] == '/' already tested */

	/* checking tag is a match */
	if (tag->size + 3 >= size)
		return 0;
	if (strncasecmp(data + 2, tag->text, tag->size) != 0)
		return 0;
	if (data[tag->size + 2] != '>')
		return 0;

	/* checking white lines: first the rest of the tag line, then the line
	 * after it */
	pos = tag->size + 3;
	for (pass = 0; pass < 2; pass++) {
		blank = 0;
		if (pos < size) {
			blank = is_empty(data + pos, size - pos);
			if (blank == 0)
				return 0; /* non-blank text where a blank was needed */
		}
		pos += blank;
	}
	return pos;
}
/* parse_htmlblock • parsing of inline HTML block */
/* data must start at a '<'. Handles blocks opened by one of the tags listed
 * in block_tags, plus two special cases: HTML comments and <hr>. A block is
 * passed verbatim to the blockhtml callback.
 * Returns the number of chars consumed, or 0 when no HTML block was
 * recognised. */
static size_t
parse_htmlblock(struct buf *ob, struct render *rndr,
char *data, size_t size) {
size_t i, j = 0;
struct html_tag *curtag;
int found;
struct buf work = { data, 0, 0, 0, 0 };
/* identification of the opening tag */
if (size < 2 || data[0] != '<') return 0;
curtag = find_block_tag(data + 1, size - 1);
/* handling of special cases */
if (!curtag) {
/* HTML comment, laxist form */
/* the comment must be followed by nothing but blanks on its last line */
if (size > 5 && data[1] == '!'
&& data[2] == '-' && data[3] == '-') {
i = 5;
while (i < size
&& !(data[i - 2] == '-' && data[i - 1] == '-'
&& data[i] == '>'))
i += 1;
i += 1;
if (i < size)
j = is_empty(data + i, size - i);
if (j) {
work.size = i + j;
if (rndr->make.blockhtml)
rndr->make.blockhtml(ob, &work,
rndr->make.opaque);
return work.size; } }
/* HR, which is the only self-closing block tag considered */
if (size > 4
&& (data[1] == 'h' || data[1] == 'H')
&& (data[2] == 'r' || data[2] == 'R')) {
i = 3;
while (i < size && data[i] != '>')
i += 1;
if (i + 1 < size) {
i += 1;
j = is_empty(data + i, size - i);
if (j) {
work.size = i + j;
if (rndr->make.blockhtml)
rndr->make.blockhtml(ob, &work,
rndr->make.opaque);
return work.size; } } }
/* no special case recognised */
return 0; }
/* looking for an unindented matching closing tag */
/* followed by a blank line */
/* NOTE: this first pass is compiled out by the #if 0 below, so `found` is
 * still 0 when the second pass is reached */
i = 1;
found = 0;
#if 0
while (i < size) {
i += 1;
while (i < size && !(data[i - 2] == '\n'
&& data[i - 1] == '<' && data[i] == '/'))
i += 1;
if (i + 2 + curtag->size >= size) break;
j = htmlblock_end(curtag, data + i - 1, size - i + 1);
if (j) {
i += j - 1;
found = 1;
break; } }
#endif
/* if not found, trying a second pass looking for indented match */
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
if (!found && curtag != INS_TAG && curtag != DEL_TAG) {
i = 1;
while (i < size) {
i += 1;
/* stop on each "</", with i on the '/' */
while (i < size
&& !(data[i - 1] == '<' && data[i] == '/'))
i += 1;
if (i + 2 + curtag->size >= size) break;
j = htmlblock_end(curtag, data + i - 1, size - i + 1);
if (j) {
/* j is counted from the '<' at i - 1 */
i += j - 1;
found = 1;
break; } } }
if (!found) return 0;
/* the end of the block has been found */
work.size = i;
if (rndr->make.blockhtml)
rndr->make.blockhtml(ob, &work, rndr->make.opaque);
return i; }
/* parse_table_cell • renders the contents of a single table cell */
/* the cell text is parsed as inline markup into a temporary work buffer, */
/* which is then given to the table_cell callback together with flags */
static void
parse_table_cell(struct buf *ob, struct render *rndr, char *data, size_t size,
				int flags) {
	struct buf *content;
	content = new_work_buffer(rndr);
	parse_inline(content, rndr, data, size);
	rndr->make.table_cell(ob, content, flags, rndr->make.opaque);
	/* the work buffer goes back to the renderer once the cell is emitted */
	release_work_buffer(rndr, content); }
/* parse_table_row • parse an input line into a table row */
/* Splits data into cells at the separators recognised by is_table_sep(),
 * renders every cell through parse_table_cell() into a work buffer and hands
 * that buffer to the table_row callback.
 *   aligns, align_size: optional (aligns may be NULL) per-column default
 *       alignments, used for cells without an explicit colon marker
 *   flags: OR-ed into the flags of every cell, and given to the row callback
 * Returns the number of bytes consumed: up to and including the newline
 * ending the row, or size when no newline was met. */
static size_t
parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size,
				int *aligns, size_t align_size, int flags) {
	size_t i = 0, col = 0;
	size_t beg, end, total = 0;
	struct buf *cells = new_work_buffer(rndr);
	int align;
	/* skip leading blanks and separator */
	while (i < size && (data[i] == ' ' || data[i] == '\t'))
		i += 1;
	if (i < size && data[i] == '|')
		i += 1;
	/* go over all the cells, total being set once the newline is consumed */
	while (i < size && total == 0) {
		/* check optional left/center align marker */
		align = 0;
		if (data[i] == ':') {
			align |= MKD_CELL_ALIGN_LEFT;
			i += 1; }
		/* skip blanks */
		while (i < size && (data[i] == ' ' || data[i] == '\t'))
			i += 1;
		beg = i;
		/* forward to the next separator or EOL */
		while (i < size && !is_table_sep(data, i) && data[i] != '\n')
			i += 1;
		end = i;
		/* step over the separator or newline that stopped the scan */
		if (i < size) {
			i += 1;
			if (data[i - 1] == '\n')
				total = i; }
		/* check optional right/center align marker */
		/* Only a non-empty cell can carry it: when end == beg, */
		/* data[end - 1] lies before the cell, and stepping back would */
		/* leave end < beg, making the end - beg length below wrap */
		/* around to a huge value. */
		if (end > beg && data[end - 1] == ':') {
			align |= MKD_CELL_ALIGN_RIGHT;
			end -= 1; }
		/* remove trailing blanks */
		while (end > beg
		&& (data[end - 1] == ' ' || data[end - 1] == '\t'))
			end -= 1;
		/* skip the last cell if it was only blanks */
		/* (because it is only the optional end separator) */
		if (total && end <= beg) continue;
		/* fallback on default alignment if not explicit */
		if (align == 0 && aligns && col < align_size)
			align = aligns[col];
		/* render cells */
		parse_table_cell(cells, rndr, data + beg, end - beg,
							align | flags);
		col += 1; }
	/* render the whole row and clean up */
	rndr->make.table_row(ob, cells, flags, rndr->make.opaque);
	release_work_buffer(rndr, cells);
	return total ? total : size; }
/* parse_table • parsing of a whole table */
/* Renders the table starting at data: an optional header row followed by a
 * ruler line, then every consecutive line accepted by is_tableline().
 * Returns the number of bytes consumed. */
static size_t
parse_table(struct buf *ob, struct render *rndr, char *data, size_t size) {
size_t i = 0, head_end, col;
size_t align_size = 0;
int *aligns = 0;
struct buf *head = 0;
struct buf *rows = new_work_buffer(rndr);
/* skip the first (presumably header) line */
while (i < size && data[i] != '\n')
i += 1;
head_end = i;
/* fallback on end of input */
/* (a single line without newline: rendered as a one-row table, no header) */
if (i >= size) {
parse_table_row(rows, rndr, data, size, 0, 0, 0);
rndr->make.table(ob, 0, rows, rndr->make.opaque);
release_work_buffer(rndr, rows);
return i; }
/* attempt to parse a table rule, i.e. blanks, dash, colons and sep */
/* align_size counts the separators; col is used here as a flag telling */
/* whether any colon (alignment marker) was seen */
i += 1;
col = 0;
while (i < size && (data[i] == ' ' || data[i] == '\t' || data[i] == '-'
|| data[i] == ':' || data[i] == '|')) {
if (data[i] == '|') align_size += 1;
if (data[i] == ':') col = 1;
i += 1; }
/* the second line is a ruler only if nothing else precedes its newline */
if (i < size && data[i] == '\n') {
/* one more column than separators, as an upper bound */
align_size += 1;
/* render the header row */
head = new_work_buffer(rndr);
parse_table_row(head, rndr, data, head_end, 0, 0,
MKD_CELL_HEAD);
/* parse alignments if provided */
/* (when malloc fails, aligns stays NULL and no default alignment is used) */
if (col && (aligns = malloc(align_size * sizeof *aligns)) != 0){
for (i = 0; i < align_size; i += 1)
aligns[i] = 0;
/* from here on col is the index of the column being examined */
col = 0;
i = head_end + 1;
/* skip initial white space and optional separator */
while (i < size && (data[i] == ' ' || data[i] == '\t'))
i += 1;
if (data[i] == '|') i += 1;
/* compute default alignment for each column */
while (i < size && data[i] != '\n') {
/* colon as first character of the cell: left alignment */
if (data[i] == ':')
aligns[col] |= MKD_CELL_ALIGN_LEFT;
while (i < size
&& data[i] != '|' && data[i] != '\n')
i += 1;
/* colon as last character of the cell: right alignment */
if (data[i - 1] == ':')
aligns[col] |= MKD_CELL_ALIGN_RIGHT;
if (i < size && data[i] == '|')
i += 1;
col += 1; } }
/* point i to the beginning of next line/row */
/* (on both paths i is on the newline ending the ruler at this point) */
i += 1; }
else {
/* there is no valid ruler, continuing without header */
/* (the first line is then rendered as an ordinary body row) */
i = 0; }
/* render the table body lines */
while (i < size && is_tableline(data + i, size - i))
i += parse_table_row(rows, rndr, data + i, size - i,
aligns, align_size, 0);
/* render the full table */
rndr->make.table(ob, head, rows, rndr->make.opaque);
/* cleanup */
if (head) release_work_buffer(rndr, head);
release_work_buffer(rndr, rows);
free(aligns);
return i; }
/* parse_block • parses the input one block at a time */
/* Every pass of the loop tries the block kinds in a fixed order of
 * precedence and advances by the length reported by the matching parser.
 * Nothing is returned: the rendering is appended to ob. */
static void
parse_block(struct buf *ob, struct render *rndr,
			char *data, size_t size) {
	size_t pos = 0, left, used;
	char *cur;
	int tables_on = (rndr->make.table && rndr->make.table_row
				&& rndr->make.table_cell);
	/* too many work buffers in use: copy the input verbatim and stop */
	if (rndr->work.size > rndr->make.max_work_stack) {
		if (size) bufput(ob, data, size);
		return; }
	while (pos < size) {
		cur = data + pos;
		left = size - pos;
		/* ATX header */
		if (cur[0] == '#') {
			pos += parse_atxheader(ob, rndr, cur, left);
			continue; }
		/* HTML block, only attempted when a callback can receive it */
		if (cur[0] == '<' && rndr->make.blockhtml) {
			used = parse_htmlblock(ob, rndr, cur, left);
			if (used != 0) {
				pos += used;
				continue; } }
		/* blank line */
		used = is_empty(cur, left);
		if (used != 0) {
			pos += used;
			continue; }
		/* horizontal rule: the whole line and its newline are skipped */
		if (is_hrule(cur, left)) {
			if (rndr->make.hrule)
				rndr->make.hrule(ob, rndr->make.opaque);
			while (pos < size && data[pos] != '\n')
				pos += 1;
			pos += 1;
			continue; }
		/* remaining kinds each report how much input they consumed */
		if (prefix_quote(cur, left))
			used = parse_blockquote(ob, rndr, cur, left);
		else if (prefix_code(cur, left))
			used = parse_blockcode(ob, rndr, cur, left);
		else if (prefix_uli(cur, left))
			used = parse_list(ob, rndr, cur, left, 0);
		else if (prefix_oli(cur, left))
			used = parse_list(ob, rndr, cur, left,
						MKD_LIST_ORDERED);
		else if (tables_on && is_tableline(cur, left))
			used = parse_table(ob, rndr, cur, left);
		else
			used = parse_paragraph(ob, rndr, cur, left);
		pos += used; } }
/*********************
* REFERENCE PARSING *
*********************/
/* is_ref • returns whether a line is a reference or not */
/* Checks whether a link reference definition ("[id]: link 'title'") starts
 * at data[beg], looking no further than index end.
 *   last: when non-NULL and a reference is found, receives the index (in
 *       data) computed as the end of the reference
 *   refs: when non-NULL, a new struct link_ref is appended to it, holding
 *       the id, link and optional title in newly created buffers
 * Returns 1 when a reference was recognised, 0 otherwise (including when
 * build_ref_id() reports a failure). */
static int
is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs) {
size_t i = 0;
size_t id_offset, id_end;
size_t link_offset, link_end;
size_t title_offset, title_end;
size_t line_end;
struct link_ref *lr;
struct buf *id;
/* up to 3 optional leading spaces */
/* (a fourth space rules the line out; i counts the spaces found) */
if (beg + 3 >= end) return 0;
if (data[beg] == ' ') { i = 1;
if (data[beg + 1] == ' ') { i = 2;
if (data[beg + 2] == ' ') { i = 3;
if (data[beg + 3] == ' ') return 0; } } }
i += beg;
/* id part: anything but a newline between brackets */
if (data[i] != '[') return 0;
i += 1;
id_offset = i;
while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
i += 1;
if (i >= end || data[i] != ']') return 0;
id_end = i;
/* spacer: colon (space | tab)* newline? (space | tab)* */
i += 1;
if (i >= end || data[i] != ':') return 0;
i += 1;
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
i += 1;
/* a "\n\r" pair counts as a single newline */
if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
if (i >= end) return 0;
/* link: whitespace-free sequence, optionally between angle brackets */
if (data[i] == '<') i += 1;
link_offset = i;
while (i < end && data[i] != ' ' && data[i] != '\t'
&& data[i] != '\n' && data[i] != '\r') i += 1;
/* a trailing '>' is left out of the link */
if (data[i - 1] == '>') link_end = i - 1;
else link_end = i;
/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
if (i < end && data[i] != '\n' && data[i] != '\r'
&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
return 0;
/* line_end stays 0 as long as no valid end of reference is known */
line_end = 0;
/* computing end-of-line */
if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
line_end = i + 1;
/* optional (space|tab)* spacer after a newline */
if (line_end) {
i = line_end + 1;
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
/* optional title: any non-newline sequence enclosed in '"()
alone on its line */
title_offset = title_end = 0;
if (i + 1 < end
&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
i += 1;
title_offset = i;
/* looking for EOL */
while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
/* title_end temporarily holds the end of the title line */
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
title_end = i + 1;
else title_end = i;
/* stepping back */
i -= 1;
while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
i -= 1;
/* the title is kept only when a closing delimiter ends the line; */
/* the reference then extends to the end of the title line */
if (i > title_offset
&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
line_end = title_end;
title_end = i; } }
if (!line_end) return 0; /* garbage after the link */
/* a valid ref has been found, filling-in return structures */
if (last) *last = line_end;
if (!refs) return 1;
id = bufnew(WORK_UNIT);
if (build_ref_id(id, data + id_offset, id_end - id_offset) < 0) {
bufrelease(id);
return 0; }
lr = arr_item(refs, arr_newitem(refs));
lr->id = id;
lr->link = bufnew(link_end - link_offset);
bufput(lr->link, data + link_offset, link_end - link_offset);
/* title is left NULL when the reference has none */
if (title_end > title_offset) {
lr->title = bufnew(title_end - title_offset);
bufput(lr->title, data + title_offset,
title_end - title_offset); }
else lr->title = 0;
return 1; }
/**********************
* EXPORTED FUNCTIONS *
**********************/
/* markdown • parses the input buffer and renders it into the output buffer */
/* A first pass over ib stores the link references into the renderer state
 * and copies every other line into a temporary buffer, writing one '\n' per
 * newline found. A second pass renders that buffer into ob through the
 * callbacks of rndrer. Nothing is done when rndrer is NULL. */
void
markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer) {
	struct link_ref *lr;
	struct buf *text;
	size_t i, beg, end;
	struct render rndr;
	/* checking the renderer before anything is allocated, */
	/* so that the early return does not leak the text buffer */
	if (!rndrer) return;
	text = bufnew(TEXT_UNIT);
	/* filling the render structure */
	rndr.make = *rndrer;
	if (rndr.make.max_work_stack < 1)
		rndr.make.max_work_stack = 1;
	arr_init(&rndr.refs, sizeof (struct link_ref));
	parr_init(&rndr.work);
	/* a character is active only when a callback can make use of it */
	for (i = 0; i < 256; i += 1) rndr.active_char[i] = 0;
	if ((rndr.make.emphasis || rndr.make.double_emphasis
						|| rndr.make.triple_emphasis)
	&& rndr.make.emph_chars)
		for (i = 0; rndr.make.emph_chars[i]; i += 1)
			rndr.active_char[(unsigned char)rndr.make.emph_chars[i]]
				= char_emphasis;
	if (rndr.make.codespan) rndr.active_char['`'] = char_codespan;
	if (rndr.make.linebreak) rndr.active_char['\n'] = char_linebreak;
	if (rndr.make.image || rndr.make.link)
		rndr.active_char['['] = char_link;
	rndr.active_char['<'] = char_langle_tag;
	rndr.active_char['\\'] = char_escape;
	rndr.active_char['&'] = char_entity;
	/* first pass: looking for references, copying everything else */
	beg = 0;
	while (beg < ib->size) /* iterating over lines */
		if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
			beg = end;
		else { /* skipping to the next line */
			end = beg;
			while (end < ib->size
			&& ib->data[end] != '\n' && ib->data[end] != '\r')
				end += 1;
			/* adding the line body if present */
			if (end > beg) bufput(text, ib->data + beg, end - beg);
			while (end < ib->size
			&& (ib->data[end] == '\n' || ib->data[end] == '\r')) {
				/* add one \n per newline */
				if (ib->data[end] == '\n'
				|| (end + 1 < ib->size
						&& ib->data[end + 1] != '\n'))
					bufputc(text, '\n');
				end += 1; }
			beg = end; }
	/* sorting the reference array */
	if (rndr.refs.size)
		qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit,
					cmp_link_ref_sort);
	/* adding a final newline if not already present */
	if (text->size
	&& text->data[text->size - 1] != '\n'
	&& text->data[text->size - 1] != '\r')
		bufputc(text, '\n');
	/* second pass: actual rendering */
	if (rndr.make.prolog)
		rndr.make.prolog(ob, rndr.make.opaque);
	parse_block(ob, &rndr, text->data, text->size);
	if (rndr.make.epilog)
		rndr.make.epilog(ob, rndr.make.opaque);
	/* clean-up */
	bufrelease(text);
	lr = rndr.refs.base;
	for (i = 0; i < rndr.refs.size; i += 1) {
		bufrelease(lr[i].id);
		bufrelease(lr[i].link);
		bufrelease(lr[i].title); }
	arr_free(&rndr.refs);
	/* every work buffer must have been released by now */
	assert(rndr.work.size == 0);
	for (i = 0; i < rndr.work.asize; i += 1)
		bufrelease(rndr.work.item[i]);
	parr_free(&rndr.work); }
/* vim: set filetype=c: */
/* markdown.h - generic markdown parser */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef LITHIUM_MARKDOWN_H
#define LITHIUM_MARKDOWN_H
#include "buffer.h"
/********************
* TYPE DEFINITIONS *
********************/
/* mkd_autolink • type of autolink */
/* given to the autolink callback of struct mkd_renderer */
enum mkd_autolink {
MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink */
MKDA_NORMAL, /* normal http/https/ftp/etc link */
MKDA_EXPLICIT_EMAIL, /* e-mail link with explicit mailto: */
MKDA_IMPLICIT_EMAIL /* e-mail link without mailto: */
};
/* mkd_renderer • functions for rendering parsed data */
/* Every callback appends its output to ob and receives the opaque member */
/* of this structure as its last argument. */
struct mkd_renderer {
/* document level callbacks */
/* (called before and after the rendering of the blocks; NULL skips them) */
void (*prolog)(struct buf *ob, void *opaque);
void (*epilog)(struct buf *ob, void *opaque);
/* block level callbacks - NULL skips the block */
void (*blockcode)(struct buf *ob, struct buf *text, void *opaque);
void (*blockquote)(struct buf *ob, struct buf *text, void *opaque);
/* HTML blocks are only looked for when blockhtml is set */
void (*blockhtml)(struct buf *ob, struct buf *text, void *opaque);
void (*header)(struct buf *ob, struct buf *text,
int level, void *opaque);
void (*hrule)(struct buf *ob, void *opaque);
/* flags of list and listitem: see MKD_LIST_ORDERED and MKD_LI_BLOCK */
void (*list)(struct buf *ob, struct buf *text, int flags, void *opaque);
void (*listitem)(struct buf *ob, struct buf *text,
int flags, void *opaque);
void (*paragraph)(struct buf *ob, struct buf *text, void *opaque);
/* tables are only parsed when table, table_cell and table_row are all set; */
/* head_row is NULL for a table without header row */
void (*table)(struct buf *ob, struct buf *head_row, struct buf *rows,
void *opaque);
/* flags of table_cell and table_row: see the MKD_CELL_* values */
void (*table_cell)(struct buf *ob, struct buf *text, int flags,
void *opaque);
void (*table_row)(struct buf *ob, struct buf *cells, int flags,
void *opaque);
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(struct buf *ob, struct buf *link,
enum mkd_autolink type, void *opaque);
int (*codespan)(struct buf *ob, struct buf *text, void *opaque);
int (*double_emphasis)(struct buf *ob, struct buf *text,
char c, void *opaque);
int (*emphasis)(struct buf *ob, struct buf *text, char c,void*opaque);
int (*image)(struct buf *ob, struct buf *link, struct buf *title,
struct buf *alt, void *opaque);
int (*linebreak)(struct buf *ob, void *opaque);
int (*link)(struct buf *ob, struct buf *link, struct buf *title,
struct buf *content, void *opaque);
int (*raw_html_tag)(struct buf *ob, struct buf *tag, void *opaque);
int (*triple_emphasis)(struct buf *ob, struct buf *text,
char c, void *opaque);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(struct buf *ob, struct buf *entity, void *opaque);
void (*normal_text)(struct buf *ob, struct buf *text, void *opaque);
/* renderer data */
/* (markdown() raises max_work_stack to 1 when it is lower) */
int max_work_stack; /* prevent arbitrary deep recursion, cf README */
const char *emph_chars; /* chars that trigger emphasis rendering */
void *opaque; /* opaque data sent to every rendering callback */
};
/*********
* FLAGS *
*********/
/* list/listitem flags */
#define MKD_LIST_ORDERED 1
#define MKD_LI_BLOCK 2 /* <li> containing block data */
/* table cell flags */
/* the alignment lives in the two low bits (MKD_CELL_ALIGN_MASK) */
#define MKD_CELL_ALIGN_DEFAULT 0
#define MKD_CELL_ALIGN_LEFT 1
#define MKD_CELL_ALIGN_RIGHT 2
#define MKD_CELL_ALIGN_CENTER 3 /* LEFT | RIGHT */
#define MKD_CELL_ALIGN_MASK 3
/* set for the cells and the row of the table header */
#define MKD_CELL_HEAD 4
/**********************
* EXPORTED FUNCTIONS *
**********************/
/* markdown • parses the input buffer and renders it into the output buffer */
/* ib holds the markdown source, ob receives the output of the callbacks of */
/* rndr; nothing is rendered when rndr is NULL */
void
markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndr);
#endif /* ndef LITHIUM_MARKDOWN_H */
/* vim: set filetype=c: */
/* main.c - main function for markdown module testing */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "markdown.h"
#include "renderers.h"
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* READ_UNIT: unit given to bufnew() for the input buffer, and room kept */
/* available in it before each read */
#define READ_UNIT 1024
/* OUTPUT_UNIT: unit given to bufnew() for the output buffer */
#define OUTPUT_UNIT 64
/* buffer statistics, to track some memleaks */
/* (defined outside of this file; main() warns when they are non-zero at exit) */
extern long buffer_stat_nb;
extern size_t buffer_stat_alloc_bytes;
/* usage • print the option list */
/* out is the stream written to, name the program name shown in the synopsis */
void
usage(FILE *out, const char *name) {
	/* the synopsis groups the exclusive choices: output format (-H or -x), */
	/* then accepted extensions (-d, -m or -n); -h is described below */
	fprintf(out, "Usage: %s [-H | -x] [-d | -m | -n] [input-file]\n\n",
	    name);
	fprintf(out, "\t-d, --discount\n"
	    "\t\tEnable some Discount extensions (image size specification,\n"
	    "\t\tclass blocks and 'abbr:', 'class:', 'id:' and 'raw:'\n"
	    "\t\tpseudo-protocols)\n"
	    "\t-H, --html\n"
	    "\t\tOutput HTML-style self-closing tags (e.g. <br>)\n"
	    "\t-h, --help\n"
	    "\t\tDisplay this help text and exit without further processing\n"
	    "\t-m, --markdown\n"
	    "\t\tDisable all extensions and use strict markdown syntax\n"
	    "\t-n, --natext\n"
	    "\t\tEnable support for Discount extensions and Natasha's own\n"
	    "\t\textensions (id header attribute, class paragraph attribute,\n"
	    "\t\t'ins' and 'del' elements, and plain span elements)\n"
	    "\t-x, --xhtml\n"
	    "\t\tOutput XHTML-style self-closing tags (e.g. <br />)\n"); }
/* main • main function, interfacing STDIO with the parser */
int
main(int argc, char **argv) {
struct buf *ib, *ob;
size_t ret;
FILE *in = stdin;
const struct mkd_renderer *hrndr, *xrndr;
const struct mkd_renderer **prndr;
int ch, argerr, help;
struct option longopts[] = {
{ "discount", no_argument, 0, 'd' },
{ "html", no_argument, 0, 'H' },
{ "help", no_argument, 0, 'h' },
{ "markdown", no_argument, 0, 'm' },
{ "natext", no_argument, 0, 'n' },
{ "xhtml", no_argument, 0, 'x' },
{ 0, 0, 0, 0 } };
/* default options: strict markdown input, HTML output */
hrndr = &mkd_html;
xrndr = &mkd_xhtml;
prndr = &hrndr;
/* argument parsing */
argerr = help = 0;
while (!argerr &&
(ch = getopt_long(argc, argv, "dHhmnx", longopts, 0)) != -1)
switch (ch) {
case 'd': /* discount extension */
hrndr = &discount_html;
xrndr = &discount_xhtml;
break;
case 'H': /* HTML output */
prndr = &hrndr;
break;
case 'h': /* display help */
argerr = help = 1;
break;
case 'm': /* strict markdown */
hrndr = &mkd_html;
xrndr = &mkd_xhtml;
break;
case 'n': /* Discount + Natasha's extensions */
hrndr = &nat_html;
xrndr = &nat_xhtml;
break;
case 'x': /* XHTML output */
prndr = &xrndr;
break;
default:
argerr = 1; }
if (argerr) {
usage(help ? stdout : stderr, argv[0]);
return help ? EXIT_SUCCESS : EXIT_FAILURE; }
argc -= optind;
argv += optind;
/* opening the file if given from the command line */
if (argc > 0) {
in = fopen(argv[0], "r");
if (!in) {
fprintf(stderr,"Unable to open input file \"%s\": %s\n",
argv[0], strerror(errno));
return 1; } }
/* reading everything */
ib = bufnew(READ_UNIT);
bufgrow(ib, READ_UNIT);
while ((ret = fread(ib->data + ib->size, 1,
ib->asize - ib->size, in)) > 0) {
ib->size += ret;
bufgrow(ib, ib->size + READ_UNIT); }
if (in != stdin) fclose(in);
/* performing markdown parsing */
ob = bufnew(OUTPUT_UNIT);
markdown(ob, ib, *prndr);
/* writing the result to stdout */
ret = fwrite(ob->data, 1, ob->size, stdout);
if (ret < ob->size)
fprintf(stderr, "Warning: only %zu output byte written, "
"out of %zu\n",
ret,
ob->size);
/* cleanup */
bufrelease(ib);
bufrelease(ob);
/* memory checks */
if (buffer_stat_nb)
fprintf(stderr, "Warning: %ld buffers still active\n",
buffer_stat_nb);
if (buffer_stat_alloc_bytes)
fprintf(stderr, "Warning: %zu bytes still allocated\n",
buffer_stat_alloc_bytes);
return 0; }
/* vim: set filetype=c: */
/* mkd2latex.c - LaTeX-formatted output from markdown text */
/*
* Copyright (c) 2009, Baptiste Daroussin, Natacha Porté, and Michael Huang
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Links require the hyperref package, and images require the graphicx
* package.
*/
#include "markdown.h"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
/* READ_UNIT: unit given to bufnew() for the input buffer, and room kept */
/* available in it before each read */
#define READ_UNIT 1024
/* OUTPUT_UNIT: unit given to bufnew() for the output buffer */
#define OUTPUT_UNIT 64
/*********************
* ENTITY CONVERSION *
*********************/
/* str_pair • association of an HTML entity with its LaTeX rendering */
struct str_pair {
	const char *entity;
	const char *latex; };

/* entity_latex • translation table from HTML entities to LaTeX */
/* The table is looked up with bsearch(), so the entries MUST be kept sorted
 * by entity name in byte order (upper case before lower case). Entries left
 * in comments are those whose rendering needs more than the default LaTeX
 * setup. */
static const struct str_pair entity_latex[] = {
	{ "&AElig;",	"\\AE{}" },
	{ "&Aacute;",	"\\'A" },
	{ "&Acirc;",	"\\^A" },
	{ "&Agrave;",	"\\`A" },
	{ "&Aring;",	"\\AA{}" },
	{ "&Atilde;",	"\\~A" },
	{ "&Auml;",	"\\\"A" },
	{ "&Ccedil;",	"\\c{C}" },
//	{ "&ETH;",	"\\DH{}" }, // not available in OT1
	{ "&Eacute;",	"\\'E" },
	{ "&Ecirc;",	"\\^E" },
	{ "&Egrave;",	"\\`E" },
	{ "&Euml;",	"\\\"E" },
	{ "&Iacute;",	"\\'I" },
	{ "&Icirc;",	"\\^I" },
	{ "&Igrave;",	"\\`I" },
	{ "&Iuml;",	"\\\"I" },
	{ "&Ntilde;",	"\\~N" },
	{ "&Oacute;",	"\\'O" },
	{ "&Ocirc;",	"\\^O" },
	{ "&Ograve;",	"\\`O" },
	{ "&Oslash;",	"\\O{}" },
	{ "&Otilde;",	"\\~O" },
	{ "&Ouml;",	"\\\"O" },
//	{ "&THORN;",	"\\TH{}" }, // not available in OT1
	{ "&Uacute;",	"\\'U" },
	{ "&Ucirc;",	"\\^U" },
	{ "&Ugrave;",	"\\`U" },
	{ "&Uuml;",	"\\\"U" },
	/* upper case counterpart of &yacute; below */
	{ "&Yacute;",	"\\'Y" },
	{ "&Ygrave;",	"\\`Y" },
	{ "&aacute;",	"\\'a" },
	{ "&acirc;",	"\\^a" },
//	{ "&acute;",	"\\textasciiacute{}" }, // requires textcomp
	{ "&aelig;",	"\\ae{}" },
	{ "&agrave;",	"\\`a" },
	{ "&amp;",	"\\&" },
	{ "&apos;",	"'" },
	{ "&aring;",	"\\aa{}" },
	{ "&atilde;",	"\\~a" },
	{ "&auml;",	"\\\"a" },
//	{ "&brvbar;",	"\\textbrokenbar{}" }, // requires textcomp
	{ "&ccedil;",	"\\c{c}" },
	{ "&cedil;",	"\\c{}" },
//	{ "&cent;",	"\\textcent{}" }, // requires textcomp
	{ "&copy;",	"\\copyright{}" },
//	{ "&curren;",	"\\textcurrency{}" }, // requires textcomp
//	{ "&deg;",	"\\textdegree{}" }, // requires textcomp
//	{ "&divide;",	"\\textdiv{}" }, // requires textcomp
	{ "&eacute;",	"\\'e" },
	{ "&ecirc;",	"\\^e" },
	{ "&egrave;",	"\\`e" },
//	{ "&eth;",	"\\dh{}" }, // not available in OT1
	{ "&euml;",	"\\\"e" },
//	{ "&frac12;",	"\\textonehalf{}" }, // requires textcomp
//	{ "&frac14;",	"\\textonequarter{}" }, // requires textcomp
//	{ "&frac34;",	"\\textthreequarter{}" }, // requires textcomp
	{ "&gt;",	"$>$" },
	{ "&iacute;",	"\\'\\i{}" },
	{ "&icirc;",	"\\^\\i{}" },
	{ "&iexcl;",	"\\textexclamdown{}" },
	{ "&igrave;",	"\\`\\i{}" },
	/* inverted question mark, the counterpart of &iexcl; above */
	{ "&iquest;",	"\\textquestiondown{}" },
	{ "&iuml;",	"\\\"\\i{}" },
//	{ "&laquo;",	"\\guillemotleft{}" }, // not available in OT1
	{ "&lt;",	"$<$" },
//	{ "&macr;",	"\\textasciimacaron{}" }, // requires textcomp
//	{ "&micro;",	"\\textmu{}"}, // requires textcomp
	{ "&middot;",	"\\textperiodcentered{}" },
	{ "&nbsp;",	"~" },
//	{ "&not;",	"\\textlnot{}" }, // requires textcomp
	{ "&ntilde;",	"\\~n" },
	{ "&oacute;",	"\\'o" },
	{ "&ocirc;",	"\\^o" },
	{ "&ograve;",	"\\`o" },
	{ "&ordf;",	"\\textordfeminine{}" },
	{ "&ordm;",	"\\textordmasculine{}" },
	{ "&oslash;",	"\\o{}" },
	{ "&otilde;",	"\\~o" },
	{ "&ouml;",	"\\\"o" },
	{ "&para;",	"\\P{}" },
//	{ "&plusmn;",	"\\textpm{}" }, // requires textcomp
	{ "&pound;",	"\\textsterling{}" },
	{ "&quot;",	"\"" },
//	{ "&raquo;",	"\\guillemotright{}" }, // not available in OT1
	{ "&reg;",	"\\textregistered{}" },
	{ "&sect;",	"\\S{}" },
	{ "&shy;",	"\\-" },
//	{ "&sup1;",	"\\textonesuperior{}" }, // requires textcomp
//	{ "&sup2;",	"\\texttwosuperior{}" }, // requires textcomp
//	{ "&sup3;",	"\\textthreesuperior{}" }, // requires textcomp
	{ "&szlig;",	"\\ss{}" },
//	{ "&thorn;",	"\\th{}" }, // not available in OT1
//	{ "&times;",	"\\texttimes{}" }, // requires textcomp
	{ "&uacute;",	"\\'u" },
	{ "&ucirc;",	"\\^u" },
	{ "&ugrave;",	"\\`u" },
//	{ "&uml;",	"\\textasciidieresis{}" }, // requires textcomp
	{ "&uuml;",	"\\\"u" },
	{ "&yacute;",	"\\'y" },
//	{ "&yen;",	"\\textyen{}" }, // requires textcomp
	{ "&yuml;",	"\\\"y" },
};
/* cmp_entity • bsearch() comparison of a buffer with a table entry */
/* key is the struct buf searched for, element an entry of the table */
static int
cmp_entity(const void *key, const void *element) {
	const struct buf *wanted = key;
	const struct str_pair *candidate = element;
	return bufcmps(wanted, candidate->entity); }
/* entity2latex • LaTeX rendering of an HTML entity */
/* returns the string associated with entity in entity_latex, */
/* or 0 when the table does not hold the entity */
static const char *
entity2latex(const struct buf *entity) {
	const size_t count = sizeof entity_latex / sizeof entity_latex[0];
	const struct str_pair *hit;
	hit = bsearch(entity, entity_latex, count, sizeof entity_latex[0],
			&cmp_entity);
	if (!hit) return 0;
	return hit->latex; }
/******************************
* MARKDOWN TO LATEX RENDERER *
******************************/
/* latex_text_escape • appends text to ob, escaping LaTeX special characters */
/* src holds size bytes of plain text. Runs of ordinary characters are copied
 * as they are; each of & % $ # _ { } ~ ^ \ < > is replaced by a LaTeX
 * sequence printing that character. */
void
latex_text_escape(struct buf *ob, char *src, size_t size) {
	size_t i = 0, org;
	while (i < size) {
		/* copying directly unescaped characters */
		org = i;
		while (i < size && src[i] != '&' && src[i] != '%'
		&& src[i] != '$' && src[i] != '#' && src[i] != '_'
		&& src[i] != '{' && src[i] != '}' && src[i] != '~'
		&& src[i] != '^' && src[i] != '\\' && src[i] != '<'
		&& src[i] != '>')
			i += 1;
		if (i > org) bufput(ob, src + org, i - org);
		if (i >= size) break;
		/* escaping the special character that stopped the scan */
		switch (src[i]) {
		case '&': BUFPUTSL(ob, "\\&"); break;
		case '%': BUFPUTSL(ob, "\\%"); break;
		case '$': BUFPUTSL(ob, "\\$"); break;
		case '#': BUFPUTSL(ob, "\\#"); break;
		case '_': BUFPUTSL(ob, "\\_"); break;
		case '{': BUFPUTSL(ob, "\\{"); break;
		case '}': BUFPUTSL(ob, "\\}"); break;
		case '<': BUFPUTSL(ob, "$<$"); break;
		/* '>' must print a greater-than sign, not a second "$<$" */
		case '>': BUFPUTSL(ob, "$>$"); break;
		case '~': BUFPUTSL(ob, "\\textasciitilde{}"); break;
		case '^': BUFPUTSL(ob, "\\textasciicircum{}"); break;
		case '\\': BUFPUTSL(ob, "\\textbackslash{}"); break;
		default: break; }
		i += 1; } }
/* latex_prolog • document preamble, up to the opening of the document body */
/* (hyperref and graphicx are loaded for the links and images emitted by */
/* this renderer) */
static void
latex_prolog(struct buf *ob, void *opaque) {
	BUFPUTSL(ob, "\\documentclass{article}\n");
	BUFPUTSL(ob, "\\usepackage{hyperref}\n");
	BUFPUTSL(ob, "\\usepackage{graphicx}\n");
	BUFPUTSL(ob, "\\begin{document}\n"); }
/* latex_epilog • closes the document body opened by the prolog */
static void
latex_epilog(struct buf *ob, void *opaque) {
	BUFPUTSL(ob,
		"\n"
		"\\end{document}\n"); }
/* latex_autolink • renders an autolink as \href{target}{label} */
/* returns 0 (span left to the caller) for a missing or empty link, */
/* 1 otherwise */
static int
latex_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type,
						void *opaque) {
	char *label;
	size_t label_size;
	if (!link || !link->size) return 0;
	/* target: the link itself, "mailto:" being added to implicit e-mails */
	BUFPUTSL(ob, "\\href{");
	if (type == MKDA_IMPLICIT_EMAIL) {
		BUFPUTSL(ob, "mailto:"); }
	bufput(ob, link->data, link->size);
	BUFPUTSL(ob, "}{");
	/* label: the link, without the first 7 bytes (the length of */
	/* "mailto:") of an explicit e-mail link */
	label = link->data;
	label_size = link->size;
	if (type == MKDA_EXPLICIT_EMAIL && link->size > 7) {
		label += 7;
		label_size -= 7; }
	latex_text_escape(ob, label, label_size);
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_link • renders a link as \href{link}{content} */
/* the title is not used; a missing or empty link or content leaves the */
/* matching pair of braces empty; always returns 1 */
static int
latex_link(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *content, void *opaque) {
	int has_target = (link && link->size);
	int has_content = (content && content->size);
	BUFPUTSL(ob, "\\href{");
	if (has_target) {
		bufput(ob, link->data, link->size); }
	BUFPUTSL(ob, "}{");
	if (has_content) {
		bufput(ob, content->data, content->size); }
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_image • renders an image as \includegraphics{link} */
/* title and alt are not used; returns 0 for a missing or empty link, */
/* 1 otherwise */
static int
latex_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, void *opaque) {
	if (link == 0 || link->size == 0)
		return 0;
	BUFPUTSL(ob, "\\includegraphics{");
	bufput(ob, link->data, link->size);
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_blockcode • renders a block verbatim, in a verbatim environment */
/* a newline is output first unless ob is still empty */
static void
latex_blockcode(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	BUFPUTSL(ob, "\\begin{verbatim}\n");
	if (text != 0) {
		bufput(ob, text->data, text->size); }
	BUFPUTSL(ob, "\\end{verbatim}\n"); }
/* latex_blockquote • wraps the rendered quote in a quote environment */
/* a newline is output first unless ob is still empty */
static void
latex_blockquote(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	BUFPUTSL(ob, "\\begin{quote}\n");
	if (text != 0) {
		bufput(ob, text->data, text->size); }
	BUFPUTSL(ob, "\\end{quote}\n"); }
/* latex_codespan • renders a span as \texttt{...}, its text being escaped */
/* a NULL text yields empty braces; always returns 1 */
static int
latex_codespan(struct buf *ob, struct buf *text, void *opaque) {
	BUFPUTSL(ob, "\\texttt{");
	if (text != 0) {
		latex_text_escape(ob, text->data, text->size); }
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_header • renders a header as a sectioning command */
/* Levels 1 to 3 map to \section, \subsection and \subsubsection. For any
 * other level a warning is printed on stderr and the header text is output
 * bare, without command nor closing brace. */
static void
latex_header(struct buf *ob, struct buf *text, int level, void *opaque) {
	int supported = (level >= 1 && level <= 3);
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	if (level == 1) {
		BUFPUTSL(ob,"\\section{"); }
	else if (level == 2) {
		BUFPUTSL(ob, "\\subsection{"); }
	else if (level == 3) {
		BUFPUTSL(ob, "\\subsubsection{"); }
	else {
		fprintf(stderr, "Warning: ignoring header level %d\n",
				level); }
	if (text != 0) {
		bufput(ob, text->data, text->size); }
	if (supported) {
		BUFPUTSL(ob, "}\n"); } }
/* latex_double_emphasis • renders double emphasis as \textbf{...} */
/* c is not used; returns 0 for a missing or empty text, 1 otherwise */
static int
latex_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == 0 || text->size == 0)
		return 0;
	BUFPUTSL(ob, "\\textbf{");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_emphasis • renders simple emphasis as \emph{...} */
/* c is not used; returns 0 for a missing or empty text, 1 otherwise */
static int
latex_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == 0 || text->size == 0)
		return 0;
	/* text is known to be non-NULL from here on */
	BUFPUTSL(ob, "\\emph{");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "}");
	return 1; }
/* latex_linebreak • renders a line break as a LaTeX double backslash */
/* always returns 1 */
static int
latex_linebreak(struct buf *ob, void *opaque) {
	/* two backslash characters are written to the output */
	BUFPUTSL(ob, "\\\\");
	return 1; }
/* latex_paragraph • outputs the paragraph text followed by a newline */
/* a newline is output first unless ob is still empty, which leaves an */
/* empty line between the paragraph and what precedes it */
static void
latex_paragraph(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	if (text != 0) {
		bufput(ob, text->data, text->size); }
	BUFPUTSL(ob, "\n"); }
/* latex_list • wraps the rendered items in a list environment */
/* enumerate when flags has MKD_LIST_ORDERED set, itemize otherwise; */
/* a newline is output first unless ob is still empty */
static void
latex_list(struct buf *ob, struct buf *text, int flags, void *opaque) {
	int ordered = flags & MKD_LIST_ORDERED;
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	if (ordered) {
		BUFPUTSL(ob, "\\begin{enumerate}\n"); }
	else {
		BUFPUTSL(ob, "\\begin{itemize}\n"); }
	if (text != 0) {
		bufput(ob, text->data, text->size); }
	if (ordered) {
		BUFPUTSL(ob, "\\end{enumerate}\n"); }
	else {
		BUFPUTSL(ob, "\\end{itemize}\n"); } }
/* latex_listitem • renders a list item as "\item text" on its own line */
/* the trailing newlines of text are dropped by shrinking text->size, so */
/* the buffer of the caller is modified; flags is not used */
static void
latex_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) {
	BUFPUTSL(ob, "\\item ");
	if (text != 0) {
		for (;;) {
			if (text->size == 0) break;
			if (text->data[text->size - 1] != '\n') break;
			text->size -= 1; }
		bufput(ob, text->data, text->size); }
	BUFPUTSL(ob, "\n"); }
/* latex_hrule • renders a horizontal rule as \hrule */
/* a newline is output first unless ob is still empty; none is added after */
static void
latex_hrule(struct buf *ob, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n'); }
	BUFPUTSL(ob, "\\hrule"); }
/* latex_triple_emphasis • renders triple emphasis as \textbf{\emph{...}} */
/* c is not used; returns 0 for a missing or empty text, 1 otherwise */
static int
latex_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == 0 || text->size == 0)
		return 0;
	BUFPUTSL(ob, "\\textbf{\\emph{");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "}}");
	return 1; }
/* latex_entity • renders an HTML entity */
/* entities known to entity2latex() are replaced by their LaTeX rendering, */
/* any other is copied as it is inside \texttt{} */
static void
latex_entity(struct buf *ob, struct buf *entity, void *opaque) {
	const char *known = entity2latex(entity);
	if (known != 0) {
		bufputs(ob, known);
		return; }
	BUFPUTSL(ob, "\\texttt{");
	bufput(ob, entity->data, entity->size);
	BUFPUTSL(ob, "}"); }
/* latex_normal_text • outputs plain text with LaTeX special chars escaped */
/* nothing is output for a NULL text */
static void
latex_normal_text(struct buf *ob, struct buf *text, void *opaque) {
	if (text == 0)
		return;
	latex_text_escape(ob, text->data, text->size); }
/* renderer structure */
/* The initializer is positional: each entry fills the member of */
/* struct mkd_renderer found at the same rank in its declaration. */
struct mkd_renderer to_latex = {
/* document-level callbacks */
latex_prolog,
latex_epilog,
/* block-level callbacks */
latex_blockcode,
latex_blockquote,
latex_blockcode, /* blockhtml: HTML blocks are output like code blocks */
latex_header,
latex_hrule,
latex_list,
latex_listitem,
latex_paragraph,
NULL, /* table */
NULL, /* table_cell */
NULL, /* table_row */
/* span-level callbacks */
latex_autolink,
latex_codespan,
latex_double_emphasis,
latex_emphasis,
latex_image,
latex_linebreak,
latex_link,
latex_codespan, /* raw_html_tag: inline tags are output like code spans */
latex_triple_emphasis,
/* low-level callbacks */
latex_entity,
latex_normal_text,
/* renderer data */
64, /* max_work_stack */
"*_", /* emph_chars */
NULL }; /* opaque */
/*****************
* MAIN FUNCTION *
*****************/
/* main • main function, interfacing STDIO with the parser */
/* Reads the whole input (file named by the first argument, or standard
 * input), renders it with the to_latex renderer and writes the result to
 * standard output. */
int
main(int argc, char **argv) {
	FILE *in = stdin;
	struct buf *ib, *ob;
	size_t count;

	/* opening the file if given from the command line */
	if (argc > 1) {
		in = fopen(argv[1], "r");
		if (!in) {
			fprintf(stderr,"Unable to open input file \"%s\": %s\n",
				argv[1], strerror(errno));
			return 1; } }

	/* reading everything, keeping room for one more unit before each read */
	ib = bufnew(READ_UNIT);
	bufgrow(ib, READ_UNIT);
	for (;;) {
		count = fread(ib->data + ib->size, 1,
					ib->asize - ib->size, in);
		if (count == 0) break;
		ib->size += count;
		bufgrow(ib, ib->size + READ_UNIT); }
	if (in != stdin) fclose(in);

	/* performing markdown to LaTeX */
	ob = bufnew(OUTPUT_UNIT);
	markdown(ob, ib, &to_latex);

	/* writing the result to stdout */
	count = fwrite(ob->data, 1, ob->size, stdout);
	if (count < ob->size)
		fprintf(stderr, "Warning: only %zu output byte written, "
				"out of %zu\n",
				count,
				ob->size);

	/* cleanup */
	bufrelease(ib);
	bufrelease(ob);
	return 0; }
/* vim: set filetype=c: */
/* mkd2man.c - man-page-formatted output from markdown text */
/*
* Copyright (c) 2009, Baptiste Daroussin and Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "markdown.h"
#include <stdio.h>
#include <errno.h>
#include <string.h>
#define READ_UNIT 1024
#define OUTPUT_UNIT 64
/****************************
* MARKDOWN TO MAN RENDERER *
****************************/
/* man_text_escape • copies size bytes from src into ob, replacing every
 * '-' with the two characters "\-" */
void
man_text_escape(struct buf *ob, char *src, size_t size) {
	size_t pos = 0;

	while (pos < size) {
		/* copy the run of characters that need no escaping */
		size_t start = pos;
		while (pos < size && src[pos] != '-') {
			pos += 1;
		}
		if (pos > start) {
			bufput(ob, src + start, pos - start);
		}
		if (pos >= size) {
			break;
		}
		/* the run stopped before the end, so src[pos] is a '-' */
		BUFPUTSL(ob, "\\-");
		pos += 1;
	}
}
/* man_blockcode • emits the escaped code between ".nf" and ".fi" lines,
 * preceded by a newline when the output buffer is not empty */
static void
man_blockcode(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, ".nf\n");
	if (text != NULL) {
		man_text_escape(ob, text->data, text->size);
	}
	BUFPUTSL(ob, ".fi\n");
}
/* man_codespan • emits the escaped span between ".nf" and ".fi" lines,
 * exactly like man_blockcode, and always returns 1 */
static int
man_codespan(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, ".nf\n");
	if (text != NULL) {
		man_text_escape(ob, text->data, text->size);
	}
	BUFPUTSL(ob, ".fi\n");
	return 1;
}
/* man_header • emits ".TH ", ".SH " or ".SS " for levels 1, 2 and 3
 * (nothing for other levels) followed by the header text */
static void
man_header(struct buf *ob, struct buf *text, int level, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	if (level == 1) {
		bufprintf(ob,".TH ");
	} else if (level == 2) {
		bufprintf(ob, ".SH ");
	} else if (level == 3) {
		bufprintf(ob, ".SS ");
	}
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
}
/* man_double_emphasis • wraps the text between "\fB" and "\fP";
 * returns 0 without output when the text is missing or empty */
static int
man_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "\\fB");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "\\fP");
	return 1;
}
/* man_emphasis • wraps the text between "\fI" and "\fP";
 * returns 0 without output when the text is missing or empty */
static int
man_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "\\fI");
	/* text is known to be non-NULL past the guard above */
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "\\fP");
	return 1;
}
/* man_linebreak • emits a ".LP" request on its own line; always returns 1 */
static int
man_linebreak(struct buf *ob, void *opaque) {
	BUFPUTSL(ob, "\n.LP\n");
	return 1;
}
/* man_paragraph • emits a ".TP" line, the paragraph text and a newline,
 * preceded by a newline when the output buffer is not empty */
static void
man_paragraph(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, ".TP\n");
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "\n");
}
/* man_list • emits the list preamble (a ".nr step 0 1" line for ordered
 * lists, a lone "." line otherwise), the rendered items and a newline */
static void
man_list(struct buf *ob, struct buf *text, int flags, void *opaque) {
	int ordered = (flags & MKD_LIST_ORDERED) != 0;

	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	if (!ordered) {
		BUFPUTSL(ob,".\n");
	} else {
		BUFPUTSL(ob,"\n.nr step 0 1\n");
	}
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "\n");
}
/* man_listitem • emits an ".IP" line (numbered through the "step" register
 * for ordered lists, bulleted otherwise), then the item text without its
 * trailing newlines and a newline; text->size is shrunk in place */
static void
man_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) {
	if (!(flags & MKD_LIST_ORDERED)) {
		BUFPUTSL(ob, ".IP \\[bu] 2 \n");
	} else {
		BUFPUTSL(ob, ".IP \\n+[step]\n");
	}
	if (text != NULL) {
		/* strip every trailing newline before copying */
		for (; text->size > 0; text->size -= 1) {
			if (text->data[text->size - 1] != '\n') break;
		}
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "\n");
}
/* man_normal_text • passes plain text through man_text_escape;
 * does nothing when there is no text */
static void
man_normal_text(struct buf *ob, struct buf *text, void *opaque) {
	if (text == NULL) {
		return;
	}
	man_text_escape(ob, text->data, text->size);
}
/* renderer structure */
/* Callback table handed to markdown() by main(). The initializer is
 * positional, so the order of the entries must not be changed. A NULL
 * entry means this renderer provides no callback for that slot. */
struct mkd_renderer to_man = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
man_blockcode,
NULL,
NULL,
man_header,
NULL,
man_list,
man_listitem,
man_paragraph,
NULL,
NULL,
NULL,
/* span-level callbacks */
NULL,
man_codespan,
man_double_emphasis,
man_emphasis,
NULL,
man_linebreak,
NULL,
NULL,
NULL,
/* low-level callbacks */
NULL,
man_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit and "*_" the set of
 * emphasis delimiter characters — confirm field meanings in markdown.h */
64,
"*_",
NULL };
/*****************
* MAIN FUNCTION *
*****************/
/* main • main function, interfacing STDIO with the parser
 *
 * Reads the whole input (the file named by argv[1] when given, stdin
 * otherwise), renders it through the to_man callbacks and writes the
 * result to stdout.
 * Returns 1 when the input file cannot be opened or a buffer cannot be
 * allocated, 0 otherwise (a short write only produces a warning). */
int
main(int argc, char **argv) {
	struct buf *ib, *ob;
	size_t ret;
	FILE *in = stdin;

	/* opening the file if given from the command line */
	if (argc > 1) {
		in = fopen(argv[1], "r");
		if (!in) {
			fprintf(stderr,"Unable to open input file \"%s\": %s\n",
				argv[1], strerror(errno));
			return 1;
		}
	}

	/* reading everything */
	ib = bufnew(READ_UNIT);
	if (!ib) {
		/* the buffer is dereferenced right below: bail out cleanly */
		fprintf(stderr, "Unable to allocate the input buffer\n");
		if (in != stdin) fclose(in);
		return 1;
	}
	bufgrow(ib, READ_UNIT);
	/* fread returning 0 ends the loop */
	while ((ret = fread(ib->data + ib->size, 1,
			ib->asize - ib->size, in)) > 0) {
		ib->size += ret;
		bufgrow(ib, ib->size + READ_UNIT);
	}
	if (in != stdin) fclose(in);

	/* performing markdown to man */
	ob = bufnew(OUTPUT_UNIT);
	if (!ob) {
		fprintf(stderr, "Unable to allocate the output buffer\n");
		bufrelease(ib);
		return 1;
	}
	markdown(ob, ib, &to_man);

	/* writing the result to stdout */
	ret = fwrite(ob->data, 1, ob->size, stdout);
	if (ret < ob->size)
		fprintf(stderr, "Warning: only %zu output byte written, "
				"out of %zu\n",
				ret,
				ob->size);

	/* cleanup */
	bufrelease(ib);
	bufrelease(ob);
	return 0;
}
/* vim: set filetype=c: */
/* renderers.c - example markdown renderers */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "renderers.h"
#include <strings.h>
/*****************************
* EXPORTED HELPER FUNCTIONS *
*****************************/
/* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"'
 * (size bytes are read from src; the escaped copy is appended to ob) */
void
lus_attr_escape(struct buf *ob, char *src, size_t size) {
	size_t pos = 0;

	while (pos < size) {
		/* copy the run of characters that need no escaping */
		size_t start = pos;
		while (pos < size
		&& !(src[pos] == '<' || src[pos] == '>'
			|| src[pos] == '&' || src[pos] == '"')) {
			pos += 1;
		}
		if (pos > start) {
			bufput(ob, src + start, pos - start);
		}
		if (pos >= size) {
			break;
		}
		/* replace the special character that stopped the run */
		switch (src[pos]) {
		case '<':
			BUFPUTSL(ob, "&lt;");
			break;
		case '>':
			BUFPUTSL(ob, "&gt;");
			break;
		case '&':
			BUFPUTSL(ob, "&amp;");
			break;
		case '"':
			BUFPUTSL(ob, "&quot;");
			break;
		}
		pos += 1;
	}
}
/* lus_body_escape • copy the buffer entity-escaping '<', '>' and '&'
 * (size bytes are read from src; the escaped copy is appended to ob) */
void
lus_body_escape(struct buf *ob, char *src, size_t size) {
	size_t pos = 0;

	while (pos < size) {
		/* copy the run of characters that need no escaping */
		size_t start = pos;
		while (pos < size
		&& !(src[pos] == '<' || src[pos] == '>' || src[pos] == '&')) {
			pos += 1;
		}
		if (pos > start) {
			bufput(ob, src + start, pos - start);
		}
		if (pos >= size) {
			break;
		}
		/* replace the special character that stopped the run */
		switch (src[pos]) {
		case '<':
			BUFPUTSL(ob, "&lt;");
			break;
		case '>':
			BUFPUTSL(ob, "&gt;");
			break;
		case '&':
			BUFPUTSL(ob, "&amp;");
			break;
		}
		pos += 1;
	}
}
/********************
* GENERIC RENDERER *
********************/
/* rndr_autolink • renders an automatic link as <a href="...">...</a>;
 * returns 0 without output when the link is missing or empty */
static int
rndr_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type,
						void *opaque) {
	size_t skip = 0;

	if (link == NULL || link->size == 0) {
		return 0;
	}

	/* target: implicit e-mail addresses get a "mailto:" prefix */
	BUFPUTSL(ob, "<a href=\"");
	if (type == MKDA_IMPLICIT_EMAIL) {
		BUFPUTSL(ob, "mailto:");
	}
	lus_attr_escape(ob, link->data, link->size);
	BUFPUTSL(ob, "\">");

	/* label: explicit e-mail links longer than 7 bytes lose their first
	 * 7 bytes (presumably the "mailto:" scheme) */
	if (type == MKDA_EXPLICIT_EMAIL && link->size > 7) {
		skip = 7;
	}
	lus_body_escape(ob, link->data + skip, link->size - skip);
	BUFPUTSL(ob, "</a>");
	return 1;
}
/* rndr_blockcode • emits the body-escaped code inside <pre><code>,
 * preceded by a newline when the output buffer is not empty */
static void
rndr_blockcode(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<pre><code>");
	if (text != NULL) {
		lus_body_escape(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "</code></pre>\n");
}
/* rndr_blockquote • wraps the already-rendered text in <blockquote>,
 * preceded by a newline when the output buffer is not empty */
static void
rndr_blockquote(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<blockquote>\n");
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "</blockquote>\n");
}
/* rndr_codespan • emits the body-escaped text inside <code>;
 * always returns 1, even when there is no text */
static int
rndr_codespan(struct buf *ob, struct buf *text, void *opaque) {
	BUFPUTSL(ob, "<code>");
	if (text != NULL) {
		lus_body_escape(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "</code>");
	return 1;
}
/* rndr_double_emphasis • wraps the text in <strong>;
 * returns 0 without output when the text is missing or empty */
static int
rndr_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<strong>");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "</strong>");
	return 1;
}
/* rndr_emphasis • wraps the text in <em>;
 * returns 0 without output when the text is missing or empty */
static int
rndr_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<em>");
	/* text is known to be non-NULL past the guard above */
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "</em>");
	return 1;
}
/* rndr_header • emits <hN>text</hN> where N is the given level,
 * preceded by a newline when the output buffer is not empty */
static void
rndr_header(struct buf *ob, struct buf *text, int level, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	bufprintf(ob, "<h%d>", level);
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	bufprintf(ob, "</h%d>\n", level);
}
/* rndr_link • emits <a href="link" title="title">content</a>; the title
 * attribute is omitted when there is no title, and every missing or empty
 * part is simply skipped. Always returns 1. */
static int
rndr_link(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *content, void *opaque) {
	int has_link = link != NULL && link->size != 0;
	int has_title = title != NULL && title->size != 0;
	int has_content = content != NULL && content->size != 0;

	BUFPUTSL(ob, "<a href=\"");
	if (has_link) {
		lus_attr_escape(ob, link->data, link->size);
	}
	if (has_title) {
		BUFPUTSL(ob, "\" title=\"");
		lus_attr_escape(ob, title->data, title->size);
	}
	BUFPUTSL(ob, "\">");
	if (has_content) {
		bufput(ob, content->data, content->size);
	}
	BUFPUTSL(ob, "</a>");
	return 1;
}
/* rndr_list • wraps the rendered items in <ol> (ordered) or <ul>,
 * preceded by a newline when the output buffer is not empty */
static void
rndr_list(struct buf *ob, struct buf *text, int flags, void *opaque) {
	int ordered = (flags & MKD_LIST_ORDERED) != 0;

	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	/* opening tags are 5 bytes long, closing tags 6 */
	if (ordered) {
		bufput(ob, "<ol>\n", 5);
	} else {
		bufput(ob, "<ul>\n", 5);
	}
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	if (ordered) {
		bufput(ob, "</ol>\n", 6);
	} else {
		bufput(ob, "</ul>\n", 6);
	}
}
/* rndr_listitem • emits <li>text</li> with the trailing newlines of the
 * text removed; text->size is shrunk in place while trimming */
static void
rndr_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) {
	BUFPUTSL(ob, "<li>");
	if (text != NULL) {
		/* strip every trailing newline before copying */
		for (; text->size > 0; text->size -= 1) {
			if (text->data[text->size - 1] != '\n') break;
		}
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "</li>\n");
}
/* rndr_normal_text • passes plain text through lus_body_escape;
 * does nothing when there is no text */
static void
rndr_normal_text(struct buf *ob, struct buf *text, void *opaque) {
	if (text == NULL) {
		return;
	}
	lus_body_escape(ob, text->data, text->size);
}
/* rndr_paragraph • wraps the already-rendered text in <p>,
 * preceded by a newline when the output buffer is not empty */
static void
rndr_paragraph(struct buf *ob, struct buf *text, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<p>");
	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}
	BUFPUTSL(ob, "</p>\n");
}
/* rndr_raw_block • copies the block verbatim without its leading and
 * trailing newlines, followed by one newline; outputs nothing when the
 * block is missing or made only of newlines */
static void
rndr_raw_block(struct buf *ob, struct buf *text, void *opaque) {
	size_t first, past;

	if (text == NULL) {
		return;
	}
	/* past: one beyond the last byte that is not a trailing newline */
	for (past = text->size; past > 0; past -= 1) {
		if (text->data[past - 1] != '\n') break;
	}
	/* first: index of the first byte that is not a leading newline */
	for (first = 0; first < past; first += 1) {
		if (text->data[first] != '\n') break;
	}
	if (first >= past) {
		return;
	}
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	bufput(ob, text->data + first, past - first);
	bufputc(ob, '\n');
}
/* rndr_raw_inline • copies the text verbatim into the output and
 * returns 1; text is dereferenced unconditionally */
static int
rndr_raw_inline(struct buf *ob, struct buf *text, void *opaque) {
	const int rendered = 1;

	bufput(ob, text->data, text->size);
	return rendered;
}
/* rndr_triple_emphasis • wraps the text in <strong><em>;
 * returns 0 without output when the text is missing or empty */
static int
rndr_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<strong><em>");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "</em></strong>");
	return 1;
}
/*******************
* HTML 4 RENDERER *
*******************/
/* html_hrule • emits "<hr>" on its own line, preceded by a newline when
 * the output buffer is not empty */
static void
html_hrule(struct buf *ob, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<hr>\n");
}
/* html_image • emits <img src="link" alt="alt" title="title">; the alt
 * attribute is always present (possibly empty), the title attribute only
 * when a title is given. Returns 0 without output when the link is
 * missing or empty. */
static int
html_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, void *opaque) {
	int has_alt = alt != NULL && alt->size != 0;
	int has_title = title != NULL && title->size != 0;

	if (link == NULL || link->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<img src=\"");
	lus_attr_escape(ob, link->data, link->size);
	BUFPUTSL(ob, "\" alt=\"");
	if (has_alt) {
		lus_attr_escape(ob, alt->data, alt->size);
	}
	if (has_title) {
		BUFPUTSL(ob, "\" title=\"");
		lus_attr_escape(ob, title->data, title->size);
	}
	BUFPUTSL(ob, "\">");
	return 1;
}
/* html_linebreak • emits "<br>" followed by a newline; always returns 1 */
static int
html_linebreak(struct buf *ob, void *opaque) {
	BUFPUTSL(ob, "<br>\n");
	return 1;
}
/* exported renderer structure */
/* HTML 4 callback table: the generic rndr_* callbacks plus the html_*
 * variants for hrule, image and linebreak. The initializer is positional,
 * so the order of the entries must not be changed. A NULL entry means no
 * callback is provided for that slot. */
const struct mkd_renderer mkd_html = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
rndr_blockquote,
rndr_raw_block,
rndr_header,
html_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
/* table, table cell and table row callbacks: not provided here */
NULL,
NULL,
NULL,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
html_image,
html_linebreak,
rndr_link,
rndr_raw_inline,
rndr_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit and "*_" the set of
 * emphasis delimiter characters — confirm field meanings in markdown.h */
64,
"*_",
NULL };
/**********************
* XHTML 1.0 RENDERER *
**********************/
/* xhtml_hrule • emits "<hr />" on its own line, preceded by a newline
 * when the output buffer is not empty */
static void
xhtml_hrule(struct buf *ob, void *opaque) {
	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<hr />\n");
}
/* xhtml_image • emits <img src="link" alt="alt" title="title" />; the alt
 * attribute is always present (possibly empty), the title attribute only
 * when a title is given. Returns 0 without output when the link is
 * missing or empty. */
static int
xhtml_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, void *opaque) {
	int has_alt = alt != NULL && alt->size != 0;
	int has_title = title != NULL && title->size != 0;

	if (link == NULL || link->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<img src=\"");
	lus_attr_escape(ob, link->data, link->size);
	BUFPUTSL(ob, "\" alt=\"");
	if (has_alt) {
		lus_attr_escape(ob, alt->data, alt->size);
	}
	if (has_title) {
		BUFPUTSL(ob, "\" title=\"");
		lus_attr_escape(ob, title->data, title->size);
	}
	BUFPUTSL(ob, "\" />");
	return 1;
}
/* xhtml_linebreak • emits "<br />" followed by a newline; always returns 1 */
static int
xhtml_linebreak(struct buf *ob, void *opaque) {
	BUFPUTSL(ob, "<br />\n");
	return 1;
}
/* exported renderer structure */
/* XHTML 1.0 callback table: the generic rndr_* callbacks plus the xhtml_*
 * variants for hrule, image and linebreak. The initializer is positional,
 * so the order of the entries must not be changed. A NULL entry means no
 * callback is provided for that slot. */
const struct mkd_renderer mkd_xhtml = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
rndr_blockquote,
rndr_raw_block,
rndr_header,
xhtml_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
/* table, table cell and table row callbacks: not provided here */
NULL,
NULL,
NULL,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
xhtml_image,
xhtml_linebreak,
rndr_link,
rndr_raw_inline,
rndr_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit and "*_" the set of
 * emphasis delimiter characters — confirm field meanings in markdown.h */
64,
"*_",
NULL };
/**********************
* DISCOUNT RENDERERS *
**********************/
/* print_link_wxh • handles image links ending in " =WIDTHxHEIGHT"
 *
 * Looks for the last '=' that directly follows a space, and checks that it
 * is followed by digits, an 'x' and more digits. On success writes the
 * attribute-escaped part of the link before that space, then
 * `" width=W height=H` (closing the quote opened by the caller), and
 * returns 1. Returns 0 without writing anything otherwise.
 * link->size is assumed to be non-zero (the visible caller checks it). */
static int
print_link_wxh(struct buf *ob, struct buf *link) {
	size_t sep, cross, stop;

	/* walk backwards to the last " =" pair; sep indexes the '=' */
	for (sep = link->size - 1; sep > 0; sep -= 1) {
		if (link->data[sep - 1] == ' ' && link->data[sep] == '=') break;
	}
	if (sep == 0) {
		return 0;
	}

	/* width: at least one digit, then a mandatory 'x' */
	cross = sep + 1;
	while (cross < link->size
	&& link->data[cross] >= '0' && link->data[cross] <= '9') {
		cross += 1;
	}
	if (cross >= link->size || cross == sep + 1
	|| link->data[cross] != 'x') {
		return 0;
	}

	/* height: at least one digit */
	stop = cross + 1;
	while (stop < link->size
	&& link->data[stop] >= '0' && link->data[stop] <= '9') {
		stop += 1;
	}
	if (stop == cross + 1) {
		return 0;
	}

	/* everything is fine, proceeding to actual printing */
	lus_attr_escape(ob, link->data, sep - 1);
	BUFPUTSL(ob, "\" width=");
	bufput(ob, link->data + sep + 1, cross - sep - 1);
	BUFPUTSL(ob, " height=");
	bufput(ob, link->data + cross + 1, stop - cross - 1);
	return 1;
}
/* discount_image • image rendering shared by the HTML and XHTML discount
 * renderers; a link ending in " =WxH" is turned into width and height
 * attributes by print_link_wxh. The tag is closed with " />" when xhtml
 * is non-zero and ">" otherwise. Returns 0 without output when the link
 * is missing or empty. */
static int
discount_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, int xhtml) {
	if (link == NULL || link->size == 0) {
		return 0;
	}
	BUFPUTSL(ob, "<img src=\"");
	if (print_link_wxh(ob, link) == 0) {
		/* no size suffix: plain src attribute, closed here */
		lus_attr_escape(ob, link->data, link->size);
		bufputc(ob, '"');
	}
	BUFPUTSL(ob, " alt=\"");
	if (alt != NULL && alt->size != 0) {
		lus_attr_escape(ob, alt->data, alt->size);
	}
	if (title != NULL && title->size != 0) {
		BUFPUTSL(ob, "\" title=\"");
		lus_attr_escape(ob, title->data, title->size);
	}
	if (xhtml) {
		bufputs(ob, "\" />");
	} else {
		bufputs(ob, "\">");
	}
	return 1;
}
/* html_discount_image • image callback forwarding to discount_image with
 * the xhtml flag cleared */
static int
html_discount_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, void *opaque) {
	const int xhtml = 0;

	return discount_image(ob, link, title, alt, xhtml);
}
/* xhtml_discount_image • image callback forwarding to discount_image with
 * the xhtml flag set */
static int
xhtml_discount_image(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *alt, void *opaque) {
	const int xhtml = 1;

	return discount_image(ob, link, title, alt, xhtml);
}
/* discount_link • link callback with pseudo-protocol prefixes
 *
 * The link target is matched case-insensitively against these prefixes:
 *   abbr:X   -> <abbr title="X">content</abbr>
 *   class:X  -> <span class="X">content</span>
 *   id:X     -> <a id="X">content</a>
 *   raw:X    -> X, attribute-escaped, with no surrounding tag
 * Any other link (or a missing one) is handed to rndr_link.
 * A missing or empty content is skipped, as rndr_link does.
 * Always returns 1 for the pseudo-protocols. */
static int
discount_link(struct buf *ob, struct buf *link, struct buf *title,
			struct buf *content, void *opaque) {
	if (!link) return rndr_link(ob, link, title, content, opaque);
	else if (link->size > 5 && !strncasecmp(link->data, "abbr:", 5)) {
		BUFPUTSL(ob, "<abbr title=\"");
		lus_attr_escape(ob, link->data + 5, link->size - 5);
		BUFPUTSL(ob, "\">");
		if (content && content->size)
			bufput(ob, content->data, content->size);
		BUFPUTSL(ob, "</abbr>");
		return 1;
	}
	else if (link->size > 6 && !strncasecmp(link->data, "class:", 6)) {
		BUFPUTSL(ob, "<span class=\"");
		lus_attr_escape(ob, link->data + 6, link->size - 6);
		BUFPUTSL(ob, "\">");
		if (content && content->size)
			bufput(ob, content->data, content->size);
		BUFPUTSL(ob, "</span>");
		return 1;
	}
	else if (link->size > 3 && !strncasecmp(link->data, "id:", 3)) {
		BUFPUTSL(ob, "<a id=\"");
		lus_attr_escape(ob, link->data + 3, link->size - 3);
		BUFPUTSL(ob, "\">");
		if (content && content->size)
			bufput(ob, content->data, content->size);
		/* the element opened above is <a>, so it is closed as such */
		BUFPUTSL(ob, "</a>");
		return 1;
	}
	else if (link->size > 4 && !strncasecmp(link->data, "raw:", 4)) {
		lus_attr_escape(ob, link->data + 4, link->size - 4);
		return 1;
	}
	return rndr_link(ob, link, title, content, opaque);
}
/* discount_blockquote • blockquote callback with class-block support
 *
 * When the rendered content starts with "<p>%name%" (name not empty and on
 * a single line), the block is emitted as <div class="name"><p>...</div>
 * instead of a blockquote. When the marker is alone in its paragraph
 * ("<p>%name%</p>"), that empty paragraph is dropped and the div starts
 * with the content of the next paragraph.
 * Anything else (including a missing text) is handed to rndr_blockquote. */
static void
discount_blockquote(struct buf *ob, struct buf *text, void *opaque) {
	size_t i = 5, size;
	char *data;

	if (!text || text->size < 5 || strncasecmp(text->data, "<p>%", 4)) {
		rndr_blockquote(ob, text, opaque);
		return;
	}
	data = text->data;
	size = text->size;

	/* looking for the '%' closing the class name on the first line */
	while (i < size && data[i] != '\n' && data[i] != '%')
		i += 1;
	if (i >= size || data[i] != '%') {
		rndr_blockquote(ob, text, opaque);
		return;
	}

	BUFPUTSL(ob, "<div class=\"");
	bufput(ob, data + 4, i - 4);
	BUFPUTSL(ob, "\"><p>");
	i += 1;

	/* the comparison only happens when 4 bytes are really available,
	 * so it never reads past the end of the buffer */
	if (i + 4 <= size && !strncasecmp(data + i, "</p>", 4)) {
		size_t next = i + 4;
		while (next + 3 < size
		&& (data[next] != '<' || data[next + 1] != 'p'
			|| data[next + 2] != '>'))
			next += 1;
		/* skip up to and including the next "<p>", whose opening tag
		 * has already been written above; when there is no such
		 * paragraph the remaining text is copied unchanged */
		if (next + 3 < size) i = next + 3;
	}
	bufput(ob, data + i, size - i);
	BUFPUTSL(ob, "</div>\n");
}
/* discount_table • wraps the rendered rows in <table>; when a header row
 * is given it goes in <thead> and the other rows in <tbody> */
static void
discount_table(struct buf *ob, struct buf *head_row, struct buf *rows,
							void *opaque) {
	int has_head = head_row != NULL;

	if (ob->size != 0) {
		bufputc(ob, '\n');
	}
	BUFPUTSL(ob, "<table>\n");
	if (has_head) {
		BUFPUTSL(ob, "<thead>\n");
		bufput(ob, head_row->data, head_row->size);
		BUFPUTSL(ob, "</thead>\n<tbody>\n");
	}
	if (rows != NULL) {
		bufput(ob, rows->data, rows->size);
	}
	if (has_head) {
		BUFPUTSL(ob, "</tbody>\n");
	}
	BUFPUTSL(ob, "</table>\n");
}
/* discount_table_row • wraps the rendered cells in <tr>; flags is unused */
static void
discount_table_row(struct buf *ob, struct buf *cells, int flags, void *opaque){
	(void)flags;
	BUFPUTSL(ob, " <tr>\n");
	if (cells != NULL) {
		bufput(ob, cells->data, cells->size);
	}
	BUFPUTSL(ob, " </tr>\n");
}
/* discount_table_cell • emits a <th> (MKD_CELL_HEAD set) or <td> cell,
 * with an align attribute when the flags carry a left, right or center
 * alignment */
static void
discount_table_cell(struct buf *ob, struct buf *text, int flags, void *opaque){
	int is_head = (flags & MKD_CELL_HEAD) != 0;
	int align = flags & MKD_CELL_ALIGN_MASK;

	if (is_head) {
		BUFPUTSL(ob, " <th");
	} else {
		BUFPUTSL(ob, " <td");
	}

	if (align == MKD_CELL_ALIGN_LEFT) {
		BUFPUTSL(ob, " align=\"left\"");
	} else if (align == MKD_CELL_ALIGN_RIGHT) {
		BUFPUTSL(ob, " align=\"right\"");
	} else if (align == MKD_CELL_ALIGN_CENTER) {
		BUFPUTSL(ob, " align=\"center\"");
	}
	bufputc(ob, '>');

	if (text != NULL) {
		bufput(ob, text->data, text->size);
	}

	if (is_head) {
		BUFPUTSL(ob, "</th>\n");
	} else {
		BUFPUTSL(ob, "</td>\n");
	}
}
/* exported renderer structures */
/* HTML 4 callback table with the discount extensions: class blocks
 * (discount_blockquote), tables, sized images and pseudo-protocol links.
 * The initializer is positional, so the order of the entries must not be
 * changed. A NULL entry means no callback is provided for that slot. */
const struct mkd_renderer discount_html = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
discount_blockquote,
rndr_raw_block,
rndr_header,
html_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
discount_table,
discount_table_cell,
discount_table_row,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
html_discount_image,
html_linebreak,
discount_link,
rndr_raw_inline,
rndr_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit and "*_" the set of
 * emphasis delimiter characters — confirm field meanings in markdown.h */
64,
"*_",
NULL };
/* XHTML 1.0 callback table with the discount extensions; identical to
 * discount_html except for the xhtml_* hrule, image and linebreak
 * callbacks. The initializer is positional, so the order of the entries
 * must not be changed. A NULL entry means no callback for that slot. */
const struct mkd_renderer discount_xhtml = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
discount_blockquote,
rndr_raw_block,
rndr_header,
xhtml_hrule,
rndr_list,
rndr_listitem,
rndr_paragraph,
discount_table,
discount_table_cell,
discount_table_row,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
rndr_double_emphasis,
rndr_emphasis,
xhtml_discount_image,
xhtml_linebreak,
discount_link,
rndr_raw_inline,
rndr_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit and "*_" the set of
 * emphasis delimiter characters — confirm field meanings in markdown.h */
64,
"*_",
NULL };
/****************************
* NATACHA'S OWN EXTENSIONS *
****************************/
/* nat_span • writes the text between <tag> and </tag>; the text is
 * dereferenced unconditionally, so callers must pass a non-NULL buffer */
static void
nat_span(struct buf *ob, struct buf *text, char *tag) {
	/* opening tag */
	bufprintf(ob, "<%s>", tag);
	/* element content, copied verbatim */
	bufput(ob, text->data, text->size);
	/* closing tag */
	bufprintf(ob, "</%s>", tag);
}
/* nat_emphasis • single emphasis: '|' gives <span>, any other accepted
 * delimiter gives <em>; returns 0 without output for '+' and '-', or when
 * the text is missing or empty */
static int
nat_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	switch (c) {
	case '+':
	case '-':
		return 0;
	case '|':
		nat_span(ob, text, "span");
		break;
	default:
		nat_span(ob, text, "em");
		break;
	}
	return 1;
}
/* nat_double_emphasis • double emphasis: '+' gives <ins>, '-' gives <del>,
 * any other accepted delimiter gives <strong>; returns 0 without output
 * for '|', or when the text is missing or empty */
static int
nat_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	switch (c) {
	case '|':
		return 0;
	case '+':
		nat_span(ob, text, "ins");
		break;
	case '-':
		nat_span(ob, text, "del");
		break;
	default:
		nat_span(ob, text, "strong");
		break;
	}
	return 1;
}
/* nat_triple_emphasis • wraps the text in <strong><em>; returns 0 without
 * output for the '+', '-' and '|' delimiters, or when the text is missing
 * or empty */
static int
nat_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) {
	if (text == NULL || text->size == 0) {
		return 0;
	}
	if (c == '+' || c == '-' || c == '|') {
		return 0;
	}
	BUFPUTSL(ob, "<strong><em>");
	bufput(ob, text->data, text->size);
	BUFPUTSL(ob, "</em></strong>");
	return 1;
}
/* nat_header • header callback with optional id attribute
 *
 * When the header text starts with a run of id characters ('-', '_', '.',
 * ':', ASCII letters and digits) followed by '#', that run becomes the id
 * attribute and the remaining text the header content:
 *   "intro#Introduction" -> <hN id="intro">Introduction</hN>
 * Otherwise the whole text is the header content.
 * A missing text yields an empty header, as rndr_header does. */
static void
nat_header(struct buf *ob, struct buf *text, int level, void *opaque) {
	size_t i = 0;
	if (ob->size) bufputc(ob, '\n');
	if (!text) {
		bufprintf(ob, "<h%d></h%d>\n", level, level);
		return;
	}
	/* measuring the candidate id; digits span the whole '0'..'9' range */
	while (i < text->size && (text->data[i] == '-' || text->data[i] == '_'
			|| text->data[i] == '.' || text->data[i] == ':'
			|| (text->data[i] >= 'a' && text->data[i] <= 'z')
			|| (text->data[i] >= 'A' && text->data[i] <= 'Z')
			|| (text->data[i] >= '0' && text->data[i] <= '9')))
		i += 1;
	bufprintf(ob, "<h%d", level);
	if (i < text->size && text->data[i] == '#') {
		bufprintf(ob, " id=\"%.*s\">", (int)i, text->data);
		i += 1;
	}
	else {
		/* no '#' after the run: it was ordinary header text */
		bufputc(ob, '>');
		i = 0;
	}
	bufput(ob, text->data + i, text->size - i);
	bufprintf(ob, "</h%d>\n", level);
}
/* nat_paragraph • paragraph callback with optional class attribute
 *
 * When the paragraph text starts with "(names)" where names is made of
 * spaces, ASCII letters and digits, it becomes the class attribute and the
 * remaining text the paragraph content:
 *   "(note big)Some text" -> <p class="note big">Some text</p>
 * Otherwise the whole text is the paragraph content. */
static void
nat_paragraph(struct buf *ob, struct buf *text, void *opaque) {
	size_t i = 0;
	if (ob->size) bufputc(ob, '\n');
	BUFPUTSL(ob, "<p");
	if (text && text->size && text->data[0] == '(') {
		i = 1;
		/* this seems to be a bit more restrictive than */
		/* what is allowed for class names */
		/* digits span the whole '0'..'9' range */
		while (i < text->size && (text->data[i] == ' '
			|| (text->data[i] >= 'a' && text->data[i] <= 'z')
			|| (text->data[i] >= 'A' && text->data[i] <= 'Z')
			|| (text->data[i] >= '0' && text->data[i] <= '9')))
			i += 1;
		if (i < text->size && text->data[i] == ')') {
			bufprintf(ob, " class=\"%.*s\"",
						(int)(i - 1), text->data + 1);
			i += 1;
		}
		/* no closing parenthesis: it was ordinary paragraph text */
		else i = 0;
	}
	bufputc(ob, '>');
	if (text) bufput(ob, text->data + i, text->size - i);
	BUFPUTSL(ob, "</p>\n");
}
/* exported renderer structures */
/* HTML 4 callback table with the nat_* extensions (header ids, paragraph
 * classes, extra emphasis delimiters) on top of discount-style class
 * blocks, images and links; no table callbacks. The initializer is
 * positional, so the order of the entries must not be changed. A NULL
 * entry means no callback is provided for that slot. */
const struct mkd_renderer nat_html = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
discount_blockquote,
rndr_raw_block,
nat_header,
html_hrule,
rndr_list,
rndr_listitem,
nat_paragraph,
/* table, table cell and table row callbacks: not provided here */
NULL,
NULL,
NULL,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
nat_double_emphasis,
nat_emphasis,
html_discount_image,
html_linebreak,
discount_link,
rndr_raw_inline,
nat_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit — confirm in
 * markdown.h. The string adds '-', '+' and '|' to the usual "*_"; these
 * are the delimiter values the nat_*emphasis callbacks test for. */
64,
"*_-+|",
NULL };
/* XHTML 1.0 callback table with the nat_* extensions; identical to
 * nat_html except for the xhtml_* hrule, image and linebreak callbacks.
 * The initializer is positional, so the order of the entries must not be
 * changed. A NULL entry means no callback is provided for that slot. */
const struct mkd_renderer nat_xhtml = {
/* document-level callbacks */
NULL,
NULL,
/* block-level callbacks */
rndr_blockcode,
discount_blockquote,
rndr_raw_block,
nat_header,
xhtml_hrule,
rndr_list,
rndr_listitem,
nat_paragraph,
/* table, table cell and table row callbacks: not provided here */
NULL,
NULL,
NULL,
/* span-level callbacks */
rndr_autolink,
rndr_codespan,
nat_double_emphasis,
nat_emphasis,
xhtml_discount_image,
xhtml_linebreak,
discount_link,
rndr_raw_inline,
nat_triple_emphasis,
/* low-level callbacks */
NULL,
rndr_normal_text,
/* renderer data */
/* NOTE(review): 64 is presumably a parser work limit — confirm in
 * markdown.h. The string adds '-', '+' and '|' to the usual "*_"; these
 * are the delimiter values the nat_*emphasis callbacks test for. */
64,
"*_-+|",
NULL };
/* renderers.h - example markdown renderers */
/*
* Copyright (c) 2009, Natacha Porté
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef MARKDOWN_RENDERERS_H
#define MARKDOWN_RENDERERS_H
#include "markdown.h"
/*****************************
* EXPORTED HELPER FUNCTIONS *
*****************************/
/* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"'
 * ob:   output buffer the escaped copy is appended to
 * src:  input bytes (only read)
 * size: number of bytes to process from src */
void
lus_attr_escape(struct buf *ob, char *src, size_t size);
/* lus_body_escape • copy the buffer entity-escaping '<', '>' and '&'
 * ob:   output buffer the escaped copy is appended to
 * src:  input bytes (only read)
 * size: number of bytes to process from src */
void
lus_body_escape(struct buf *ob, char *src, size_t size);
/***********************
* RENDERER STRUCTURES *
***********************/
/* original markdown renderers */
extern const struct mkd_renderer mkd_html; /* HTML 4 renderer */
extern const struct mkd_renderer mkd_xhtml; /* XHTML 1.0 renderer */
/* renderers with some discount extensions */
extern const struct mkd_renderer discount_html; /* HTML 4 output */
extern const struct mkd_renderer discount_xhtml; /* XHTML 1.0 output */
/* renderers with Natacha's own extensions */
extern const struct mkd_renderer nat_html; /* HTML 4 output */
extern const struct mkd_renderer nat_xhtml; /* XHTML 1.0 output */
#endif /* ndef MARKDOWN_RENDERERS_H */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment