-
-
Save kkestell/2fb1f5d5bbc4bf00ed594d8f78ed6ccb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "utf8.h" | |
#include <stdio.h> | |
#include <stdlib.h> | |
/* Input buffer: the whole source file, NUL-terminated (allocated by load()). */
utf8_int8_t *ib;
/* Read cursor into ib; peek() decodes at it and pop() advances it. */
utf8_int8_t *it;
/* Output buffer the generated HTML is appended to (grown by put_codepoint()). */
utf8_int8_t *ob;
/* Number of bytes currently stored in ob. */
size_t len = 0;
/* Number of bytes currently allocated for ob. */
size_t cap = 0;
void parse_text(); | |
void parse_heading(); | |
void parse_unordered_list(); | |
void parse_ordered_list(); | |
void parse_code_block(); | |
void parse_blockquote(); | |
void parse_horizontal_rule(); | |
void parse_paragraph(); | |
void load(const char *filename); | |
void save(const char *filename); | |
utf8_int32_t peek(); | |
void pop(int n); | |
void skip_whitespace(); | |
void put_codepoint(utf8_int32_t codepoint); | |
void put_string(const char *str); | |
int main(int argc, char *argv[]) | |
{ | |
if (argc < 3) | |
{ | |
printf("Usage: %s <input_file> <output_file>\n", argv[0]); | |
return 1; | |
} | |
load(argv[1]); | |
while (1) | |
{ | |
utf8_int32_t codepoint = peek(); | |
if (codepoint == 0) | |
{ | |
break; | |
} | |
switch (codepoint) | |
{ | |
case '#': | |
parse_heading(); | |
break; | |
case '*': | |
parse_unordered_list(); | |
break; | |
case '1': | |
parse_ordered_list(); | |
break; | |
case '`': | |
parse_code_block(); | |
break; | |
case '>': | |
parse_blockquote(); | |
break; | |
case '-': | |
parse_horizontal_rule(); | |
break; | |
default: | |
parse_paragraph(); | |
break; | |
} | |
while (peek() == '\n') | |
{ | |
pop(1); | |
} | |
} | |
save(argv[2]); | |
free(ib); | |
free(ob); | |
return 0; | |
} | |
void parse_text() | |
{ | |
while (peek() != '\n' && peek() != 0) | |
{ | |
if (peek() == '*') | |
{ | |
pop(1); // * | |
if (peek() == '*') | |
{ | |
pop(1); // * | |
put_string("<strong>"); | |
while (!(it[0] == '*' && it[1] == '*') && peek() != '\n' && peek() != 0) | |
{ | |
put_codepoint(peek()); | |
pop(1); | |
} | |
pop(2); // ** | |
put_string("</strong>"); | |
} | |
else | |
{ | |
put_string("<em>"); | |
while (peek() != '*' && peek() != '\n' && peek() != 0) | |
{ | |
put_codepoint(peek()); | |
pop(1); | |
} | |
pop(1); // * | |
put_string("</em>"); | |
} | |
} | |
else if (peek() == '[') | |
{ | |
pop(1); // [ | |
utf8_int32_t text[256]; | |
size_t text_len = 0; | |
while (peek() != ']') | |
{ | |
utf8_int32_t codepoint = peek(); | |
text[text_len++] = codepoint; | |
pop(1); | |
} | |
pop(2); // ]( | |
utf8_int32_t link[256]; | |
size_t link_len = 0; | |
while (peek() != ')') | |
{ | |
utf8_int32_t codepoint = peek(); | |
link[link_len++] = codepoint; | |
pop(1); | |
} | |
pop(1); // ) | |
put_string("<a href=\""); | |
for (size_t i = 0; i < link_len; i++) | |
{ | |
put_codepoint(link[i]); | |
} | |
put_string("\">"); | |
for (size_t i = 0; i < text_len; i++) | |
{ | |
put_codepoint(text[i]); | |
} | |
put_string("</a>"); | |
} | |
else | |
{ | |
put_codepoint(peek()); | |
pop(1); | |
} | |
} | |
} | |
/*
 * Parses a heading line ("# Title", "## Title", ...) at the cursor and emits
 * <hN>...</hN> followed by a line break, where N is the number of leading '#'
 * characters.
 *
 * HTML only defines h1 to h6, so N is capped at 6; any further '#' characters
 * are still consumed as part of the marker.
 */
void parse_heading()
{
    int level = 0;
    while (peek() == '#')
    {
        if (level < 6)
        {
            level++;
        }
        pop(1); // #
    }

    put_string("<h");
    put_codepoint('0' + level);
    put_string(">");

    skip_whitespace();
    parse_text();

    put_string("</h");
    put_codepoint('0' + level);
    put_string(">\n");
}
/*
 * Parses consecutive lines that start with '*' as one unordered list and
 * emits <ul> with one <li> per line. The text of each item is handled by
 * parse_text().
 */
void parse_unordered_list()
{
    put_string("<ul>\n");
    while (peek() == '*')
    {
        pop(1); // *
        skip_whitespace();
        put_string("<li>");
        parse_text();
        put_string("</li>\n");
        // parse_text() stops at a line break or at the end of the input;
        // only a line break may be consumed, otherwise the cursor would
        // move past the terminator of the input buffer.
        if (peek() == '\n')
        {
            pop(1); // \n
        }
    }
    put_string("</ul>\n");
}
void parse_ordered_list() | |
{ | |
put_string("<ol>\n"); | |
int index = 1; | |
while (peek() == '0' + index) | |
{ | |
pop(2); // 1. | |
skip_whitespace(); | |
put_string("<li>"); | |
parse_text(); | |
put_string("</li>\n"); | |
pop(1); // \n | |
index++; | |
} | |
put_string("</ol>\n"); | |
} | |
void parse_code_block() | |
{ | |
pop(3); // ``` | |
put_string("<pre><code>"); | |
while (peek() != '`' && peek() != 0) | |
{ | |
put_codepoint(peek()); | |
pop(1); | |
} | |
put_string("</code></pre>\n"); | |
pop(3); // ``` | |
} | |
/*
 * Parses a one-line blockquote ("> text") at the cursor: the '>' marker and
 * the spaces after it are dropped and the rest of the line is emitted through
 * parse_text(), wrapped in <blockquote>...</blockquote>.
 */
void parse_blockquote()
{
    static const char opening[] = "<blockquote>";
    static const char closing[] = "</blockquote>\n";

    // Consume the marker and its padding before anything is written.
    pop(1); // >
    skip_whitespace();

    put_string(opening);
    parse_text();
    put_string(closing);
}
/*
 * Parses a horizontal rule at the cursor and emits <hr> followed by a line
 * break.
 *
 * The whole run of '-' characters is consumed and nothing else. This keeps
 * rules longer than three dashes in one piece and never consumes text, a line
 * break or the end-of-input terminator as part of the marker.
 */
void parse_horizontal_rule()
{
    while (peek() == '-')
    {
        pop(1); // -
    }
    put_string("<hr>\n");
}
/*
 * Emits the current line as a paragraph: the inline content produced by
 * parse_text(), wrapped in <p>...</p> and followed by a line break.
 */
void parse_paragraph()
{
    static const char opening[] = "<p>";
    static const char closing[] = "</p>\n";

    put_string(opening);
    parse_text();
    put_string(closing);
}
void load(const char *filename) | |
{ | |
FILE *file = fopen(filename, "r"); | |
if (!file) | |
{ | |
perror("Error opening file"); | |
exit(EXIT_FAILURE); | |
} | |
fseek(file, 0, SEEK_END); | |
long size = ftell(file); | |
rewind(file); | |
ib = (utf8_int8_t *)malloc(size + 1); | |
if (!ib) | |
{ | |
fprintf(stderr, "Memory allocation failed\n"); | |
fclose(file); | |
exit(EXIT_FAILURE); | |
} | |
fread(ib, 1, size, file); | |
ib[size] = '\0'; | |
fclose(file); | |
it = ib; | |
ob = (utf8_int8_t *)malloc(cap); | |
if (!ob) | |
{ | |
fprintf(stderr, "Memory allocation failed\n"); | |
exit(EXIT_FAILURE); | |
} | |
} | |
void save(const char *filename) | |
{ | |
FILE *file = fopen(filename, "w"); | |
fwrite(ob, 1, len, file); | |
fclose(file); | |
} | |
utf8_int32_t peek() | |
{ | |
utf8_int32_t codepoint = 0; | |
utf8codepoint(it, &codepoint); | |
return codepoint; | |
} | |
void pop(int n) | |
{ | |
for (int i = 0; i < n; i++) | |
{ | |
utf8_int32_t codepoint = 0; | |
it = utf8codepoint(it, &codepoint); | |
} | |
} | |
/*
 * Advances the input cursor over a run of space characters (' ') and stops
 * at the first code point that is not a space.
 */
void skip_whitespace()
{
    for (;;)
    {
        if (peek() != ' ')
        {
            return;
        }
        pop(1);
    }
}
/*
 * Appends one code point, encoded as UTF-8, to the output buffer.
 *
 * The buffer is grown on demand: it starts at 8 bytes when empty and doubles
 * whenever there is no room for the next byte. If the buffer cannot be grown
 * a message is printed to stderr and the program exits with EXIT_FAILURE.
 * Code point 0 encodes to an empty sequence here, so nothing is appended.
 */
void put_codepoint(utf8_int32_t codepoint)
{
    // Zero-filled so the encoded bytes are always followed by a terminator.
    utf8_int8_t bytes[5] = {0};
    utf8catcodepoint(bytes, codepoint, sizeof(bytes));

    for (const utf8_int8_t *src = bytes; *src != 0; ++src)
    {
        if (len + 1 >= cap)
        {
            size_t grown = (cap != 0) ? cap * 2 : 8;
            utf8_int8_t *resized = (utf8_int8_t *)realloc(ob, grown);
            if (!resized)
            {
                fprintf(stderr, "Memory allocation failed\n");
                exit(EXIT_FAILURE);
            }
            cap = grown;
            ob = resized;
        }

        ob[len] = *src;
        len += 1;
    }
}
/*
 * Appends the NUL-terminated string str to the output buffer by passing each
 * of its chars to put_codepoint() in order.
 */
void put_string(const char *str)
{
    for (const char *cursor = str; *cursor != '\0'; cursor++)
    {
        put_codepoint(*cursor);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment