Created
May 9, 2021 00:09
-
-
Save etscrivner/9880e0a247ebdee42326b9d179268d6f to your computer and use it in GitHub Desktop.
Jasper is a simple markdown parser that uses successive application of rewriting rules to apply markdown to a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// jasper.c - Processor for a custom flavor of markdown called jasperstone
//
// Description:
//
// Jasperstone processes a markup file by applying a set of rewriting rules to
// the document. Each rewriting rule processes the entire document, replacing a
// particular piece of syntax. The end result is a file that is completely
// marked up without the need for a complex parser.
#include <inttypes.h> | |
#include <stdarg.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include "language_layer.h" | |
#include "memory_arena.h" | |
#define MemorySize Megabytes(64) | |
/////////////////////////////////////////////////////////////////////////////// | |
// Builds a kebab-case copy of Name inside Arena.
//
// Capital ASCII letters are lowered, every run of spaces/tabs collapses into
// a single '-', and all other bytes are copied through unchanged. The result
// is never longer than Name, so Name.Length bytes are reserved up front.
// The returned string is not NUL-terminated.
String_UTF8 KebabCaseName(String_UTF8 Name, Memory_Arena *Arena)
{
  String_UTF8 Kebab = {0};
  Kebab.Str = ArenaAlloc(Arena, Name.Length);
  Kebab.Length = 0;

  b32 PreviousWasBlank = false;
  for (u32 At = 0; At < Name.Length; ++At) {
    u8 Ch = Name.Str[At];
    b32 IsBlank = (Ch == ' ' || Ch == '\t');

    if (!IsBlank) {
      Kebab.Str[Kebab.Length++] = IsCapitalLetter(Ch) ? ToLower(Ch) : Ch;
    } else if (!PreviousWasBlank) {
      // Only the first blank of a run produces a separator.
      Kebab.Str[Kebab.Length++] = '-';
    }
    PreviousWasBlank = IsBlank;
  }

  return(Kebab);
}
/////////////////////////////////////////////////////////////////////////////// | |
// Forward declaration so that rule callbacks can take a pointer to the
// rewriter before the struct itself is defined.
typedef struct Rule_Rewriter_Tag Rule_Rewriter;

// Signature of a custom rewriting pass.
typedef void Rewriter_Function(Rule_Rewriter*);

// Discriminator for the union inside Rewrite_Rule.
typedef enum {
  REWRITE_RULE_pair,   // handled by RewritePair using the Pair fields
  REWRITE_RULE_list,   // handled by RewriteList using the List fields
  REWRITE_RULE_custom  // handled by calling the Rewrite function pointer
} Rewrite_Rule_Type;
typedef struct { | |
char *Name; | |
Rewrite_Rule_Type Type; | |
union { | |
struct { | |
char *PairStr; | |
char *TagBegin; | |
char *TagEnd; | |
} Pair; | |
struct { | |
char *ItemPrefix; | |
char *TagBegin; | |
char *TagEnd; | |
} List; | |
Rewriter_Function *Rewrite; | |
}; | |
} Rewrite_Rule; | |
typedef struct Rule_Rewriter_Tag { | |
String_UTF8 Source; | |
String_UTF8 Working; | |
u32 SourceIndex; | |
u32 WorkingIndex; | |
Memory_Arena *Arena; | |
} Rule_Rewriter; | |
// Creates a rewriter whose source is the contents of FilePath loaded into
// Arena and whose working buffer is all arena space left after the file.
//
// The result of ArenaReadFile is not checked here; if the file cannot be
// opened the source stays zeroed (empty).
Rule_Rewriter RuleRewriterInit(char *FilePath, Memory_Arena *Arena)
{
  Rule_Rewriter Rewriter = {0};
  Rewriter.Arena = Arena;

  ArenaReadFile(FilePath, Arena, &Rewriter.Source);

  // Everything not consumed by the file becomes scratch space for output.
  umm UsedAfterRead = Arena->Used;
  Rewriter.Working.Str = Arena->Base + UsedAfterRead;
  Rewriter.Working.Length = Arena->Size - UsedAfterRead;

  return(Rewriter);
}
// Finishes one rewriting pass: the bytes produced in the working buffer are
// moved to the start of the arena and become the new source, and the working
// buffer is reset to the free space that follows them.
void RewriterCompleteRule(Rule_Rewriter *Rewriter)
{
  Memory_Arena *Arena = Rewriter->Arena;
  u32 OutputLength = Rewriter->WorkingIndex;

  // Working.Str is Arena->Base + Arena->Used, so whenever the output is longer
  // than the previous arena contents the source and destination ranges
  // overlap. memcpy is undefined for overlapping ranges; memmove is not.
  memmove(Arena->Base, Rewriter->Working.Str, OutputLength);
  Arena->Used = OutputLength;

  Rewriter->Source.Str = Arena->Base;
  Rewriter->Source.Length = OutputLength;
  Rewriter->SourceIndex = 0;

  Rewriter->Working.Str = Arena->Base + Arena->Used;
  Rewriter->Working.Length = Arena->Size - Arena->Used;
  Rewriter->WorkingIndex = 0;
}
// True while there are unread bytes left in the source text.
b32 RewriterHasSourceText(Rule_Rewriter *Rewriter) {
  return(Rewriter->Source.Length > Rewriter->SourceIndex);
}
// True while the write offset is still inside the working buffer.
b32 RewriterHasWorkingSpace(Rule_Rewriter *Rewriter) {
  return(Rewriter->Working.Length > Rewriter->WorkingIndex);
}
// A pass may continue only while there is both input left to read and
// room left to write.
b32 RewriterIsValid(Rule_Rewriter *Rewriter) {
  if (!RewriterHasSourceText(Rewriter)) {
    return(false);
  }
  return(RewriterHasWorkingSpace(Rewriter));
}
// Skips Length bytes of source without copying them. Asserts that the new
// read offset does not run past the end of the source.
void RewriterSourceAdvance(Rule_Rewriter *Rewriter, u32 Length) {
  u32 NextIndex = Rewriter->SourceIndex + Length;
  Assert(NextIndex <= Rewriter->Source.Length);
  Rewriter->SourceIndex = NextIndex;
}
// Copies the next UTF-8 codepoint (1-4 bytes, as reported by
// UTF8CodepointLengthBytes for the lead byte) from source to the working
// buffer and advances both offsets. Asserts that both buffers can hold the
// whole codepoint.
void RewriterCopyNext(Rule_Rewriter *Rewriter) {
  Assert(Rewriter->SourceIndex < Rewriter->Source.Length);
  Assert(Rewriter->WorkingIndex < Rewriter->Working.Length);

  u8 *From = Rewriter->Source.Str + Rewriter->SourceIndex;
  u8 *To = Rewriter->Working.Str + Rewriter->WorkingIndex;
  u32 ByteCount = UTF8CodepointLengthBytes(*From);

  Assert(Rewriter->SourceIndex + ByteCount <= Rewriter->Source.Length);
  Assert(Rewriter->WorkingIndex + ByteCount <= Rewriter->Working.Length);

  memcpy(To, From, ByteCount);
  Rewriter->SourceIndex += ByteCount;
  Rewriter->WorkingIndex += ByteCount;
}
// Copies Length codepoints (not bytes) from source to the working buffer,
// one RewriterCopyNext call per codepoint.
void RewriterAdvanceAndCopy(Rule_Rewriter *Rewriter, u32 Length) {
  for (u32 Remaining = Length; Remaining > 0; --Remaining) {
    RewriterCopyNext(Rewriter);
  }
}
// Reports whether the source text at the current read offset begins with the
// NUL-terminated string Match. Never reads past the end of the source.
b32 RewriterSourceMatches(Rule_Rewriter *Rewriter, char *Match) {
  u32 MatchLength = strlen(Match);

  if (Rewriter->SourceIndex + MatchLength > Rewriter->Source.Length) {
    // Not enough text left to contain Match.
    return(false);
  }

  u8 *Here = Rewriter->Source.Str + Rewriter->SourceIndex;
  return(memcmp(Here, Match, MatchLength) == 0);
}
// Reports whether the byte at the current read offset equals Ch.
//
// Returns false when the read offset is at or past the end of the source.
// Several callers probe right after consuming the final bytes of the input;
// without this guard they would read whatever stale data sits in the working
// buffer that follows the source in memory.
b32 RewriterSourceMatchesCh(Rule_Rewriter *Rewriter, char Ch) {
  if (Rewriter->SourceIndex >= Rewriter->Source.Length) {
    return(false);
  }
  return(Rewriter->Source.Str[Rewriter->SourceIndex] == Ch);
}
// Appends one byte to the working buffer. The byte is silently dropped unless
// at least two bytes of room remain (the last byte of the buffer is never
// written by this function).
void RewriterPutc(Rule_Rewriter *Rewriter, char Ch) {
  if (Rewriter->WorkingIndex + 1 >= Rewriter->Working.Length) {
    return;
  }
  Rewriter->Working.Str[Rewriter->WorkingIndex] = Ch;
  Rewriter->WorkingIndex += 1;
}
// Appends the NUL-terminated string Text (without its terminator) to the
// working buffer. If it does not fit with at least one byte to spare, nothing
// is written.
void RewriterPut(Rule_Rewriter *Rewriter, char *Text) {
  u32 TextLength = strlen(Text);
  if (Rewriter->WorkingIndex + TextLength >= Rewriter->Working.Length) {
    return;
  }
  u8 *Destination = Rewriter->Working.Str + Rewriter->WorkingIndex;
  memcpy(Destination, Text, TextLength);
  Rewriter->WorkingIndex += TextLength;
}
// Appends printf-style formatted text to the working buffer.
//
// vsnprintf returns the length the full output WOULD have had, or a negative
// value on error. Only the bytes actually stored are counted, so the write
// offset can never move past the end of the buffer (which would make the next
// "remaining space" computation underflow). On truncation the output is cut
// short; on error nothing is appended.
void RewriterPutf(Rule_Rewriter *Rewriter, char *Format, ...) {
  if (Rewriter->WorkingIndex >= Rewriter->Working.Length) {
    return; // No room at all
  }
  umm Available = Rewriter->Working.Length - Rewriter->WorkingIndex;

  va_list List;
  va_start(List, Format);
  int Written = vsnprintf(
    (char*)(Rewriter->Working.Str + Rewriter->WorkingIndex),
    Available,
    Format,
    List
  );
  va_end(List);

  if (Written < 0) {
    return; // Encoding/format error
  }

  umm Stored = (umm)Written;
  if (Stored >= Available) {
    // Truncated: vsnprintf kept Available - 1 bytes plus a terminating NUL.
    Stored = Available - 1;
  }
  Rewriter->WorkingIndex += (u32)Stored;
}
// Advances past any run of spaces and tabs at the read offset without
// copying them. Newlines are not skipped.
void RewriterSkipWhitespace(Rule_Rewriter *Rewriter) {
  for (;;) {
    if (!RewriterIsValid(Rewriter)) {
      break;
    }
    b32 AtBlank = RewriterSourceMatchesCh(Rewriter, ' ') ||
                  RewriterSourceMatchesCh(Rewriter, '\t');
    if (!AtBlank) {
      break;
    }
    RewriterSourceAdvance(Rewriter, 1);
  }
}
// If the read offset is at a ``` fence, copies the whole fenced region
// (opening fence, contents, and closing fence if present) to the working
// buffer untouched, so that other rules do not rewrite code. Returns true when
// a fenced region was copied, false when the read offset was not at a fence.
b32 RewriterSkipCode(Rule_Rewriter *Rewriter) {
  char *Fence = "```";

  if (!RewriterIsValid(Rewriter) || !RewriterSourceMatches(Rewriter, Fence)) {
    return(false);
  }

  RewriterAdvanceAndCopy(Rewriter, 3);

  b32 Closed = false;
  while (!Closed && RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, Fence)) {
      RewriterAdvanceAndCopy(Rewriter, 3);
      Closed = true;
    } else {
      RewriterCopyNext(Rewriter);
    }
  }

  return(true);
}
// Applies a "pair" rule: every occurrence of Rule->Pair.PairStr outside of
// fenced code is removed and replaced alternately by TagBegin and TagEnd
// (first occurrence opens, second closes, and so on).
void RewritePair(Rule_Rewriter *Rewriter, Rewrite_Rule *Rule)
{
  char *Delimiter = Rule->Pair.PairStr;
  b32 TagIsOpen = false;

  while (RewriterIsValid(Rewriter)) {
    if (!RewriterSourceMatches(Rewriter, Delimiter)) {
      // Ordinary text: pass code fences through whole, everything else
      // one codepoint at a time.
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
      continue;
    }

    RewriterSourceAdvance(Rewriter, strlen(Delimiter));
    RewriterPut(Rewriter, TagIsOpen ? Rule->Pair.TagEnd : Rule->Pair.TagBegin);
    TagIsOpen = !TagIsOpen;
  }
}
// Wraps free-standing blocks of text in <p>...</p>.
//
// At each block start the pass decides:
//   * lines starting with '#', "![", '-' or '+' (titles, images, unordered and
//     ordered list items) are copied up to the end of the line unwrapped, so
//     that later rules can handle them;
//   * an existing <p>...</p> is copied through as-is (no double wrapping);
//   * any other non-blank text is wrapped in a paragraph that runs until the
//     next blank line ("\n\n") or the end of the input;
//   * whitespace and newlines are copied through.
//
// '+' is excluded alongside '-' because the rule table uses it as the
// ordered-list item prefix; wrapping those lines in <p> leaves the later list
// pass with a </p> inside the generated <ol>.
void RewriteParagraph(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatchesCh(Rewriter, '#') ||
        RewriterSourceMatches(Rewriter, "![") ||
        RewriterSourceMatchesCh(Rewriter, '-') ||
        RewriterSourceMatchesCh(Rewriter, '+')) {
      // Titles, images and list items on their own lines are not paragraphs.
      while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, '\n')) {
        RewriterCopyNext(Rewriter);
      }
    } else if (RewriterSourceMatches(Rewriter, "<p>")) {
      // Prevent wrapping a paragraph in another paragraph
      while (RewriterIsValid(Rewriter)) {
        if (RewriterSourceMatches(Rewriter, "</p>")) {
          RewriterSourceAdvance(Rewriter, 4);
          RewriterPut(Rewriter, "</p>");
          break;
        }
        RewriterCopyNext(Rewriter);
      }
    } else if (!RewriterSourceMatchesCh(Rewriter, ' ') &&
               !RewriterSourceMatchesCh(Rewriter, '\t') &&
               !RewriterSourceMatchesCh(Rewriter, '\n')) {
      // Start of a text block: either a code fence (copied verbatim) or a
      // paragraph that extends to the next blank line.
      if (!RewriterSkipCode(Rewriter)) {
        RewriterPut(Rewriter, "<p>\n");
        while (RewriterIsValid(Rewriter)) {
          if (RewriterSourceMatches(Rewriter, "\n\n")) {
            RewriterSourceAdvance(Rewriter, 2);
            break;
          }
          RewriterCopyNext(Rewriter);
        }
        RewriterPut(Rewriter, "\n</p>\n\n");
      }
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Rewrites "#"-prefixed titles into <hN>...</hN>, where N is the number of
// consecutive '#' characters. The title text runs to the end of the line.
// Fenced code is copied through untouched.
//
// NOTE(review): a '#' is recognized anywhere outside code, not only at the
// start of a line, and N is not limited to 6 — confirm that is intended.
void RewriteTitle(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatchesCh(Rewriter, '#')) {
      u32 TitleDepth = 0;
      while (RewriterIsValid(Rewriter) && RewriterSourceMatchesCh(Rewriter, '#')) {
        ++TitleDepth;
        RewriterSourceAdvance(Rewriter, 1);
      }
      RewriterSkipWhitespace(Rewriter);

      // TitleDepth is a u32, so it is printed with PRIu32 rather than %d.
      RewriterPutf(Rewriter, "<h%" PRIu32 ">", TitleDepth);
      while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, '\n')) {
        RewriterCopyNext(Rewriter);
      }
      RewriterPutf(Rewriter, "</h%" PRIu32 ">", TitleDepth);
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Rewrites "[text](url)" into <a href="url" target="_blank">text</a>.
//
// Image syntax ("![...](...)") is copied through for the image rule, and
// fenced code is left untouched. A '[' that is not followed by a complete
// "text](url)" (for example "[1]" or an unterminated bracket) is copied
// literally instead of tripping an assertion, since it comes from user input.
void RewriteExternalLink(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "![")) {
      // Skip image stuff
      while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, ')')) {
        RewriterCopyNext(Rewriter);
      }
    } else if (RewriterSourceMatchesCh(Rewriter, '[')) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceLength = Rewriter->Source.Length;

      // Look ahead without consuming anything until the link is known to be
      // complete.
      umm TextBegin = (umm)Rewriter->SourceIndex + 1;
      umm TextEnd = TextBegin;
      while (TextEnd < SourceLength && Source[TextEnd] != ']') {
        ++TextEnd;
      }

      umm URLBegin = TextEnd + 2;
      umm URLEnd = URLBegin;
      b32 WellFormed = (TextEnd + 1 < SourceLength) && (Source[TextEnd + 1] == '(');
      if (WellFormed) {
        while (URLEnd < SourceLength && Source[URLEnd] != ')') {
          ++URLEnd;
        }
        WellFormed = (URLEnd < SourceLength);
      }

      if (WellFormed) {
        RewriterSourceAdvance(Rewriter, (u32)(URLEnd + 1 - Rewriter->SourceIndex));
        RewriterPutf(
          Rewriter, "<a href=\"%.*s\" target=\"_blank\">%.*s</a>",
          (int)(URLEnd - URLBegin),
          (char*)(Source + URLBegin),
          (int)(TextEnd - TextBegin),
          (char*)(Source + TextBegin)
        );
      } else {
        // Not a link after all: keep the bracket as ordinary text.
        RewriterCopyNext(Rewriter);
      }
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Writes Name to the working buffer with each run of spaces/tabs replaced by
// a single '-'. Leading whitespace produces no dash; letter case is left
// unchanged. A whitespace run at the end of Name still produces a dash.
void RewriterKebabCaseName(Rule_Rewriter *Rewriter, String_UTF8 Name)
{
  // Set once a non-blank byte has been written since the last dash.
  b32 DashAllowed = false;

  for (u32 At = 0; At < Name.Length; ++At) {
    u8 Ch = Name.Str[At];
    b32 IsBlank = (Ch == ' ' || Ch == '\t');

    if (IsBlank) {
      if (DashAllowed) {
        RewriterPutc(Rewriter, '-');
        DashAllowed = false;
      }
      continue;
    }

    RewriterPutc(Rewriter, Ch);
    DashAllowed = true;
  }
}
// Rewrites internal links:
//   {{ Page Name }}        -> <a href="Page-Name.html">Page Name</a>
//   {{ Page Name }}{text}  -> <a href="Page-Name.html">text</a>
// The href is produced by RewriterKebabCaseName; the visible text is the
// optional {text} if present, otherwise the page name exactly as written.
//
// Fenced code is left untouched. Input is user-authored, so malformed syntax
// is tolerated rather than asserted on: an unterminated "{{" is copied
// literally, and an unterminated "{text" is treated as "no link text".
void RewriteInternalLink(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "{{")) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceLength = Rewriter->Source.Length;

      // Look ahead for the page name without consuming anything yet.
      umm NameBegin = (umm)Rewriter->SourceIndex + 2;
      while (NameBegin < SourceLength &&
             (Source[NameBegin] == ' ' || Source[NameBegin] == '\t')) {
        ++NameBegin;
      }
      umm NameEnd = NameBegin;
      while (NameEnd + 1 < SourceLength &&
             !(Source[NameEnd] == '}' && Source[NameEnd + 1] == '}')) {
        ++NameEnd;
      }

      if (NameEnd + 1 >= SourceLength) {
        // No closing "}}": keep the braces as ordinary text.
        RewriterAdvanceAndCopy(Rewriter, 2);
        continue;
      }

      String_UTF8 PageName = {0};
      PageName.Str = Source + NameBegin;
      PageName.Length = NameEnd - NameBegin;

      // By default the page name doubles as the link text.
      String_UTF8 LinkText = PageName;
      umm Consumed = NameEnd + 2;

      // Allow for optional link text to follow
      if (Consumed < SourceLength && Source[Consumed] == '{') {
        umm TextBegin = Consumed + 1;
        umm TextEnd = TextBegin;
        while (TextEnd < SourceLength && Source[TextEnd] != '}') {
          ++TextEnd;
        }
        if (TextEnd < SourceLength) {
          LinkText.Str = Source + TextBegin;
          LinkText.Length = TextEnd - TextBegin;
          Consumed = TextEnd + 1;
        }
      }

      RewriterSourceAdvance(Rewriter, (u32)(Consumed - Rewriter->SourceIndex));
      RewriterPut(Rewriter, "<a href=\"");
      RewriterKebabCaseName(Rewriter, PageName);
      RewriterPutf(Rewriter, ".html\">%.*s</a>", (int)LinkText.Length, (char*)LinkText.Str);
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Rewrites "![alt](src)" into <img src="src" alt="alt" />.
//
// Fenced code is left untouched. A "![" that is not followed by a complete
// "alt](src)" is copied literally instead of tripping an assertion, since it
// comes from user input.
void RewriteImage(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "![")) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceLength = Rewriter->Source.Length;

      // Look ahead without consuming anything until the image is known to be
      // complete.
      umm AltBegin = (umm)Rewriter->SourceIndex + 2;
      umm AltEnd = AltBegin;
      while (AltEnd < SourceLength && Source[AltEnd] != ']') {
        ++AltEnd;
      }

      umm SrcBegin = AltEnd + 2;
      umm SrcEnd = SrcBegin;
      b32 WellFormed = (AltEnd + 1 < SourceLength) && (Source[AltEnd + 1] == '(');
      if (WellFormed) {
        while (SrcEnd < SourceLength && Source[SrcEnd] != ')') {
          ++SrcEnd;
        }
        WellFormed = (SrcEnd < SourceLength);
      }

      if (WellFormed) {
        RewriterSourceAdvance(Rewriter, (u32)(SrcEnd + 1 - Rewriter->SourceIndex));
        RewriterPutf(
          Rewriter,
          "<img src=\"%.*s\" alt=\"%.*s\" />",
          (int)(SrcEnd - SrcBegin),
          (char*)(Source + SrcBegin),
          (int)(AltEnd - AltBegin),
          (char*)(Source + AltBegin)
        );
      } else {
        // Not an image after all: keep "![" as ordinary text.
        RewriterAdvanceAndCopy(Rewriter, 2);
      }
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Applies a "list" rule: a run of lines that start with Rule->List.ItemPrefix
// becomes TagBegin, one <li>...</li> per item, then TagEnd. The list ends at
// the first blank line ("\n\n") or at the end of the input. Fenced code
// outside of lists is left untouched.
//
// Differences from a naive scan:
//   * the prefix is skipped by its real length instead of a hard-coded 1;
//   * while an item is open, further occurrences of the prefix (e.g. the
//     hyphen in "well-known") are ordinary text, not a new <li>;
//   * an item still open when the input ends is closed with </li>.
//
// NOTE(review): a list is started wherever the prefix appears outside code,
// not only at the start of a line — confirm that is intended.
void RewriteList(Rule_Rewriter *Rewriter, Rewrite_Rule *Rule) {
  char *Prefix = Rule->List.ItemPrefix;
  u32 PrefixLength = (u32)strlen(Prefix);

  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, Prefix)) {
      b32 ItemOpen = false;
      RewriterPutf(Rewriter, "%s\n", Rule->List.TagBegin);

      while (RewriterIsValid(Rewriter)) {
        if (!ItemOpen && RewriterSourceMatches(Rewriter, Prefix)) {
          RewriterSourceAdvance(Rewriter, PrefixLength);
          RewriterPut(Rewriter, "<li>");
          RewriterSkipWhitespace(Rewriter);
          ItemOpen = true;
        } else if (RewriterSourceMatchesCh(Rewriter, '\n')) {
          RewriterPut(Rewriter, "</li>\n");
          ItemOpen = false;
          if (RewriterSourceMatches(Rewriter, "\n\n")) {
            // Blank line: the list is finished.
            RewriterSourceAdvance(Rewriter, 2);
            break;
          } else {
            RewriterSourceAdvance(Rewriter, 1);
          }
        } else {
          RewriterCopyNext(Rewriter);
        }
      }

      if (ItemOpen) {
        // Input ended without a trailing newline after the last item.
        RewriterPut(Rewriter, "</li>\n");
      }
      RewriterPutf(Rewriter, "%s\n\n", Rule->List.TagEnd);
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Replaces ``` fences with <code> / </code>: the first fence opens a code
// element, the next one closes it, and so on. Everything else is copied
// through unchanged. An unterminated fence leaves <code> open to the end of
// the output.
void RewriteCode(Rule_Rewriter *Rewriter) {
  char *Fence = "```";
  b32 InsideCode = false;

  while (RewriterIsValid(Rewriter)) {
    if (!RewriterSourceMatches(Rewriter, Fence)) {
      RewriterCopyNext(Rewriter);
      continue;
    }

    if (InsideCode) {
      RewriterSourceAdvance(Rewriter, 3);
      RewriterPut(Rewriter, "</code>");
    } else {
      RewriterPut(Rewriter, "<code>");
      RewriterSourceAdvance(Rewriter, 3);
    }
    InsideCode = !InsideCode;
  }
}
static Rewrite_Rule Rules[] = { | |
{ "bold", REWRITE_RULE_pair, .Pair = { "**", "<strong>", "</strong>" } }, | |
{ "italic", REWRITE_RULE_pair, .Pair = { "*", "<em>", "</em>" } }, | |
{ "strike", REWRITE_RULE_pair, .Pair = { "~", "<del>", "</del>" } }, | |
{ "underline", REWRITE_RULE_pair, .Pair = { "_", "<u>", "</u>" } }, | |
{ "paragraph", REWRITE_RULE_custom, .Rewrite = RewriteParagraph }, | |
{ "title", REWRITE_RULE_custom, .Rewrite = RewriteTitle }, | |
{ "image", REWRITE_RULE_custom, .Rewrite = RewriteImage }, | |
{ "unordered list", REWRITE_RULE_list, .List = { "-", "<ul>", "</ul>" } }, | |
{ "ordered list", REWRITE_RULE_list, .List = { "+", "<ol>", "</ol>" } }, | |
{ "external links", REWRITE_RULE_custom, .Rewrite = RewriteExternalLink }, | |
{ "internal links", REWRITE_RULE_custom, .Rewrite = RewriteInternalLink }, | |
{ "code", REWRITE_RULE_custom, .Rewrite = RewriteCode }, // code should be applied last | |
}; | |
// Loads FilePath into Arena and runs every rule in Rules over it, in table
// order. After each rule the output becomes the input of the next one.
// Returns the final text, which lives at the start of Arena.
String_UTF8 JasperstoneProcessFile(char *FilePath, Memory_Arena *Arena)
{
  Rule_Rewriter Rewriter = RuleRewriterInit(FilePath, Arena);

  Rewrite_Rule *RulesEnd = Rules + ArrayCount(Rules);
  for (Rewrite_Rule *Rule = Rules; Rule != RulesEnd; ++Rule) {
    if (Rule->Type == REWRITE_RULE_pair) {
      RewritePair(&Rewriter, Rule);
    } else if (Rule->Type == REWRITE_RULE_list) {
      RewriteList(&Rewriter, Rule);
    } else if (Rule->Type == REWRITE_RULE_custom) {
      Rule->Rewrite(&Rewriter);
    }
    // Unknown rule types are skipped, but the pass is still completed.
    RewriterCompleteRule(&Rewriter);
  }

  return(Rewriter.Source);
}
#ifdef JASPERSTONE_MAIN | |
// Applies every rule 100 times in a row and prints the final text. Repeated
// application of a rewriting rule is meant to be safe (it should not change
// the text again), so the printed output can be compared with a single run.
void RewriterTestSuite(Rule_Rewriter *Rewriter)
{
  for (u32 RuleIndex = 0; RuleIndex < ArrayCount(Rules); ++RuleIndex) {
    Rewrite_Rule *Rule = Rules + RuleIndex;

    for (u32 Repeat = 0; Repeat < 100; ++Repeat) {
      switch (Rule->Type) {
        case REWRITE_RULE_pair:
          RewritePair(Rewriter, Rule);
          RewriterCompleteRule(Rewriter);
          break;
        case REWRITE_RULE_list:
          RewriteList(Rewriter, Rule);
          RewriterCompleteRule(Rewriter);
          break;
        case REWRITE_RULE_custom:
          Rule->Rewrite(Rewriter);
          RewriterCompleteRule(Rewriter);
          break;
        default:
          break;
      }
    }
  }

  printf("%.*s", (int)Rewriter->Source.Length, Rewriter->Source.Str);
}
int main(int argc, char** argv) { | |
int ReturnValue = 0; | |
if (argc >= 2) { | |
char *FilePath = argv[1]; | |
u8 *Memory = (u8*)calloc(1, MemorySize); | |
Memory_Arena Arena = ArenaInit(Memory, MemorySize); | |
#if 0 | |
Rule_Rewriter Rewriter = RuleRewriterInit(FilePath, &Arena); | |
RewriterTestSuite(&Rewriter); | |
#else | |
String_UTF8 Result = JasperstoneProcessFile(FilePath, &Arena); | |
printf("%.*s", (int)Result.Length, Result.Str); | |
#endif | |
free(Memory); | |
} else { | |
fprintf(stderr, "USAGE: jaspdown FILE\n"); | |
ReturnValue = 1; | |
} | |
return(ReturnValue); | |
} | |
#endif // JASPERSTONE_MAIN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef LANGUAGE_LAYER_H | |
#define LANGUAGE_LAYER_H | |
#include <assert.h> | |
#include <inttypes.h> | |
#include <stdbool.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// Short aliases for the fixed-width integer types.
typedef int8_t    i8;
typedef int16_t   i16;
typedef int32_t   i32;
typedef int64_t   i64;

typedef uint8_t   u8;
typedef uint16_t  u16;
typedef uint32_t  u32;
typedef uint64_t  u64;

// Unsigned integer wide enough to hold a pointer; used for sizes and lengths.
typedef uintptr_t umm;

// Floating point.
typedef float     f32;
typedef double    f64;

// Boolean values stored in a signed integer of the given width.
typedef i8        b8;
typedef i16       b16;
typedef i32       b32;
// Project-wide spelling of the standard assert.
#define Assert(Condition) assert(Condition)

// Larger / smaller of two values. Each argument may be evaluated twice, so
// avoid arguments with side effects.
#define Max(A, B) (((A) > (B)) ? (A) : (B))
#define Min(A, B) (((A) < (B)) ? (A) : (B))

// Number of elements in a true array (not a pointer).
#define ArrayCount(Items) ((sizeof(Items))/(sizeof((Items)[0])))

// Byte-size helpers; results have type long long.
#define Kilobytes(Count) ((Count)*1024LL)
#define Megabytes(Count) (Kilobytes(Count)*1024LL)
#define Gigabytes(Count) (Megabytes(Count)*1024LL)
// Sets SizeBytes bytes starting at Memory to zero.
void ZeroMemory(u8 *Memory, umm SizeBytes)
{
  void *Target = Memory;
  memset(Target, 0, SizeBytes);
}
// Zeroes the object Value in place. The argument is parenthesized before its
// address is taken so that expression arguments expand correctly.
// NOTE: the expansion ends with its own ';' (kept for compatibility with
// existing call sites), so do not use it as the sole body of an if/else.
#define ClearMemory(Value) ZeroMemory((u8*)&(Value), sizeof(Value));
/////////////////////////////////////////////////////////////////////////////// | |
// True for the ASCII capital letters 'A' through 'Z' only.
b32 IsCapitalLetter(char Ch) {
  return('A' <= Ch && Ch <= 'Z');
}
// Converts an ASCII capital letter to its lowercase form; every other value
// is returned unchanged.
char ToLower(char Ch) {
  // In ASCII the lowercase letters sit 0x20 above their capitals.
  return(IsCapitalLetter(Ch) ? (char)(Ch + 0x20) : Ch);
}
/////////////////////////////////////////////////////////////////////////////// | |
typedef struct { | |
union { | |
void *Data; | |
u8 *Str; | |
}; | |
umm Length; | |
} String_UTF8; | |
String_UTF8 CreateStringUTF8(char *Value) | |
{ | |
String_UTF8 Result = {0}; | |
Result.Str = (u8*)Value; | |
Result.Length = strlen(Value); | |
return(Result); | |
} | |
// Compares Left with the NUL-terminated string Str. Returns 0 when they are
// byte-for-byte equal. When the lengths differ the result is 1; otherwise it
// is whatever memcmp reports.
i32 UTF8CompareCStr(String_UTF8* Left, char* Str)
{
  u32 StrLength = strlen(Str);
  if (Left->Length != StrLength)
  {
    return(1);
  }
  return(memcmp(Left->Str, Str, StrLength));
}
// Compares two strings. Returns 0 when they are byte-for-byte equal. When the
// lengths differ the result is 1; otherwise it is whatever memcmp reports.
i32 UTF8Compare(String_UTF8* Left, String_UTF8* Right)
{
  if (Left->Length != Right->Length)
  {
    return(1);
  }
  return(memcmp(Left->Str, Right->Str, Left->Length));
}
// Reports whether Str ends with the NUL-terminated string Postfix.
b32 UTF8EndsWith(String_UTF8* Str, char *Postfix)
{
  umm PostfixLength = strlen(Postfix);
  if (Str->Length < PostfixLength)
  {
    // Too short to contain the postfix at all.
    return(false);
  }

  u8 *Tail = Str->Str + (Str->Length - PostfixLength);
  return(memcmp(Tail, Postfix, PostfixLength) == 0);
}
// Returns the number of bytes in the UTF-8 sequence introduced by lead byte
// Ch, judged from its high bits:
//   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, anything else -> 4.
// Continuation bytes (10xxxxxx) and invalid lead bytes therefore also yield 4.
u32 UTF8CodepointLengthBytes(char Ch)
{
  unsigned char Lead = (unsigned char)Ch;

  if ((Lead & 0x80) == 0x00) {
    return(1);
  }
  if ((Lead & 0xE0) == 0xC0) {
    return(2);
  }
  if ((Lead & 0xF0) == 0xE0) {
    return(3);
  }
  return(4);
}
// Returns a pointer to the first byte of the codepoint that precedes Ptr, or
// NULL if the start of the text is reached first.
//
// Walks backwards over continuation bytes (bit 7 set, bit 6 clear) and stops
// on the first byte that is not one.
char* UTF8PreviousChar(char *Ptr, char *Start)
{
  char *Cursor = Ptr;
  while (Cursor > Start) {
    --Cursor;
    if ((*Cursor & 0xC0) != 0x80) {
      return(Cursor);
    }
  }
  return NULL;
}
/////////////////////////////////////////////////////////////////////////////// | |
typedef struct { | |
String_UTF8 *Data; | |
u8 *At; | |
u8 *End; | |
u8 *Peek; | |
} Text_Stream; | |
// Creates a stream positioned on the first codepoint of Data, with Peek on
// the codepoint after it.
//
// For empty text there is no first byte to inspect, so Peek is simply set to
// At (== End) instead of dereferencing a possibly NULL or out-of-range
// pointer.
Text_Stream StreamInit(String_UTF8 *Data)
{
  Text_Stream Result = {0};
  Result.Data = Data;
  Result.At = Data->Str;
  Result.End = Data->Str + Data->Length;
  Result.Peek = Result.At;
  if (Data->Length > 0) {
    Result.Peek = Result.At + UTF8CodepointLengthBytes(*Result.At);
  }
  return(Result);
}
// True while the current position lies inside the text: at or after its first
// byte and before End.
b32 IsValid(Text_Stream *Stream)
{
  u8 *Begin = Stream->Data->Str;
  return(Begin <= Stream->At && Stream->At < Stream->End);
}
// Moves the current position forward by NumBytes bytes. Peek is refreshed
// only if the new position is still inside the text.
void StreamAdvance(Text_Stream *Stream, u32 NumBytes)
{
  Stream->At += NumBytes;
  if (!IsValid(Stream)) {
    return;
  }
  Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
}
// Moves the current position backward by NumBytes bytes. Peek is refreshed
// only if the new position is still inside the text.
void StreamReverse(Text_Stream *Stream, u32 NumBytes)
{
  Stream->At -= NumBytes;
  if (!IsValid(Stream)) {
    return;
  }
  Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
}
// Steps to the next codepoint and returns the new current position. Does
// nothing if the stream is already outside the text.
//
// Peek is refreshed only when the new position is still inside the text (as
// StreamAdvance does); stepping onto End must not read the byte past the last
// one.
u8* StreamNext(Text_Stream *Stream)
{
  if (IsValid(Stream)) {
    Stream->At = Stream->Peek;
    if (IsValid(Stream)) {
      Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
    }
  }
  return(Stream->At);
}
// Steps back to the previous codepoint: Peek takes the old current position
// and At moves to the start of the codepoint before it (NULL when there is
// none, per UTF8PreviousChar). Does nothing if the stream is outside the text.
// Returns Peek, i.e. the position the stream was on before stepping back.
u8* StreamPrev(Text_Stream *Stream)
{
  if (!IsValid(Stream)) {
    return(Stream->Peek);
  }

  u8 *Previous = Stream->At;
  Stream->Peek = Previous;
  Stream->At = (u8*)UTF8PreviousChar((char*)Previous, (char*)Stream->Data->Str);
  return(Stream->Peek);
}
// True when the byte at the current position equals Ch. The position is
// dereferenced without a bounds check.
b32 StreamCurrentMatchesCh(Text_Stream *Stream, char Ch)
{
  u8 Current = *Stream->At;
  return(Current == Ch);
}
// True when the text at the current position begins with the NUL-terminated
// string Str.
//
// The comparison is bounded by the bytes remaining before End, so a Str longer
// than the rest of the text is "no match" rather than a read past the buffer.
b32 StreamCurrentMatches(Text_Stream *Stream, char *Str)
{
  umm Length = strlen(Str);

  if (Stream->At < Stream->Data->Str || Stream->At > Stream->End) {
    return(false);
  }
  umm Remaining = (umm)(Stream->End - Stream->At);
  if (Remaining < Length) {
    return(false);
  }
  return(memcmp(Stream->At, Str, Length) == 0);
}
// True when the current byte is an ASCII letter (a-z or A-Z).
b32 StreamIsAlpha(Text_Stream *Stream)
{
  u8 Current = *Stream->At;
  b32 IsLower = (Current >= 'a' && Current <= 'z');
  b32 IsUpper = (Current >= 'A' && Current <= 'Z');
  return(IsLower || IsUpper);
}
// True when the current byte is an ASCII digit (0-9).
b32 StreamIsDigit(Text_Stream *Stream)
{
  u8 Current = *Stream->At;
  return ('0' <= Current && Current <= '9');
}
// True when the current position starts a number: a digit, or a '+'/'-' sign
// immediately followed (at Peek) by a digit or a '.'.
b32 StreamIsNumeric(Text_Stream *Stream)
{
  if (StreamIsDigit(Stream)) {
    return (true);
  }

  b32 HasSign = (*Stream->At == '-' || *Stream->At == '+');
  if (!HasSign) {
    return (false);
  }

  // Peek is only inspected once a sign has been seen.
  u8 Next = *Stream->Peek;
  return ((Next >= '0' && Next <= '9') || Next == '.');
}
// True when the current byte is ':' and the byte at Peek is '=' (":=").
b32 StreamIsEquals(Text_Stream *Stream)
{
  if (*Stream->At != ':') {
    return (false);
  }
  return (*Stream->Peek == '=');
}
// True when the current byte is a space or a tab. Newlines do not count.
b32 StreamIsWhitespace(Text_Stream *Stream)
{
  u8 Current = *Stream->At;
  return(Current == ' ' || Current == '\t');
}
// True when the current position is at a line break: either "\n", or "\r"
// with "\n" at Peek.
b32 StreamIsNewline(Text_Stream *Stream)
{
  switch (*Stream->At) {
    case '\n':
      return(true);
    case '\r':
      return(*Stream->Peek == '\n');
    default:
      return(false);
  }
}
#endif // LANGUAGE_LAYER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef MEMORY_ARENA_H | |
#define MEMORY_ARENA_H | |
#include "language_layer.h" | |
#include <stdarg.h> | |
typedef struct Memory_Arena { | |
u8 *Base; | |
umm Size; | |
umm Used; | |
u32 ID; | |
u32 NumChildren; | |
u32 TempCount; | |
struct Memory_Arena *Parent; | |
} Memory_Arena; | |
// Useful macros | |
// Allocates room for one Type_ from Arena_ and yields it as a Type_*.
#define ArenaPushStruct(Arena_, Type_) ((Type_*)ArenaAlloc(Arena_, sizeof(Type_)))
// Allocates room for Count_ contiguous Type_ values and yields a Type_*.
// Count_ is parenthesized so that expressions such as `N + 1` scale correctly.
#define ArenaPushArray(Arena_, Count_, Type_) ((Type_*)ArenaAlloc(Arena_, sizeof(Type_) * (Count_)))
// Creates an empty top-level arena over the SizeBytes bytes starting at Base.
// The arena does not take ownership of the memory; the caller releases it.
Memory_Arena ArenaInit(u8 *Base, umm SizeBytes)
{
  // `{0}` rather than `{}`: an empty initializer list is not valid ISO C
  // before C23.
  Memory_Arena Result = {0};
  Result.Base = Base;
  Result.Size = SizeBytes;
  Result.Used = 0;
  Result.ID = 0;
  Result.NumChildren = 0;
  Result.TempCount = 0;
  Result.Parent = NULL;
  return(Result);
}
// Reserves SizeBytes bytes from the arena and returns a pointer to them.
// Asserts that the request fits in the remaining space.
//
// The check is written as a subtraction so that a very large SizeBytes cannot
// wrap `Used + SizeBytes` around and slip past the bound.
u8* ArenaAlloc(Memory_Arena* Arena, umm SizeBytes)
{
  Assert(Arena->Used <= Arena->Size);
  Assert(SizeBytes <= Arena->Size - Arena->Used);
  u8* Result = Arena->Base + Arena->Used;
  Arena->Used += SizeBytes;
  return(Result);
}
// Gives back the most recently reserved SizeBytes bytes by lowering the
// arena's used count. Asserts that at least that many bytes are in use.
void ArenaFree(Memory_Arena *Arena, umm SizeBytes)
{
  Assert(SizeBytes <= Arena->Used);
  Arena->Used = Arena->Used - SizeBytes;
}
// Formats text directly into the arena's free space and returns a pointer to
// it. The arena's used count grows by the formatted length, capped at the
// space that was available; the terminating NUL written by vsnprintf is not
// counted. Asserts if vsnprintf reports an error.
char* ArenaSnprintf(Memory_Arena *Arena, char *Format, ...)
{
  char *String = (char*)(Arena->Base + Arena->Used);
  umm Available = Arena->Size - Arena->Used;

  va_list List;
  va_start(List, Format);
  i32 Result = vsnprintf(String, Available, Format, List);
  va_end(List);

  Assert(Result >= 0); // Crash on error result

  umm Wanted = (umm)Result;
  Arena->Used += (Wanted <= Available) ? Wanted : Available;
  return(String);
}
// Reads the whole file FileName into freshly reserved arena memory and
// describes it in *Str.
//
// Returns true on success. Returns false — leaving *Str untouched and the
// arena's used count as it was — if the file cannot be opened, its size
// cannot be determined, or its contents cannot be read in full. A file larger
// than the remaining arena space still trips the assertion in ArenaAlloc.
b32 ArenaReadFile(const char *FileName, Memory_Arena *Arena, String_UTF8 *Str)
{
  b32 Result = false;
  FILE *File = fopen(FileName, "rb");
  if (File != NULL) {
    // ftell reports -1 on failure; never let that turn into a length.
    long FileSize = -1;
    if (fseek(File, 0, SEEK_END) == 0) {
      FileSize = ftell(File);
    }

    if (FileSize >= 0 && fseek(File, 0, SEEK_SET) == 0) {
      umm Length = (umm)FileSize;
      u8 *Buffer = ArenaAlloc(Arena, Length);

      // fread reports 0 items for an empty file, which is not an error here.
      if (Length == 0 || fread(Buffer, Length, 1, File) == 1) {
        Str->Str = Buffer;
        Str->Length = Length;
        Result = true;
      } else {
        ArenaFree(Arena, Length);
      }
    }
    fclose(File);
  }
  return(Result);
}
// Writes the used portion of the arena (Base .. Base + Used) to FileName,
// replacing any existing file.
//
// Returns true only if the file was opened, every byte was written, and the
// file was closed without error (fclose flushes buffered data, so a failure
// there means the data may not be on disk).
b32 ArenaWriteFile(Memory_Arena *Arena, char *FileName)
{
  FILE *DotFile = fopen(FileName, "wb");
  if (DotFile == NULL) {
    return(false);
  }

  b32 Result = true;
  // An empty arena yields an empty file; fwrite would report 0 items for it.
  if (Arena->Used > 0 && fwrite(Arena->Base, Arena->Used, 1, DotFile) != 1) {
    Result = false;
  }
  if (fclose(DotFile) != 0) {
    Result = false;
  }
  return(Result);
}
/////////////////////////////////////////////////////////////////////////////// | |
// child arenas | |
// ArenaPushChild pushes a child memory arena of Size bytes onto the Parent
// arena. The child's memory is reserved from Parent, and its ID is its index
// among Parent's children at the time of the push.
Memory_Arena ArenaPushChild(Memory_Arena *Parent, umm Size)
{
  // `{0}` rather than `{}`: an empty initializer list is not valid ISO C
  // before C23.
  Memory_Arena Result = {0};
  Result.Base = ArenaAlloc(Parent, Size);
  Result.Size = Size;
  Result.Used = 0;
  Result.Parent = Parent;
  Result.ID = Parent->NumChildren;
  Result.NumChildren = 0;
  Result.TempCount = 0;
  Parent->NumChildren++;
  return(Result);
}
// ArenaPopChild removes the child arena from its parent, restoring the space
// it occupied. Asserts that Child has a parent and that it is the parent's
// most recently pushed child.
void ArenaPopChild(Memory_Arena *Child)
{
  Memory_Arena *Parent = Child->Parent;
  Assert(Parent);
  Assert(Child->ID == (Parent->NumChildren - 1));

  // Hand the child's whole block back to the parent...
  Parent->Used -= Child->Size;
  --Parent->NumChildren;

  // ...and zero the part the child actually wrote, for later allocations.
  ZeroMemory(Parent->Base + Parent->Used, Child->Used);

  // Detach the child so it cannot be popped twice.
  Child->Parent = NULL;
  Child->ID = 0;
  Child->Used = 0;
}
#endif // MEMORY_ARENA_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment