Last active
June 16, 2022 14:04
-
-
Save DavideGalilei/147bf298122e825225829db09f3357a0 to your computer and use it in GitHub Desktop.
Mini XML parser in C (EOF is not handled correctly)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <errno.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include "seq.c" | |
typedef struct XmlNode XmlNode; | |
typedef Seq(XmlNode*) XmlNodes; | |
struct XmlNode { | |
char *name; | |
char *text; | |
XmlNodes *children; | |
}; | |
/*
 * Look at the next character of `stream` without consuming it.
 *
 * Returns the character as fgetc() would, or EOF when nothing could be
 * read. The value is always handed back to ungetc(); for EOF that call
 * fails and leaves the stream untouched.
 */
int fpeek(FILE *stream) {
    const int next = fgetc(stream);

    ungetc(next, stream);
    return next;
}
/* Number of spaces printed per nesting level by Xml_debug_. */
#define INDENTATION 4

/*
 * Recursively pretty-print `node` and its subtree to stdout.
 *
 * node  - element to print; NULL prints nothing.
 * depth - nesting level of `node`; each level indents by INDENTATION spaces.
 *
 * Output shape: an opening "<name>" line, the element text (if any) one
 * level deeper, every child one level deeper, then a closing "</name>" line.
 */
void Xml_debug_(XmlNode *node, int depth) {
    if (node == NULL) {
        return;
    }

    /* Passing NULL to %s is undefined behaviour, so print an empty name instead. */
    const char *name = (node->name != NULL) ? node->name : "";
    const int pad = depth * INDENTATION;

    /* "%*s" with an empty string emits exactly `pad` spaces (none at depth 0). */
    printf("%*s<%s>\n", pad, "", name);

    /* Elements without character data carry an empty string; skip it rather
     * than emitting a line made only of indentation. */
    if (node->text != NULL && node->text[0] != '\0') {
        printf("%*s%s\n", pad + INDENTATION, "", node->text);
    }

    if (node->children != NULL) {
        for (size_t i = 0; i < node->children->len; i++) {
            Xml_debug_(node->children->data[i], depth + 1);
        }
    }

    printf("%*s</%s>\n", pad, "", name);
}
/*
 * Pretty-print the tree rooted at `node` to stdout, starting at
 * nesting depth 0 (no indentation for the root element).
 */
void Xml_debug(XmlNode *node) {
    const int root_depth = 0;

    Xml_debug_(node, root_depth);
}
/*
 * Release `node` together with its entire subtree.
 *
 * Frees, for every node in the tree: the name and text strings, the child
 * array, the children container, and the node structure itself. All of these
 * are heap allocations made by Xml_parse_file, so `node` must come from there
 * (or be allocated the same way). The pointer is invalid after the call.
 *
 * node - root of the tree to release; NULL is accepted and ignored.
 */
void Xml_free(XmlNode *node) {
    if (node == NULL) {
        return;
    }

    if (node->children != NULL) {
        for (size_t i = 0; i < node->children->len; i++) {
            Xml_free(node->children->data[i]);
        }
        Seq_destroy(*node->children); /* releases the element array only */
        free(node->children);         /* the container struct is a separate allocation */
    }

    /* free(NULL) is a no-op, so no guards are needed. */
    free(node->text);
    free(node->name);
    free(node);
}
XmlNode *Xml_parse_file(FILE *stream) { | |
XmlNode *result = calloc(1, sizeof(*result)); | |
result->children = calloc(1, sizeof(*result->children)); | |
char c; | |
while ((c = fgetc(stream)) != EOF) { | |
// printf("%c", c); | |
// fflush(stdout); | |
switch (c) { | |
case ' ': | |
case '\t': | |
case '\r': | |
case '\n': { | |
continue; | |
} | |
case '<': { | |
size_t namelen = 0; | |
while ((c = fgetc(stream)) != '>') namelen++; | |
result->name = malloc(namelen + 1); | |
fseek(stream, -(namelen + 1), SEEK_CUR); | |
for (size_t i = 0; i < namelen; i++) result->name[i] = fgetc(stream); | |
result->name[namelen] = '\0'; | |
// printf("Name: %s\n", result->name); | |
fseek(stream, 1, SEEK_CUR); // Skip ">" | |
Seq(char) text = {0}; | |
while (1) { | |
while ((c = fgetc(stream)) != '<') { | |
if (text.len == 0 && (c == ' ' || c == '\n' || c == '\r' || c == '\t')) continue; | |
Seq_add(text, c); | |
}; | |
if (fpeek(stream) == '/') { | |
while ((c = fgetc(stream)) != '>'); | |
break; | |
} | |
else { | |
fseek(stream, -1, SEEK_CUR); | |
XmlNode *got = Xml_parse_file(stream); | |
// printf("\nNode debug: "); Xml_debug(got); | |
Seq_add((*result->children), got); | |
// printf("Seq len: %ld\n", result->children->len); | |
} | |
} | |
result->text = malloc(text.len + 1); | |
for (size_t i = 0; i < text.len; i++) result->text[i] = text.data[i]; | |
result->text[text.len] = '\0'; | |
Seq_destroy(text); | |
return result; | |
} | |
default: { | |
printf("Unreachable: '%c'!\n", c); | |
abort(); | |
} | |
} | |
} | |
// printf("Name test: %s | Children: %ld\n", result->name, result->children->len); | |
return result; | |
} | |
int main(void) { | |
FILE *file = fopen("test.xml", "r"); | |
if (!file) { | |
printf("Could not open file: %s\n", strerror(errno)); | |
return (EXIT_FAILURE); | |
} | |
/* XmlNodes children = {0}; | |
XmlNode node = { .name = "body", .children = &children }; | |
XmlNode inner = { .name = "test", .text = "asdasd" }; | |
Seq_add(children, &inner); | |
Seq_add(children, &inner); | |
Seq_add(children, &inner); */ | |
XmlNode *node; | |
while (1) { | |
// printf("Parsing...\n"); | |
node = Xml_parse_file(file); | |
if (node->name == NULL) { | |
Xml_free(node); | |
break; | |
} | |
// printf("Result:\n"); | |
Xml_debug(node); | |
Xml_free(node); | |
}; | |
return (EXIT_SUCCESS); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Both targets rebuild ./main from main.c with strict warnings.
# They name actions, not files, so mark them phony: a stray file called
# "debug" or "run" must not make the target look up to date.
.PHONY: debug run

# Build with debug info and start the program under gdb.
debug:
	gcc -o main --std=c99 -Wall -Wextra -Werror -pedantic -g main.c && gdb ./main

# Build and run the program.
run:
	gcc -o main --std=c99 -Wall -Wextra -Werror -pedantic main.c && ./main
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _SEQ_INCLUDED | |
#define _SEQ_INCLUDED | |
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <inttypes.h> | |
/*
 * Seq(T): anonymous struct type describing a growable array of T.
 *   data - heap buffer holding the elements (NULL until the first add)
 *   temp - scratch slot; Seq_add stores the new element here so that its
 *          address can be passed to _Seq_add
 *   size - capacity of `data` in BYTES
 *   len  - number of elements currently stored
 * A sequence starts out zero-initialised: `Seq(int) xs = {0};`
 */
#define Seq(T) struct {T *data; T temp; size_t size; size_t len;}

/* Number of elements by which the buffer grows or shrinks in one step. */
#ifndef ARENA_SIZE
#define ARENA_SIZE 1024
#endif

/*
 * Type-generic front ends. `seq` must be an lvalue of a Seq(T) type; it is
 * expanded several times, so it must not have side effects.
 *   Seq_add(seq, element) - append a copy of `element`; 0 on success, -1 on failure
 *   Seq_pop(seq, index)   - remove the element at `index`; 0 on success, -1 if out of range
 *   Seq_destroy(seq)      - free the element buffer (the struct fields are not reset)
 */
#define Seq_add(seq, element) _Seq_add((void**)(&(seq).data), &(seq).size, &(seq).len, sizeof((seq).data[0]), ((seq).temp = (element), &(seq).temp))
#define Seq_pop(seq, index) _Seq_pop((void**)(&(seq).data), &(seq).size, &(seq).len, sizeof((seq).data[0]), (index))
#define Seq_destroy(seq) _Seq_destroy((seq).data)

/*
 * Append one element of `elem_size` bytes, copied from `element`.
 *
 * data      - address of the buffer pointer; updated when the buffer grows
 * data_size - buffer capacity in bytes; updated when the buffer grows
 * len       - element count; incremented on success
 *
 * Returns 0 on success. Returns -1 when the buffer cannot be grown; in that
 * case the sequence is left exactly as it was.
 */
int _Seq_add(void **data, size_t *data_size, size_t *len, size_t elem_size, void *element) {
    if (((*len) + 1) * elem_size > (*data_size)) {
        size_t grown = *data_size + (ARENA_SIZE * elem_size);
        /* Keep the old pointer until realloc has succeeded. */
        void *bigger = realloc(*data, grown);
        if (bigger == NULL) return -1;
        *data = bigger;
        *data_size = grown;
    }
    memcpy((char *)(*data) + (*len) * elem_size, element, elem_size);
    (*len)++;
    return 0;
}

/*
 * Remove the element at `index`, shifting later elements down by one.
 *
 * When a whole arena (ARENA_SIZE elements) is left unused, one arena is
 * handed back to the allocator; at least one arena is always kept so the
 * buffer is never resized to zero bytes.
 *
 * Returns 0 on success, -1 when `index` is out of range (nothing changes).
 */
int _Seq_pop(void **data, size_t *data_size, size_t *len, size_t elem_size, size_t index) {
    if (index >= (*len)) return -1;

    /* Only the elements AFTER the removed one have to move. */
    size_t tail = (*len) - index - 1;
    if (tail > 0) {
        char *slot = (char *)(*data) + index * elem_size;
        memmove(slot, slot + elem_size, tail * elem_size);
    }
    (*len)--;

    size_t arena = ARENA_SIZE * elem_size;
    if (*data_size > arena && (*len) * elem_size <= *data_size - arena) {
        void *smaller = realloc(*data, *data_size - arena);
        /* Shrinking is an optimisation: on failure keep the larger buffer. */
        if (smaller != NULL) {
            *data = smaller;
            *data_size -= arena;
        }
    }
    return 0;
}

/* Free the element buffer of a sequence; NULL is accepted. */
void _Seq_destroy(void *data) {
    free(data);
}
/* int main(void) { | |
Seq(int) numbers = {0}; | |
for (size_t i = 0; i < 20; i++) { | |
Seq_add(numbers, i); | |
printf("Added %d (%d)...\n", i, numbers.data[numbers.len - 1]); | |
if (i % 2) Seq_pop(numbers, numbers.len - 1); | |
} | |
for (size_t i = 0; i < numbers.len; i++) { | |
printf("numbers[%d] = %d (Arena: %d)\n", i, numbers.data[i], numbers.size); | |
} | |
} */ | |
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<note> | |
<to> | |
<user>Jeff</user> | |
</to> | |
<from>Jani</from> | |
<heading>Reminder</heading> | |
<body>Don't forget me this weekend!</body> | |
</note> | |
<note> | |
<to> | |
<user>Dani</user> | |
</to> | |
<from>Jeff</from> | |
<heading>Hello</heading> | |
<body>World</body> | |
</note> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment