Created
August 24, 2025 04:08
-
-
Save sogaiu/a94bc9647449c211d6108df4e162ce62 to your computer and use it in GitHub Desktop.
janet parser notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| * minor fixes pr candidate items | |
| * change comments | |
| /* Use counter to keep track of number of '=' seen */ <- should be backticks? | |
| * initially missed things | |
| * popstate has a for loop surrounding most of its code | |
| * atsign's non-default cases all return...the default case leads to code | |
| after the switch statement (slightly confusing?) | |
| * unknown items | |
| * JANET_PARSER_GENERATED_ERROR | |
| * parse.c and janet.h | |
| struct JanetParser { | |
| Janet *args; | |
| const char *error; | |
| JanetParseState *states; | |
| uint8_t *buf; | |
| size_t argcount; | |
| size_t argcap; | |
| size_t statecount; | |
| size_t statecap; | |
| size_t bufcount; | |
| size_t bufcap; | |
| size_t line; | |
| size_t column; | |
| size_t pending; | |
| int lookback; | |
| int flag; | |
| }; | |
| // XXX: reordered for comprehension | |
| struct JanetParser { | |
| JanetParseState *states; // parser states | |
| size_t statecount; // number of items in states | |
| size_t statecap; // max capacity of states | |
| Janet *args; // parsed items | |
| size_t argcount; // number of items in args | |
| size_t argcap; // max capacity of args | |
| size_t pending; // # of parsed(?) items in root state (popstate) | |
| uint8_t *buf; // used for token, string, comment (@ (?) and state delims) | |
| size_t bufcount; // number of items in buf | |
| size_t bufcap; // max capacity of buf | |
| int lookback; // last byte(?) handled (if any) | |
| size_t line; | |
| size_t column; | |
| const char *error; // non-NULL means => JANET_PARSE_ERROR | |
| int flag; // non-zero(?) means => JANET_PARSE_DEAD | |
| }; | |
| JanetParser (state machine) | |
| * state info | |
| * states (stack) | |
| * statecount (number of items) - when > 1 and parser status not dead or error, means JANET_PARSE_PENDING | |
| * statecap (capacity) | |
| * parsed items | |
| * args (used as stack and queue) | |
| * argcount (number of items) | |
| * argcap (capacity) | |
| * pending - number of parsed items in root state? | |
| * buffer items - for construction of tokens, strings, comments, ...? | |
| * buf (stack) | |
| * bufcount (number of items) | |
| * bufcap (capacity) | |
| * last byte handled (if any) | |
| * form(?) location info | |
| * some status info for determining JANET_PARSE_ERROR and JANET_PARSE_DEAD only | |
| * args | |
| * push_arg (add item treating args as stack) | |
| --- | |
| * janet_parser_produce (remove item treating args as queue) | |
| * janet_parser_produce_wrapped (remove item treating args as queue) | |
| * pending | |
| * popstate (increment - when statecount is 1 and newtop flags has PFLAG_CONTAINER) | |
| /* Keep track of number of values in the root state */ | |
| --- | |
| * janet_parser_produce (decrement) | |
| * janet_parser_produce_wrapped (decrement) | |
| * statecount | |
| * some manual manipulation of statecount | |
| * atsign | |
| * comment | |
| * bufcount | |
| * some manual manipulation of bufcount | |
| * comment | |
| * stringend | |
| * tokenchar | |
| * status | |
| * list of values | |
| * JANET_PARSE_ROOT | |
| * if not any of the other status values | |
| * :root - the parser can either read more values or safely terminate | |
| * read more values == not in the middle of parsing a value(?) | |
| * JANET_PARSE_PENDING | |
| * if (parser->statecount > 1) return JANET_PARSE_PENDING; | |
| * :pending - a value is being parsed | |
| * JANET_PARSE_DEAD | |
| * if (parser->flag) return JANET_PARSE_DEAD; | |
| * JANET_PARSE_ERROR | |
| * if (parser->error) return JANET_PARSE_ERROR; | |
| * janet_parser_error | |
| * janet_parser_status | |
| --- | |
| * cfun_parse_consume | |
| * JANET_PARSE_ROOT | |
| * JANET_PARSE_PENDING | |
| * cfun_parse_status | |
| * JANET_PARSE_ROOT | |
| * JANET_PARSE_PENDING | |
| * JANET_PARSE_DEAD | |
| * JANET_PARSE_ERROR | |
| struct JanetParseState { | |
| int32_t counter; | |
| int32_t argn; | |
| int flags; | |
| size_t line; | |
| size_t column; | |
| Consumer consumer; | |
| }; | |
| * counter | |
| * only used for strings | |
| * escapeh (decrement) | |
| * escapeu (decrement) | |
| * escape1 (set to 2, 4, 6) | |
| * longstring (set to 1, increment, set to 0) | |
| * some manual manipulation of counter | |
| * escapeh | |
| * escapeu | |
| * escape1 | |
| * longstring | |
| * argn | |
| * for containers, represents number of elements (for table/struct, kv pairs)? | |
| (see close_*, popstate) | |
| * for strings, used for XXX | |
| * escapeh (set to ..., lowest byte saved via push_buf, set to 0) | |
| * escapeu (set to ..., ??? saved via write_codepoint (via push_buf), set to 0) | |
| * escape1 (set to 0) | |
| * tokenchar (set to 1) | |
| * longstring (increment) | |
| --- | |
| * janet_parser_produce (decrement) | |
| * janet_parser_produce_wrapped (decrement) | |
| * cfun_parse_insert (increment) | |
| * some manual manipulation of argn | |
| * escapeh | |
| * escapeu | |
| * escape1 | |
| * tokenchar | |
| * flags | |
| * the PFLAG_* defines are passed as the third argument of pushstate, either | |
| alone, "or"-ed with other PFLAG_* values, or (in the case of reader macros) | |
| "or"-ed with the reader macro character. pushstate saves that value in the | |
| flags field of a new JanetParseState, which is subsequently saved as the new | |
| top-most state in the JanetParser passed to pushstate. possibly this might be | |
| paraphrased as "let's remember what we saw for later use". | |
| * PFLAG_CONTAINER 0x100 | |
| * PFLAG_BUFFER 0x200 | |
| * PFLAG_PARENS 0x400 | |
| * PFLAG_SQRBRACKETS 0x800 | |
| * PFLAG_CURLYBRACKETS 0x1000 | |
| * PFLAG_STRING 0x2000 | |
| * PFLAG_LONGSTRING 0x4000 | |
| * PFLAG_READERMAC 0x8000 | |
| * PFLAG_ATSYM 0x10000 | |
| * PFLAG_COMMENT 0x20000 | |
| * PFLAG_TOKEN 0x40000 | |
| * (nothing else in between?) | |
| * PFLAG_IN_STRING 0x100000 | |
| * PFLAG_END_CANDIDATE 0x200000 | |
| * the flags field of JanetParseState is referenced by various functions | |
| such as: | |
| * popstate (container? reader macro?) | |
| * delim_error (parens? square brackets? curly braces? string? long string?) | |
| * stringend (long string? buffer?) | |
| * longstring (in string? end candidate?) | |
| * root (parens? square brackets? at sym? curly braces?) | |
| --- | |
| * cfun_parse_insert | |
| (comment? container? string? long string?) | |
| * janet_wrap_parse_state | |
| (container? parens? square brackets? at sym? curly braces? string? | |
| long string? buffer? comment? token? at sym? reader mac?) | |
| * parser_state_delimiters | |
| (parens? square brackets? curly braces? string? long string?) | |
| * parser_state_frames | |
| (container?) | |
| * some manual manipulation of flags | |
| * longstring | |
| * consumers | |
| * list of consumers | |
| * root | |
| * stringchar | |
| * longstring | |
| * tokenchar | |
| * comment | |
| * atsign | |
| * escapeh | |
| * escapeu | |
| * escape1 | |
| * each state has an associated consumer, but | |
| sometimes the consumer field is assigned to | |
| directly | |
| * stringchar | |
| * escapeh | |
| * escapeu | |
| * escape1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment