Created
August 26, 2012 19:01
-
-
Save juntalis/3482631 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <io.h> | |
#include "symbols.h" | |
/* Short aliases for the unsigned types used throughout the scanner. */
typedef unsigned int uint;
typedef unsigned char uchar;

/* Number of bytes requested from the input descriptor on every fill(). */
#define BSIZE 8192

/*
 * Interface macros expected by the re2c-generated code inside scan().
 * They assume a local `uchar *cursor` and a `Scanner *s` are in scope.
 */
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
/* Refill the buffer; fill() may move it, so the local cursor is reassigned. */
#define YYFILL(n) {cursor = fill(s, cursor);}
/* Store the scan position back into the Scanner and return token code i. */
#define RET(i) {s->cur = cursor; return i;}
typedef struct Scanner { | |
int fd; | |
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; | |
uint line; | |
} Scanner; | |
/*
 * fill - make more input available to the scanner.
 *
 * Discards everything before s->tok, grows the buffer when fewer than BSIZE
 * bytes of free space remain after s->lim, then reads up to BSIZE bytes from
 * s->fd. Every pointer into the buffer (tok, ptr, pos, lim and the caller's
 * cursor) is shifted or rebased so that it still refers to the same input
 * byte afterwards.
 *
 * A read that delivers fewer than BSIZE bytes is taken as end of input: a
 * sentinel '\n' is stored right after the data and s->eof is set one past
 * it. A failed read is reported and handled the same way, with zero bytes.
 * Once s->eof is set, further calls do nothing.
 *
 * Parameters:
 *   s      - scanner state; a zero-filled Scanner is accepted.
 *   cursor - the caller's current scan position inside the buffer.
 *
 * Returns the (possibly relocated) cursor. Terminates the process if the
 * buffer cannot be allocated, because there is no error channel to scan().
 */
uchar *fill(Scanner *s, uchar *cursor){
    if(!s->eof) {
        uint cnt = s->tok - s->bot;
        int got;

        if(cnt){
            /* Source and destination overlap whenever the kept tail is
             * longer than the discarded prefix, so memcpy is not allowed. */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->pos -= cnt;
            s->lim -= cnt;
        }
        if((s->top - s->lim) < BSIZE){
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
            if(!buf){
                fprintf(stderr, "fill: out of memory\n");
                exit(EXIT_FAILURE);
            }
            /* On the very first call there is no old buffer to copy from. */
            if(s->lim != s->tok)
                memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->pos = &buf[s->pos - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }
        got = _read(s->fd, (char*) s->lim, BSIZE);
        if(got < 0){
            /* Keep the error value out of the unsigned offset math below. */
            perror("_read");
            got = 0;
        }
        cnt = (uint) got;
        if(cnt != BSIZE){
            /* cnt < BSIZE here, so the sentinel fits in the free space. */
            s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
        }
        s->lim += cnt;
    }
    return cursor;
}
/*
 * scan - return the code of the next token read through scanner s.
 *
 * Scanning resumes at s->cur; RET() stores the new position back into
 * s->cur before returning. s->tok marks where the current match started.
 * EOI is returned once the sentinel newline that fill() stores at end of
 * input has been consumed (cursor == s->eof).
 *
 * The body is a re2c specification with four states reached by goto:
 *   std          - ordinary tokens; whitespace and newlines are skipped
 *   comment      - inside a block comment, skipped up to the closing marker
 *   line_comment - inside a line comment, skipped up to the newline
 *   preprocessor - after '#', left again at the first unescaped newline
 *
 * Every consumed newline updates s->pos and increments s->line.
 */
int scan(Scanner *s){
    uchar *cursor = s->cur;

std:
    s->tok = cursor;
/*!re2c
    any = [\000-\377];
    O = [0-7];
    D = [0-9];
    L = [a-zA-Z_];
    H = [a-fA-F0-9];
    E = [Ee] [+-]? D+;
    FS = [fFlL];
    IS = [uUlL]*;
    ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
*/
/*!re2c
    "/*" { goto comment; }
    "//" { goto line_comment; }
    "#" { goto preprocessor; }
    "auto" { RET(AUTO); }
    "break" { RET(BREAK); }
    "case" { RET(CASE); }
    "char" { RET(CHAR); }
    "const" { RET(CONST); }
    "continue" { RET(CONTINUE); }
    "default" { RET(DEFAULT); }
    "do" { RET(DO); }
    "double" { RET(DOUBLE); }
    "else" { RET(ELSE); }
    "enum" { RET(ENUM); }
    "extern" { RET(EXTERN); }
    "float" { RET(FLOAT); }
    "for" { RET(FOR); }
    "goto" { RET(GOTO); }
    "if" { RET(IF); }
    "int" { RET(INT); }
    "long" { RET(LONG); }
    "register" { RET(REGISTER); }
    "return" { RET(RETURN); }
    "short" { RET(SHORT); }
    "signed" { RET(SIGNED); }
    "sizeof" { RET(SIZEOF); }
    "static" { RET(STATIC); }
    "struct" { RET(STRUCT); }
    "switch" { RET(SWITCH); }
    "typedef" { RET(TYPEDEF); }
    "union" { RET(UNION); }
    "unsigned" { RET(UNSIGNED); }
    "void" { RET(VOID); }
    "volatile" { RET(VOLATILE); }
    "while" { RET(WHILE); }
    "__declspec" { RET(DECLSPEC_DECL); }
    "__cdecl" { RET(DECLSPEC_CDECL); }
    "__stdcall" { RET(DECLSPEC_STDCALL); }
    "__fastcall" { RET(DECLSPEC_FASTCALL); }
    "dllimport" { RET(DECLSPEC_DLLIMPORT); }
    "dllexport" { RET(DECLSPEC_DLLEXPORT); }
    "__VA_ARGS__" { RET(PRE_MACRO_VAARGS); }
    "__STDC__" { RET(PRE_MACRO_STDC); }
    "__DATE__" { RET(PRE_MACRO_DATE); }
    "__FILE__" { RET(PRE_MACRO_FILE); }
    "__LINE__" { RET(PRE_MACRO_LINE); }
    "__TIMESTAMP__" { RET(PRE_MACRO_TIMESTAMP); }
    "__COUNTER__" { RET(PRE_MACRO_COUNTER); }
    "__cplusplus" { RET(PRE_MACRO_CPP); }
    "__FUNCTION__" { RET(PRE_MACRO_FUNC); }
    "__FUNCSIG__" { RET(PRE_MACRO_FUNCSIG); }
    "__FUNCDNAME__" { RET(PRE_MACRO_FUNCDN); }
    "_DEBUG" { RET(PRE_MACRO_DEBUG); }
    "_NDEBUG" { RET(PRE_MACRO_NDEBUG); }
    "_WIN32" { RET(PRE_MACRO_WIN32); }
    "_WIN64" { RET(PRE_MACRO_WIN64); }
    L (L|D)* { RET(ID); }
    ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
    (['] (ESC|any\[\n\\'])* ['])
    { RET(ICON); }
    (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
    { RET(FCON); }
    (["] (ESC|any\[\n\\"])* ["])
    { RET(SCON); }
    "..." { RET(ELLIPSIS); }
    ">>=" { RET(RSHIFTEQ); }
    "<<=" { RET(LSHIFTEQ); }
    "+=" { RET(ADDEQ); }
    "-=" { RET(SUBEQ); }
    "*=" { RET(MULEQ); }
    "/=" { RET(DIVEQ); }
    "%=" { RET(MODEQ); }
    "&=" { RET(ANDEQ); }
    "^=" { RET(XOREQ); }
    "|=" { RET(OREQ); }
    ">>" { RET(RSHIFT); }
    "<<" { RET(LSHIFT); }
    "++" { RET(INCR); }
    "--" { RET(DECR); }
    "->" { RET(DEREF); }
    "&&" { RET(ANDAND); }
    "||" { RET(OROR); }
    "<=" { RET(LEQ); }
    ">=" { RET(GEQ); }
    "==" { RET(EQL); }
    "!=" { RET(NEQ); }
    ";" { RET(SEMICOLON_SYM); }
    "{" { RET(LBRACE_SYM); }
    "}" { RET(RBRACE_SYM); }
    "," { RET(COMMA_SYM); }
    ":" { RET(COLON_SYM); }
    "=" { RET(SETEQ_SYM); }
    "(" { RET(LPARN_SYM); }
    ")" { RET(RPARN_SYM); }
    "[" { RET(LBRACK_SYM); }
    "]" { RET(RBRACK_SYM); }
    "." { RET(DOT_SYM); }
    "&" { RET(AND_SYM); }
    "!" { RET(EXCLA_SYM); }
    "~" { RET(TILDE_SYM); }
    "-" { RET(MINUS_SYM); }
    "+" { RET(PLUS_SYM); }
    "*" { RET(MULTI_SYM); }
    "/" { RET(DIVIDE_SYM); }
    "%" { RET(PRCNT_SYM); }
    "<" { RET(LT_SYM); }
    ">" { RET(RT_SYM); }
    "^" { RET(PWROF_SYM); }
    "|" { RET(OR_SYM); }
    "?" { RET(QUMRK); }
    [ \t\v\f]+ { goto std; }
    "\n"
    {
        if(cursor == s->eof) RET(EOI);
        s->pos = cursor; s->line++;
        goto std;
    }
    any
    {
        printf("unexpected character: %c\n", *s->tok);
        goto std;
    }
*/

    /* Block comment: skip to the closing marker, then resume in std.
     * NOTE(review): this also returns to std when the comment was opened
     * inside a preprocessor line; confirm that is intended. */
comment:
/*!re2c
    "*/" { goto std; }
    "\n"
    {
        if(cursor == s->eof) RET(EOI);
        s->tok = s->pos = cursor; s->line++;
        goto comment;
    }
    any { goto comment; }
*/

    /* Line comment: skip to the end of the line, then resume in std. */
line_comment:
/*!re2c
    "\n"
    {
        if(cursor == s->eof) RET(EOI);
        s->tok = s->pos = cursor; s->line++;
        goto std;
    }
    any { goto line_comment; }
*/

    /* Preprocessor line: directive and pragma names plus the ordinary token
     * set. A backslash-newline stays in this state; a bare newline goes
     * back to std; any other unmatched byte is skipped silently.
     * NOTE(review): there is no rule for "undef"; a duplicate "error" rule
     * that could never match (returning the truncated name PRE_) was
     * dropped here; confirm the intended token against symbols.h. */
preprocessor:
/*!re2c
    "/*" { goto comment; }
    "//" { goto line_comment; }
    "include" { RET(PRE_INC); }
    "import" { RET(PRE_IMP); }
    "using" { RET(PRE_USING); }
    "line" { RET(PRE_LINE); }
    "pragma" { RET(PRE_PRAGMA); }
    "error" { RET(PRE_ERROR); }
    "defined" { RET(PRE_ISDEF); }
    "define" { RET(PRE_DEF); }
    "ifdef" { RET(PRE_IFDEF); }
    "ifndef" { RET(PRE_IFNDEF); }
    "elif" { RET(PRE_ELIF); }
    "if" { RET(PRE_IF); }
    "else" { RET(PRE_ELSE); }
    "endif" { RET(PRE_ENDIF); }
    "##" { RET(PRE_PASTETOK); }
    "#@" { RET(PRE_CHRTOK); }
    "#" { RET(PRE_STRTOK); }
    "alloc_text" { RET(PRE_PRAGMA_ALLOC_TEXT); }
    "auto_inline" { RET(PRE_PRAGMA_AUTO_INLINE); }
    "bss_seg" { RET(PRE_PRAGMA_BSS_SEG); }
    "check_stack" { RET(PRE_PRAGMA_CHECK_STACK); }
    "code_seg" { RET(PRE_PRAGMA_CODE_SEG); }
    "comment" { RET(PRE_PRAGMA_COMMENT); }
    "component" { RET(PRE_PRAGMA_COMPONENT); }
    "conform1" { RET(PRE_PRAGMA_CONFORM1); }
    "const_seg" { RET(PRE_PRAGMA_CONST_SEG); }
    "data_seg" { RET(PRE_PRAGMA_DATA_SEG); }
    "deprecated" { RET(PRE_PRAGMA_DEPRECATED); }
    "fenv_access" { RET(PRE_PRAGMA_FENV_ACCESS); }
    "float_control" { RET(PRE_PRAGMA_FLOAT_CONTROL); }
    "fp_contract" { RET(PRE_PRAGMA_FP_CONTRACT); }
    "function" { RET(PRE_PRAGMA_FUNCTION); }
    "hdrstop" { RET(PRE_PRAGMA_HDRSTOP); }
    "include_alias" { RET(PRE_PRAGMA_INCLUDE_ALIAS); }
    "init_seg1" { RET(PRE_PRAGMA_INIT_SEG1); }
    "inline_depth" { RET(PRE_PRAGMA_INLINE_DEPTH); }
    "inline_recursion" { RET(PRE_PRAGMA_INLINE_RECURSION); }
    "intrinsic" { RET(PRE_PRAGMA_INTRINSIC); }
    "make_public" { RET(PRE_PRAGMA_MAKE_PUBLIC); }
    "managed" { RET(PRE_PRAGMA_MANAGED); }
    "message" { RET(PRE_PRAGMA_MESSAGE); }
    "omp" { RET(PRE_PRAGMA_OMP); }
    "once" { RET(PRE_PRAGMA_ONCE); }
    "optimize" { RET(PRE_PRAGMA_OPTIMIZE); }
    "pack" { RET(PRE_PRAGMA_PACK); }
    "pointers_to_members1" { RET(PRE_PRAGMA_POINTERS_TO_MEMBERS1); }
    "pop_macro" { RET(PRE_PRAGMA_POP_MACRO); }
    "push_macro" { RET(PRE_PRAGMA_PUSH_MACRO); }
    "region" { RET(PRE_PRAGMA_REGION); }
    "endregion" { RET(PRE_PRAGMA_ENDREGION); }
    "runtime_checks" { RET(PRE_PRAGMA_RUNTIME_CHECKS); }
    "section" { RET(PRE_PRAGMA_SECTION); }
    "setlocale" { RET(PRE_PRAGMA_SETLOCALE); }
    "strict_gs_check" { RET(PRE_PRAGMA_STRICT_GS_CHECK); }
    "unmanaged" { RET(PRE_PRAGMA_UNMANAGED); }
    "vtordisp1" { RET(PRE_PRAGMA_VTORDISP1); }
    "warning" { RET(PRE_PRAGMA_WARNING); }
    "auto" { RET(AUTO); }
    "break" { RET(BREAK); }
    "case" { RET(CASE); }
    "char" { RET(CHAR); }
    "const" { RET(CONST); }
    "continue" { RET(CONTINUE); }
    "default" { RET(DEFAULT); }
    "do" { RET(DO); }
    "double" { RET(DOUBLE); }
    "else" { RET(ELSE); }
    "enum" { RET(ENUM); }
    "extern" { RET(EXTERN); }
    "float" { RET(FLOAT); }
    "for" { RET(FOR); }
    "goto" { RET(GOTO); }
    "if" { RET(IF); }
    "int" { RET(INT); }
    "long" { RET(LONG); }
    "register" { RET(REGISTER); }
    "return" { RET(RETURN); }
    "short" { RET(SHORT); }
    "signed" { RET(SIGNED); }
    "sizeof" { RET(SIZEOF); }
    "static" { RET(STATIC); }
    "struct" { RET(STRUCT); }
    "switch" { RET(SWITCH); }
    "typedef" { RET(TYPEDEF); }
    "union" { RET(UNION); }
    "unsigned" { RET(UNSIGNED); }
    "void" { RET(VOID); }
    "volatile" { RET(VOLATILE); }
    "while" { RET(WHILE); }
    "__declspec" { RET(DECLSPEC_DECL); }
    "__cdecl" { RET(DECLSPEC_CDECL); }
    "__stdcall" { RET(DECLSPEC_STDCALL); }
    "__fastcall" { RET(DECLSPEC_FASTCALL); }
    "dllimport" { RET(DECLSPEC_DLLIMPORT); }
    "dllexport" { RET(DECLSPEC_DLLEXPORT); }
    "__VA_ARGS__" { RET(PRE_MACRO_VAARGS); }
    "__STDC__" { RET(PRE_MACRO_STDC); }
    "__DATE__" { RET(PRE_MACRO_DATE); }
    "__FILE__" { RET(PRE_MACRO_FILE); }
    "__LINE__" { RET(PRE_MACRO_LINE); }
    "__TIMESTAMP__" { RET(PRE_MACRO_TIMESTAMP); }
    "__COUNTER__" { RET(PRE_MACRO_COUNTER); }
    "__cplusplus" { RET(PRE_MACRO_CPP); }
    "__FUNCTION__" { RET(PRE_MACRO_FUNC); }
    "__FUNCSIG__" { RET(PRE_MACRO_FUNCSIG); }
    "__FUNCDNAME__" { RET(PRE_MACRO_FUNCDN); }
    "_DEBUG" { RET(PRE_MACRO_DEBUG); }
    "_NDEBUG" { RET(PRE_MACRO_NDEBUG); }
    "_WIN32" { RET(PRE_MACRO_WIN32); }
    "_WIN64" { RET(PRE_MACRO_WIN64); }
    L (L|D)* { RET(ID); }
    ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
    (['] (ESC|any\[\n\\'])* ['])
    { RET(ICON); }
    (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
    { RET(FCON); }
    (["] (ESC|any\[\n\\"])* ["])
    { RET(SCON); }
    "..." { RET(ELLIPSIS); }
    ">>=" { RET(RSHIFTEQ); }
    "<<=" { RET(LSHIFTEQ); }
    "+=" { RET(ADDEQ); }
    "-=" { RET(SUBEQ); }
    "*=" { RET(MULEQ); }
    "/=" { RET(DIVEQ); }
    "%=" { RET(MODEQ); }
    "&=" { RET(ANDEQ); }
    "^=" { RET(XOREQ); }
    "|=" { RET(OREQ); }
    ">>" { RET(RSHIFT); }
    "<<" { RET(LSHIFT); }
    "++" { RET(INCR); }
    "--" { RET(DECR); }
    "->" { RET(DEREF); }
    "&&" { RET(ANDAND); }
    "||" { RET(OROR); }
    "<=" { RET(LEQ); }
    ">=" { RET(GEQ); }
    "==" { RET(EQL); }
    "!=" { RET(NEQ); }
    ";" { RET(SEMICOLON_SYM); }
    "{" { RET(LBRACE_SYM); }
    "}" { RET(RBRACE_SYM); }
    "," { RET(COMMA_SYM); }
    ":" { RET(COLON_SYM); }
    "=" { RET(SETEQ_SYM); }
    "(" { RET(LPARN_SYM); }
    ")" { RET(RPARN_SYM); }
    "[" { RET(LBRACK_SYM); }
    "]" { RET(RBRACK_SYM); }
    "." { RET(DOT_SYM); }
    "&" { RET(AND_SYM); }
    "!" { RET(EXCLA_SYM); }
    "~" { RET(TILDE_SYM); }
    "-" { RET(MINUS_SYM); }
    "+" { RET(PLUS_SYM); }
    "*" { RET(MULTI_SYM); }
    "/" { RET(DIVIDE_SYM); }
    "%" { RET(PRCNT_SYM); }
    "<" { RET(LT_SYM); }
    ">" { RET(RT_SYM); }
    "^" { RET(PWROF_SYM); }
    "|" { RET(OR_SYM); }
    "?" { RET(QUMRK); }
    [ \t\v\f]+ { goto preprocessor; }
    [\\] "\n"
    {
        if(cursor == s->eof) RET(EOI);
        s->pos = cursor; s->line++;
        goto preprocessor;
    }
    "\n"
    {
        if(cursor == s->eof) RET(EOI);
        s->pos = cursor; s->line++;
        goto std;
    }
    any { goto preprocessor; }
*/
}
main(){ | |
Scanner in; | |
int t; | |
FILE* outf; | |
memset((char*) &in, 0, sizeof(in)); | |
in.fd = 0; | |
in.line = 0; | |
outf = fopen("out.bc", "wb"); | |
while((t = scan(&in)) != EOI) { | |
fputc(t, outf); | |
/*printf("%0u10 [%d]\t\"%.*s\"\n", in.line, t, in.cur - in.tok, in.tok);*/ | |
} | |
fclose(outf); | |
_close(in.fd); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment