Created
November 12, 2019 06:23
-
-
Save Yawenina/c46e41ce27ac655ea1bbea14b767ab7d to your computer and use it in GitHub Desktop.
tokenize js code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits a snippet of JavaScript-like source text into a flat list of tokens.
 *
 * Recognised token types:
 *   - `sep`        a single `;`
 *   - `parens`     a single `(` or `)`
 *   - `brace`      a single `{` or `}`
 *   - `operator`   a single `+`, `-`, `<` or `>`
 *   - `whitespace` a run of consecutive whitespace characters
 *   - `string`     a single- or double-quoted literal, quotes included
 *   - `number`     a run of decimal digits
 *   - `identifier` a run of word characters (`[A-Za-z0-9_]`) or `$`
 *
 * A string literal ends at the first unescaped quote that matches its opening
 * quote, so `"it's"` is one token. A backslash keeps the character after it
 * inside the literal. A string that is never closed yields a token running to
 * the end of the input.
 *
 * @param {string} code - Source text to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order;
 *   concatenating every `value` reproduces `code`.
 * @throws {Error} When a character does not start any known token.
 */
function tokenize(code) {
  // Patterns are created once per call instead of once per character.
  const WHITESPACE = /\s/;
  const DIGIT = /[0-9]/;
  const IDENTIFIER_CHAR = /[\w$]/;

  // Characters that always form a complete token by themselves.
  // TODO: multi-character operators (e.g. `>=`, `++`) are not recognised.
  const SINGLE_CHAR_TYPES = new Map([
    [';', 'sep'],
    ['(', 'parens'],
    [')', 'parens'],
    ['{', 'brace'],
    ['}', 'brace'],
    ['+', 'operator'],
    ['-', 'operator'],
    ['<', 'operator'],
    ['>', 'operator'],
  ]);

  const tokens = [];
  const length = code.length;

  // Returns the index just past the run of characters matching `pattern`
  // that begins at `start`.
  const scanWhile = (start, pattern) => {
    let end = start;
    while (end < length && pattern.test(code.charAt(end))) {
      end++;
    }
    return end;
  };

  // Returns the index just past the string literal whose opening quote is at
  // `start`, or `length` when the literal is never closed.
  const scanString = (start) => {
    const quote = code.charAt(start);
    let end = start + 1;
    while (end < length) {
      const current = code.charAt(end);
      end++;
      if (current === '\\') {
        // Keep the escaped character so an escaped quote does not end the literal.
        if (end < length) {
          end++;
        }
        continue;
      }
      if (current === quote) {
        break;
      }
    }
    return end;
  };

  let i = 0;
  while (i < length) {
    const char = code.charAt(i);

    const singleCharType = SINGLE_CHAR_TYPES.get(char);
    if (singleCharType !== undefined) {
      tokens.push({ type: singleCharType, value: char });
      i++;
      continue;
    }

    let type;
    let end;
    if (WHITESPACE.test(char)) {
      // Merge consecutive whitespace into a single token.
      type = 'whitespace';
      end = scanWhile(i + 1, WHITESPACE);
    } else if (char === '"' || char === "'") {
      type = 'string';
      end = scanString(i);
    } else if (DIGIT.test(char)) {
      // Digits are tested before identifiers, so `1a` is a number then an identifier.
      type = 'number';
      end = scanWhile(i + 1, DIGIT);
    } else if (IDENTIFIER_CHAR.test(char)) {
      type = 'identifier';
      end = scanWhile(i + 1, IDENTIFIER_CHAR);
    } else {
      throw new Error(`Unexpected token, ${char}`);
    }

    tokens.push({ type, value: code.slice(i, end) });
    i = end;
  }

  return tokens;
}
// Demo: tokenize a small snippet, then print the token list and its length.
const tokens = tokenize(`
if (1 > 0) {
alert("if 1 > 0");
}
`);
console.log(tokens);
console.log(tokens.length);
/** Result (token list; the sample above produces 21 tokens)
[
  { type: 'whitespace', value: '\n' },
  { type: 'identifier', value: 'if' },
  { type: 'whitespace', value: ' ' },
  { type: 'parens', value: '(' },
  { type: 'number', value: '1' },
  { type: 'whitespace', value: ' ' },
  { type: 'operator', value: '>' },
  { type: 'whitespace', value: ' ' },
  { type: 'number', value: '0' },
  { type: 'parens', value: ')' },
  { type: 'whitespace', value: ' ' },
  { type: 'brace', value: '{' },
  { type: 'whitespace', value: '\n' },
  { type: 'identifier', value: 'alert' },
  { type: 'parens', value: '(' },
  { type: 'string', value: '"if 1 > 0"' },
  { type: 'parens', value: ')' },
  { type: 'sep', value: ';' },
  { type: 'whitespace', value: '\n' },
  { type: 'brace', value: '}' },
  { type: 'whitespace', value: '\n' }
]
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment