lilpolymath · April 16, 2021 17:43 · lilpolymath · Apr 10, 2021
diff --git a/LexicalAnalyzer.js b/LexicalAnalyzer.js
 /**
  * Splits Python source text into a flat list of `{ type, token }` records.
  *
  * Emitted types: LPAREN, RPAREN, NUMBER, FLOAT, IMAGINARY, STRING, KEYWORD,
  * MODULE (a name directly followed by `.`), FUNCTION (a name directly followed
  * by `(`), IDENTIFIER, OPERATOR, DELIMITER, NEWLINE and WHITESPACE.
  * Comments are dropped; the line break that ends a comment is still reported.
  *
  * @param {string} code - Source text to scan.
  * @returns {Array<{type: string, token: string}>} Tokens in source order.
  * @throws {TypeError} When a character does not start any known token.
  */
 const tokenizer = code => {
   // `and`, `in`, `is`, `not` and `or` are reported as KEYWORD like every other reserved word.
   const KEYWORDS = new Set([
     'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break',
     'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally',
     'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal',
     'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
   ]);

   // Explicit lexeme lists: a regex character class cannot express multi-character
   // operators, and an accidental range inside one matches unrelated characters.
   const OPERATORS = [
     '**', '//', '<<', '>>', '<=', '>=', '==', '!=', ':=',
     '+', '-', '*', '/', '%', '=', '<', '>', '&', '|', '^', '~', '!', '\\',
   ];
   const DELIMITERS = [
     '**=', '//=', '>>=', '<<=',
     '->', '+=', '-=', '*=', '/=', '%=', '@=', '&=', '|=', '^=',
     '[', ']', '{', '}', '.', ',', ':', ';', '@',
   ];
   // Longest lexemes first so that `**=` wins over `**`, and `**` over `*`.
   const PUNCTUATION = [
     ...OPERATORS.map(lexeme => ({ lexeme, type: 'OPERATOR' })),
     ...DELIMITERS.map(lexeme => ({ lexeme, type: 'DELIMITER' })),
   ].sort((a, b) => b.lexeme.length - a.lexeme.length);

   const IDENTIFIER_START = /[A-Za-z_]/;
   const IDENTIFIER_PART = /[A-Za-z0-9_]/;
   const DIGIT = /[0-9]/;
   const QUOTES = /["'’]/;
   const COMMENT_BODY = /[^\r\n]/;
   const WHITESPACE = /\s/;

   const tokens = [];
   let cursor = 0;

   // Records code[cursor..end) as one token and moves the cursor past it.
   const emit = (type, end) => {
     tokens.push({ type, token: code.slice(cursor, end) });
     cursor = end;
   };

   // Returns the first index at or after `start` whose character fails `pattern`.
   // The explicit bounds check matters: testing `undefined` against a regex
   // matches the text "undefined" and would never terminate at end of input.
   const scanWhile = (pattern, start) => {
     let end = start;
     while (end < code.length && pattern.test(code[end])) {
       end++;
     }
     return end;
   };

   // Returns the index just past the string literal opening at `start`.
   // An unterminated literal runs to the end of the input.
   const endOfString = start => {
     const quote = code[start];
     const tripled = quote.repeat(3);
     const fence = code.startsWith(tripled, start) ? tripled : quote;
     let end = start + fence.length;

     while (end < code.length) {
       if (code[end] === '\\') {
         // Skip the escaped character so an escaped quote does not close the literal.
         end += 2;
       } else if (code.startsWith(fence, end)) {
         return end + fence.length;
       } else {
         end++;
       }
     }

     return code.length;
   };

   while (cursor < code.length) {
     const char = code[cursor];

     if (char === '(' || char === ')') {
       emit(char === '(' ? 'LPAREN' : 'RPAREN', cursor + 1);
       continue;
     }

     if (DIGIT.test(char)) {
       // digits, optional fraction, optional imaginary suffix
       let end = scanWhile(DIGIT, cursor);
       let type = 'NUMBER';

       if (code[end] === '.') {
         end = scanWhile(DIGIT, end + 1);
         type = 'FLOAT';
       }
       if (code[end] === 'j') {
         end++;
         type = 'IMAGINARY';
       }

       emit(type, end);
       continue;
     }

     if (QUOTES.test(char)) {
       emit('STRING', endOfString(cursor));
       continue;
     }

     if (IDENTIFIER_START.test(char)) {
       const end = scanWhile(IDENTIFIER_PART, cursor);
       const following = code[end];
       let type = 'IDENTIFIER';

       if (KEYWORDS.has(code.slice(cursor, end))) {
         type = 'KEYWORD';
       } else if (following === '.') {
         type = 'MODULE';
       } else if (following === '(') {
         type = 'FUNCTION';
       }

       emit(type, end);
       continue;
     }

     if (char === '\n' || char === '\r') {
       // A Windows line ending is one line break, not two.
       emit('NEWLINE', cursor + (code.startsWith('\r\n', cursor) ? 2 : 1));
       continue;
     }

     if (char === '#') {
       // Drop the comment text; the line break (if any) is tokenized on the next pass.
       cursor = scanWhile(COMMENT_BODY, cursor);
       continue;
     }

     if (WHITESPACE.test(char)) {
       emit('WHITESPACE', cursor + 1);
       continue;
     }

     const punctuation = PUNCTUATION.find(({ lexeme }) => code.startsWith(lexeme, cursor));
     if (punctuation) {
       emit(punctuation.type, cursor + punctuation.lexeme.length);
       continue;
     }

     throw new TypeError('I dont know what this character is: ' + char);
   }

   return tokens;
 };

 // A target file must be supplied as the first CLI argument; without one,
 // print the usage line and exit with status 1.
 if (!(process.argv.length >= 3)) {
   console.log('Usage: node ' + process.argv[1] + ' FILENAME');
   process.exit(1);
 }

 // Load the named file as UTF-8 text and print the tokens produced for its contents.
 var fs = require('fs');
 var filename = process.argv[2];

 fs.readFile(filename, 'utf8', (readError, contents) => {
   if (readError) {
     throw readError;
   }
   console.log('TOKENS:', tokenizer(contents));
 });
	/**
	 * Splits Python source text into a flat list of `{ type, token }` records.
	 *
	 * Emitted types: LPAREN, RPAREN, NUMBER, FLOAT, IMAGINARY, STRING, KEYWORD,
	 * MODULE (a name directly followed by `.`), FUNCTION (a name directly followed
	 * by `(`), IDENTIFIER, OPERATOR, DELIMITER, NEWLINE and WHITESPACE.
	 * Comments are dropped; the line break that ends a comment is still reported.
	 *
	 * @param {string} code - Source text to scan.
	 * @returns {Array<{type: string, token: string}>} Tokens in source order.
	 * @throws {TypeError} When a character does not start any known token.
	 */
	const tokenizer = code => {
	  // `and`, `in`, `is`, `not` and `or` are reported as KEYWORD like every other reserved word.
	  const KEYWORDS = new Set([
	    'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break',
	    'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally',
	    'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal',
	    'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
	  ]);

	  // Explicit lexeme lists: a regex character class cannot express multi-character
	  // operators, and an accidental range inside one matches unrelated characters.
	  const OPERATORS = [
	    '**', '//', '<<', '>>', '<=', '>=', '==', '!=', ':=',
	    '+', '-', '*', '/', '%', '=', '<', '>', '&', '|', '^', '~', '!', '\\',
	  ];
	  const DELIMITERS = [
	    '**=', '//=', '>>=', '<<=',
	    '->', '+=', '-=', '*=', '/=', '%=', '@=', '&=', '|=', '^=',
	    '[', ']', '{', '}', '.', ',', ':', ';', '@',
	  ];
	  // Longest lexemes first so that `**=` wins over `**`, and `**` over `*`.
	  const PUNCTUATION = [
	    ...OPERATORS.map(lexeme => ({ lexeme, type: 'OPERATOR' })),
	    ...DELIMITERS.map(lexeme => ({ lexeme, type: 'DELIMITER' })),
	  ].sort((a, b) => b.lexeme.length - a.lexeme.length);

	  const IDENTIFIER_START = /[A-Za-z_]/;
	  const IDENTIFIER_PART = /[A-Za-z0-9_]/;
	  const DIGIT = /[0-9]/;
	  const QUOTES = /["'’]/;
	  const COMMENT_BODY = /[^\r\n]/;
	  const WHITESPACE = /\s/;

	  const tokens = [];
	  let cursor = 0;

	  // Records code[cursor..end) as one token and moves the cursor past it.
	  const emit = (type, end) => {
	    tokens.push({ type, token: code.slice(cursor, end) });
	    cursor = end;
	  };

	  // Returns the first index at or after `start` whose character fails `pattern`.
	  // The explicit bounds check matters: testing `undefined` against a regex
	  // matches the text "undefined" and would never terminate at end of input.
	  const scanWhile = (pattern, start) => {
	    let end = start;
	    while (end < code.length && pattern.test(code[end])) {
	      end++;
	    }
	    return end;
	  };

	  // Returns the index just past the string literal opening at `start`.
	  // An unterminated literal runs to the end of the input.
	  const endOfString = start => {
	    const quote = code[start];
	    const tripled = quote.repeat(3);
	    const fence = code.startsWith(tripled, start) ? tripled : quote;
	    let end = start + fence.length;

	    while (end < code.length) {
	      if (code[end] === '\\') {
	        // Skip the escaped character so an escaped quote does not close the literal.
	        end += 2;
	      } else if (code.startsWith(fence, end)) {
	        return end + fence.length;
	      } else {
	        end++;
	      }
	    }

	    return code.length;
	  };

	  while (cursor < code.length) {
	    const char = code[cursor];

	    if (char === '(' || char === ')') {
	      emit(char === '(' ? 'LPAREN' : 'RPAREN', cursor + 1);
	      continue;
	    }

	    if (DIGIT.test(char)) {
	      // digits, optional fraction, optional imaginary suffix
	      let end = scanWhile(DIGIT, cursor);
	      let type = 'NUMBER';

	      if (code[end] === '.') {
	        end = scanWhile(DIGIT, end + 1);
	        type = 'FLOAT';
	      }
	      if (code[end] === 'j') {
	        end++;
	        type = 'IMAGINARY';
	      }

	      emit(type, end);
	      continue;
	    }

	    if (QUOTES.test(char)) {
	      emit('STRING', endOfString(cursor));
	      continue;
	    }

	    if (IDENTIFIER_START.test(char)) {
	      const end = scanWhile(IDENTIFIER_PART, cursor);
	      const following = code[end];
	      let type = 'IDENTIFIER';

	      if (KEYWORDS.has(code.slice(cursor, end))) {
	        type = 'KEYWORD';
	      } else if (following === '.') {
	        type = 'MODULE';
	      } else if (following === '(') {
	        type = 'FUNCTION';
	      }

	      emit(type, end);
	      continue;
	    }

	    if (char === '\n' || char === '\r') {
	      // A Windows line ending is one line break, not two.
	      emit('NEWLINE', cursor + (code.startsWith('\r\n', cursor) ? 2 : 1));
	      continue;
	    }

	    if (char === '#') {
	      // Drop the comment text; the line break (if any) is tokenized on the next pass.
	      cursor = scanWhile(COMMENT_BODY, cursor);
	      continue;
	    }

	    if (WHITESPACE.test(char)) {
	      emit('WHITESPACE', cursor + 1);
	      continue;
	    }

	    const punctuation = PUNCTUATION.find(({ lexeme }) => code.startsWith(lexeme, cursor));
	    if (punctuation) {
	      emit(punctuation.type, cursor + punctuation.lexeme.length);
	      continue;
	    }

	    throw new TypeError('I dont know what this character is: ' + char);
	  }

	  return tokens;
	};

	// A target file must be supplied as the first CLI argument; without one,
	// print the usage line and exit with status 1.
	if (!(process.argv.length >= 3)) {
	  console.log('Usage: node ' + process.argv[1] + ' FILENAME');
	  process.exit(1);
	}

	// Load the named file as UTF-8 text and print the tokens produced for its contents.
	var fs = require('fs');
	var filename = process.argv[2];

	fs.readFile(filename, 'utf8', (readError, contents) => {
	  if (readError) {
	    throw readError;
	  }
	  console.log('TOKENS:', tokenizer(contents));
	});