sys: require('sys')
Lexer: require('../lib/lexer').Lexer

INTERPOLATION: /^\$([a-zA-Z_@]\w*)/

class PartialLexer

  constructor: (chunk) ->
    @i: 0
    @chunk: chunk
    @tokens: []

  token: (tag, value) ->
    @tokens.push([tag, value])

  tag: -> false
  # Work area
  # ---------

  # Matches regular expression literals. Lexing regular expressions is difficult
  # to distinguish from division, so we borrow some basic heuristics from
  # JavaScript and Ruby. (A throw-away check at the bottom of this file runs
  # this method over a flagged literal.)
  regex_token: ->
    return false unless regex: @balanced_token supress: true, ['/', '/']
    return false if regex.match /^\/\s+|\n/
    return false if include NOT_REGEX, @tag()
    flags: ['i', 'm', 'g', 'y']
    regex += flags[index] while (index: flags.indexOf @chunk.substr regex.length, 1) >= 0
    if regex.indexOf('}') > regex.indexOf('${')
      [regex, flags]: regex.substring(1).split('/')
      @tokens: @tokens.concat [['(', '('], ['NEW', 'new'], ['IDENTIFIER', 'RegExp'], ['CALL_START', '(']]
      @interpolate_string "\"${regex.replace('\\', '\\\\')}\""
      @tokens: @tokens.concat [[',', ','], ['STRING', "'$flags'"], ['CALL_END', ')'], [')', ')']]
    else
      @token 'REGEX', regex
    @i += regex.length
    true
  # Used methods with no changes
  # ----------------------------

  # Matches a balanced group such as a single or double-quoted string. Pass in
  # a series of delimiters, all of which must be nested correctly within the
  # contents of the string. This method allows us to have strings within
  # interpolations within strings, etc. (A throw-away check further down this
  # file runs it over a nested `${ "..." }` group.)
  balanced_string: (str, supress, delimited...) ->
    levels: []
    i: 0
    while i < str.length
      for pair in delimited
        [open, close]: pair
        if levels.length and starts str, '\\', i
          i += 1
          break
        else if levels.length and starts(str, close, i) and levels[levels.length - 1] is pair
          levels.pop()
          i += close.length - 1
          i += 1 unless levels.length
          break
        else if starts str, open, i
          levels.push(pair)
          i += open.length - 1
          break
      break unless levels.length
      i += 1
    if levels.length
      throw new Error "SyntaxError: Unterminated ${levels.pop()[0]} starting on line ${@line + 1}" unless supress
      return false
    return false if i is 0
    return str.substring(0, i)
  # Matches a token in which the passed delimiter pairs must be correctly
  # balanced (i.e. strings, JS literals).
  balanced_token: (supress, delimited...) ->
    @balanced_string @chunk, supress, delimited...
  # Expand variables and expressions inside double-quoted strings using
  # [ECMA Harmony's interpolation syntax](http://wiki.ecmascript.org/doku.php?id=strawman:string_interpolation)
  # for substitution of bare variables as well as arbitrary expressions.
  #
  #     "Hello $name."
  #     "Hello ${name.capitalize()}."
  #
  # If it encounters an interpolation, this method will recursively create a
  # new Lexer, tokenize the interpolated contents, and merge them into the
  # token stream. (A throw-away check near the bottom of this file exercises
  # the bare-variable case.)
  interpolate_string: (str) ->
    if str.length < 3 or not starts str, '"'
      @token 'STRING', str
    else
      lexer: new Lexer()
      tokens: []
      quote: str.substring(0, 1)
      [i, pi]: [1, 1]
      while i < str.length - 1
        if starts str, '\\', i
          i += 1
        else if match: str.substring(i).match INTERPOLATION
          [group, interp]: match
          interp: "this.${ interp.substring(1) }" if starts interp, '@'
          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
          tokens.push ['IDENTIFIER', interp]
          i += group.length - 1
          pi: i + 1
        else if (expr: @balanced_string str.substring(i), supress: false, ['${', '}'])
          tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i
          inner: expr.substring(2, expr.length - 1)
          if inner.length
            nested: lexer.tokenize "($inner)", {rewrite: no, line: @line}
            nested.pop()
            tokens.push ['TOKENS', nested]
          else
            tokens.push ['STRING', "$quote$quote"]
          i += expr.length - 1
          pi: i + 1
        i += 1
      tokens.push ['STRING', "$quote${ str.substring(pi, i) }$quote"] if pi < i and pi < str.length - 1
      for each, i in tokens
        if each[0] is 'TOKENS'
          @tokens: @tokens.concat each[1]
        else
          @token each[0], each[1]
        @token '+', '+' if i < tokens.length - 1

# Does a list include a value?
include: (list, value) ->
  list.indexOf(value) >= 0

# Peek at the beginning of a given string to see if it matches a sequence.
starts: (string, literal, start) ->
  string.substring(start, (start or 0) + literal.length) is literal
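
# Not part of the original gist: a throw-away check of `balanced_string` on a
# made-up nested group (the input string is purely illustrative). It should
# consume up to and including the outer closing brace and leave `rest` behind.
p: new PartialLexer('')
sys.puts p.balanced_string('${ "a${b}c" }rest', false, ['${', '}'], ['"', '"'])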

# Tokens which a regular expression will never immediately follow, but which
# a division operator might.
#
# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
#
# Our list is shorter, due to sans-parentheses method calls.
NOT_REGEX: [
  'NUMBER', 'REGEX', '++', '--', 'FALSE', 'NULL', 'TRUE'
]
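
# Not part of the original gist: a sketch exercising `interpolate_string` with a
# bare-variable interpolation. It assumes the ../lib/lexer module required at the
# top of this file resolves (a Lexer instance is created even when no ${ ... }
# expression is present). Expect STRING and IDENTIFIER tokens joined by '+'.
s: new PartialLexer('')
s.interpolate_string '"Hello $name."'
sys.puts s.tokens.join '\n'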

a: new PartialLexer(/\d+\s+/.toString() + ') was a Happy Bunny.')
a.regex_token()
sys.puts a.tokens.join '\n'
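
# Not part of the original gist: a second, made-up input for `regex_token` with
# trailing flags and no interpolation, so it should come back as a single REGEX
# token with the flags attached.
b: new PartialLexer('/[a-z]+/gi and the rest')
b.regex_token()
sys.puts b.tokens.join '\n'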