Created
August 6, 2016 16:46
-
-
Save jarek-foksa/77ccfbe0b03b2d7b476d20f965f7350a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// @copyright | |
// © 2009-2012 Nicholas C. Zakas | |
// © 2012-2016 Jarosław Foksa | |
// | |
// @doc | |
// https://drafts.csswg.org/css-syntax | |
// | |
// @info | |
// CSS tokenizer based on TokenStream.js and Tokens.js from CSSLint. | |
// | |
// Note that definitions in the table below are case-insensitive and assume that CSS text was preprocessed to get | |
// rid of all escaped characters. | |
// | |
// ==============================================================================================================
// Token type/subtype   Definition                                                      Introduced in
// ==============================================================================================================
// S                    {s}                                                             CSS2.1
// COMMENT              {comment}                                                       CSS2.1
// BAD_COMMENT          {badcomment}                                                    CSS2.1
// CDO                  <!--                                                            CSS2.1
// CDC                  -->                                                             CSS2.1
// INCLUDES             ~=                                                              CSS2.1
// DASHMATCH            |=                                                              CSS2.1
// PREFIXMATCH          ^=                                                              CSS3:Selectors
// SUFFIXMATCH          $=                                                              CSS3:Selectors
// SUBSTRINGMATCH       *=                                                              CSS3:Selectors
// STRING               {string}                                                        CSS2.1
// BAD_STRING           {badstring}                                                     CSS2.1
// URI                  {uri}                                                           CSS2.1
// BAD_URI              {baduri}                                                        CSS2.1
// IDENT                {ident}                                                         CSS2.1
// HASH                 #{name}                                                         CSS2.1
// CHARSET_SYM          @charset (+trailing space)                                      CSS2.1
// IMPORT_SYM           @import                                                         CSS2.1
// MEDIA_SYM            @media                                                          CSS2.1
// FONT_FACE_SYM        @font-face                                                      CSS3:Syntax
// NAMESPACE_SYM        @namespace                                                      CSS3:Syntax
// PAGE_SYM             @page                                                           CSS2.1
// KEYFRAMES_SYM        @keyframes                                                      CSS3:Animations
// ATKEYWORD            @{ident}                                                        CSS3:Syntax
// IMPORTANT_SYM        {important}                                                     CSS2.1
// LENGTH               {num}(em|ex|ch|rem|vw|vh|vm|vmin|vmax|q|gd|cm|mm|in|pt|pc|px)   CSS3:Values
// ANGLE                {num}(deg|rad|grad|turn)                                        CSS3:Values
// TIME                 {num}(ms|s)                                                     CSS3:Values
// FREQ                 {num}(hz|khz)                                                   CSS3:Values
// RESOLUTION           {num}(dpi|dpcm)                                                 CSS3:Media Queries
// DIMENSION            {num}{ident}                                                    CSS2.1
// PERCENTAGE           {num}%                                                          CSS2.1
// NUMBER               {num}                                                           CSS2.1
// FUNCTION             {ident}(                                                        CSS2.1
// PLUS                 {w}+                                                            CSS3:Selectors
// GREATER              {w}>                                                            CSS3:Selectors
// COMMA                {w},                                                            CSS3:Selectors
// TILDE                {w}~                                                            CSS3:Selectors
// NOT                  :not(                                                           CSS3:Selectors
// MEDIA_ONLY           only                                                            CSS3:Media Queries (as ONLY)
// MEDIA_NOT            not                                                             CSS3:Media Queries (as NOT)
// MEDIA_AND            and                                                             CSS3:Media Queries (as AND)
// UNICODE_RANGE        {unicode_range}                                                 CSS3:Fonts
// CHAR                 Anything not matched by other tokens                            CSS2.1
import StringScanner from "./string-scanner"; | |
// Two-character attribute-selector operators and the token type each one produces.
const MATCH_TOKEN_TYPES = new Map([
  ["|=", "DASHMATCH"],
  ["~=", "INCLUDES"],
  ["^=", "PREFIXMATCH"],
  ["$=", "SUFFIXMATCH"],
  ["*=", "SUBSTRINGMATCH"],
]);

// Lowercased at-keywords that have a dedicated token type. Anything else starting with "@" is ATKEYWORD.
const AT_TOKEN_TYPES = new Map([
  ["@import", "IMPORT_SYM"],
  ["@page", "PAGE_SYM"],
  ["@media", "MEDIA_SYM"],
  ["@font-face", "FONT_FACE_SYM"],
  ["@namespace", "NAMESPACE_SYM"],
  ["@top-left-corner", "TOPLEFTCORNER_SYM"],
  ["@top-left", "TOPLEFT_SYM"],
  ["@top-center", "TOPCENTER_SYM"],
  ["@top-right", "TOPRIGHT_SYM"],
  ["@top-right-corner", "TOPRIGHTCORNER_SYM"],
  ["@bottom-left-corner", "BOTTOMLEFTCORNER_SYM"],
  ["@bottom-left", "BOTTOMLEFT_SYM"],
  ["@bottom-center", "BOTTOMCENTER_SYM"],
  ["@bottom-right", "BOTTOMRIGHT_SYM"],
  ["@bottom-right-corner", "BOTTOMRIGHTCORNER_SYM"],
  ["@left-top", "LEFTTOP_SYM"],
  ["@left-middle", "LEFTMIDDLE_SYM"],
  ["@left-bottom", "LEFTBOTTOM_SYM"],
  ["@right-top", "RIGHTTOP_SYM"],
  ["@right-middle", "RIGHTMIDDLE_SYM"],
  ["@right-bottom", "RIGHTBOTTOM_SYM"],
  ["@keyframes", "KEYFRAMES_SYM"],
  ["@-webkit-keyframes", "KEYFRAMES_SYM"],
  ["@-moz-keyframes", "KEYFRAMES_SYM"],
]);

// Lowercased identifiers that are reported with a media-query specific token type instead of IDENT.
const MEDIA_KEYWORD_TOKEN_TYPES = new Map([
  ["and", "MEDIA_AND"],
  ["only", "MEDIA_ONLY"],
  ["not", "MEDIA_NOT"],
]);

// Unit patterns (case-insensitive, whole-unit match) checked in order; a unit matching none is a DIMENSION.
const UNIT_TOKEN_TYPES = [
  [/^(?:em|ex|px|gd|rem|vw|vh|vm|vmin|vmax|q|ch|cm|mm|in|pt|pc)$/i, "LENGTH"],
  [/^(?:deg|rad|grad|turn)$/i, "ANGLE"],
  [/^(?:ms|s)$/i, "TIME"],
  [/^(?:hz|khz)$/i, "FREQ"],
  [/^(?:dpi|dpcm)$/i, "RESOLUTION"],
];

// Single-character classes used by the _is*() predicates.
const HEX_DIGIT_PATTERN = /^[0-9a-fA-F]$/;
const DIGIT_PATTERN = /\d/;
const WHITESPACE_PATTERN = /\u0009|\u000a|\u000c|\u000d|\u0020/;
const NEWLINE_PATTERN = /\u000a|\u000d\u000a|\u000d|\u000c/;
const NAME_START_PATTERN = /[a-z_\u0080-\uFFFF\\]/i;
const NAME_EXTRA_PATTERN = /[0-9\-]/;
const URL_CHAR_PATTERN = /^[!#$%&\\*-~]$/;

export default class CSSTokenizer {
  // @info
  //   Split CSS text into a flat array of tokens. Every character read from the scanner ends up in exactly one
  //   token; characters that do not start any known token become CHAR tokens.
  //   CRLF and lone CR line endings are converted to LF before scanning, so token text and positions refer to the
  //   normalized text.
  // @type
  //   (string) => Array<{cssText: string, type: string, start: Array, end: Array}>
  tokenize(cssText) {
    const tokens = [];

    // Normalize line endings: "\r\n" and a lone "\r" both become "\n".
    cssText = cssText.replace(/\r\n?/g, "\n");

    this.scanner = new StringScanner(cssText);
    this.currentTokenStart = this._getPosition();

    // Each iteration consumes the first char of the next token and dispatches on it.
    for (let c = this.scanner.read(); c; c = this.scanner.read()) {
      tokens.push(this._getToken(c));
      // The next token starts where this one ended.
      this.currentTokenStart = this._getPosition();
    }

    return tokens;
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // @info
  //   Snapshot of the scanner position as [cursor, line, column].
  // @type
  //   () => Array
  _getPosition() {
    return [this.scanner.cursor, this.scanner.line, this.scanner.column];
  }

  // @info
  //   Read one complete token whose first char `c` was already consumed from the scanner.
  // @type
  //   (string) => Object
  _getToken(c) {
    const next = this.scanner.peek();

    switch (c) {
      // COMMENT "/* ... */", BAD_COMMENT "/* unclosed", or CHAR "/"
      case "/":
        return (next === "*") ? this._getCommentToken(c) : this._getCharToken(c);

      // DASHMATCH "|=", INCLUDES "~=", PREFIXMATCH "^=", SUFFIXMATCH "$=", SUBSTRINGMATCH "*=", or CHAR
      case "|":
      case "~":
      case "^":
      case "$":
      case "*":
        return (next === "=") ? this._getMatchToken(c) : this._getCharToken(c);

      // STRING or BAD_STRING
      case '"':
      case "'":
        return this._getStringToken(c);

      // HASH "#header", "#fff", or CHAR "#"
      case "#":
        return this._isNameChar(next) ? this._getHashToken(c) : this._getCharToken(c);

      // NUMBER ".99", PERCENTAGE ".9%", DIMENSION ".2gr" (or a more specific unit type), or CHAR "."
      case ".":
        return this._isDigit(next) ? this._getNumberToken(c) : this._getCharToken(c);

      // CDC "-->", a negative NUMBER / PERCENTAGE / DIMENSION, or CHAR "-"
      case "-":
        if (next === "-") {
          return this._getHtmlCommentEndToken(c);
        }
        return this._isDigit(next) ? this._getNumberToken(c) : this._getCharToken(c);

      // IMPORTANT_SYM or CHAR "!"
      case "!":
        return this._getImportantToken(c);

      // One of the dedicated at-rule tokens (IMPORT_SYM, MEDIA_SYM, PAGE_SYM, the page-margin tokens such as
      // LEFTBOTTOM_SYM "@left-bottom", KEYFRAMES_SYM, CHARSET_SYM, ...), ATKEYWORD "@{ident}", or CHAR "@"
      case "@":
        return this._getAtToken(c);

      // NOT ":not(" or CHAR ":"
      case ":":
        return this._getNotToken(c);

      // CDO "<!--" or CHAR "<"
      case "<":
        return this._getHtmlCommentStartToken(c);

      // UNICODE_RANGE when followed by "+", otherwise "u"/"U" is handled like any other identifier start.
      case "U":
      case "u":
        if (next === "+") {
          return this._getUnicodeRangeToken(c);
        }
        break;
    }

    // NUMBER, PERCENTAGE, DIMENSION or one of the unit-specific types
    if (this._isDigit(c)) {
      return this._getNumberToken(c);
    }
    // S
    if (this._isWhitespace(c)) {
      return this._getWhitespaceToken(c);
    }
    // URI, FUNCTION, IDENT, MEDIA_ONLY, MEDIA_NOT or MEDIA_AND
    if (this._isIdentStart(c)) {
      return this._getIdentToken(c);
    }
    return this._getCharToken(c);
  }

  // @info
  //   Build a token object. `start` is the position recorded when the token's first char was about to be read,
  //   `end` is the current scanner position.
  // @type
  //   (string, string) => Object
  _createToken(cssText, type) {
    return {
      cssText: cssText,
      type: type,
      start: this.currentTokenStart,
      end: this._getPosition(),
    };
  }

  // @info
  //   Get COMMENT or BAD_COMMENT token. A comment is only COMMENT when it has both the opening "/*" and a
  //   separate closing "*/" (so "/*/" is BAD_COMMENT).
  // @type
  //   ("/") => Object
  _getCommentToken(firstChar) {
    const cssText = this._readComment(firstChar);
    const isClosed = cssText.length >= 4 && cssText.endsWith("*/");
    return this._createToken(cssText, isClosed ? "COMMENT" : "BAD_COMMENT");
  }

  // @info
  //   Get DASHMATCH, INCLUDES, PREFIXMATCH, SUFFIXMATCH or SUBSTRINGMATCH token. The caller has already verified
  //   that the next char is "=".
  // @type
  //   ("|" || "~" || "^" || "$" || "*") => Object
  _getMatchToken(firstChar) {
    const cssText = firstChar + this.scanner.read();
    return this._createToken(cssText, MATCH_TOKEN_TYPES.get(cssText) || "CHAR");
  }

  // @info
  //   Get HASH token.
  // @type
  //   ("#") => Object
  _getHashToken(firstChar) {
    return this._createToken(this._readName(firstChar), "HASH");
  }

  // @info
  //   Get NUMBER, LENGTH, ANGLE, TIME, FREQ, RESOLUTION, DIMENSION or PERCENTAGE token.
  // @type
  //   ("0" || "1" || "2" || "3" || "4" || "5" || "6" || "7" || "8" || "9" || "." || "-") => Object
  _getNumberToken(firstChar) {
    const number = this._readNumber(firstChar);
    const next = this.scanner.peek();

    if (this._isIdentStart(next)) {
      const unit = this._readName(this.scanner.read());
      const match = UNIT_TOKEN_TYPES.find(([pattern]) => pattern.test(unit));
      return this._createToken(number + unit, match ? match[1] : "DIMENSION");
    }
    if (next === "%") {
      return this._createToken(number + this.scanner.read(), "PERCENTAGE");
    }
    return this._createToken(number, "NUMBER");
  }

  // @info
  //   Get one of the @ tokens, or CHAR when "@" is not followed by a name.
  // @type
  //   ("@") => Object
  _getAtToken(firstChar) {
    let cssText = firstChar + this._readName();

    // No name after "@": plain CHAR token.
    if (cssText === "@") {
      return this._createToken(cssText, "CHAR");
    }
    // CHARSET_SYM is unusual - it must be written in lowercase and it must end with exactly one space, which
    // becomes part of the token.
    if (cssText + this.scanner.peek() === "@charset ") {
      cssText += this.scanner.read();
      return this._createToken(cssText, "CHARSET_SYM");
    }
    // Remaining at-keywords are matched case-insensitively.
    return this._createToken(cssText, AT_TOKEN_TYPES.get(cssText.toLowerCase()) || "ATKEYWORD");
  }

  // @info
  //   Get STRING or BAD_STRING token. The string is BAD_STRING when an unescaped newline or the end of input is
  //   reached before the closing delimiter; the newline itself is not consumed.
  // @type
  //   (`"` || `'`) => Object
  _getStringToken(firstChar) {
    const delimiter = firstChar;
    let cssText = firstChar;
    // True while the char about to be read is preceded by an unescaped backslash. Tracking this (instead of
    // just looking at the previous char) keeps an escaped backslash from escaping the closing delimiter.
    let isEscaped = false;

    while (true) {
      if (!isEscaped && this._isNewline(this.scanner.peek())) {
        return this._createToken(cssText, "BAD_STRING");
      }

      const c = this.scanner.read();

      // End of input: the string was never closed. Checked before appending so no "null" leaks into cssText.
      if (!c) {
        return this._createToken(cssText, "BAD_STRING");
      }

      cssText += c;

      if (isEscaped) {
        isEscaped = false;
      }
      else if (c === delimiter) {
        return this._createToken(cssText, "STRING");
      }
      else if (c === "\\") {
        isEscaped = true;
      }
    }
  }

  // @info
  //   Get S token covering the whole run of consecutive whitespace.
  // @type
  //   ("\u0009" || "\u000a" || "\u000c" || "\u000d" || "\u0020") => Object
  _getWhitespaceToken(firstChar) {
    return this._createToken(firstChar + this._readWhitespace(), "S");
  }

  // @info
  //   Get CDO token, or CHAR "<" when the input does not continue with "!--".
  // @type
  //   ("<") => Object
  _getHtmlCommentStartToken(firstChar) {
    return this._getLiteralToken(firstChar, "<!--", "CDO");
  }

  // @info
  //   Get CDC token, or CHAR "-" when the input does not continue with "->".
  // @type
  //   ("-") => Object
  _getHtmlCommentEndToken(firstChar) {
    return this._getLiteralToken(firstChar, "-->", "CDC");
  }

  // @info
  //   Get NOT token (matched case-insensitively), or CHAR ":" when the input does not continue with "not(".
  // @type
  //   (":") => Object
  _getNotToken(firstChar) {
    return this._getLiteralToken(firstChar, ":not(", "NOT");
  }

  // @info
  //   Try to read the rest of a fixed (lowercase) literal whose first char was already consumed. On a
  //   case-insensitive match return a token of the given type; otherwise rewind the scanner to just after
  //   `firstChar` and return a CHAR token for `firstChar` alone.
  // @type
  //   (string, string, string) => Object
  _getLiteralToken(firstChar, literal, type) {
    this.scanner.storePosition();

    const cssText = firstChar + this.scanner.read(literal.length - 1);

    if (cssText.toLowerCase() === literal) {
      return this._createToken(cssText, type);
    }

    this.scanner.restorePosition();
    return this._createToken(firstChar, "CHAR");
  }

  // @info
  //   Get URI, FUNCTION, MEDIA_AND, MEDIA_ONLY, MEDIA_NOT or IDENT token.
  // @type
  //   (string) => Object
  _getIdentToken(firstChar) {
    let cssText = this._readName(firstChar);

    // A name not followed by "(" is a media keyword or a plain identifier.
    if (this.scanner.peek() !== "(") {
      return this._createToken(cssText, MEDIA_KEYWORD_TOKEN_TYPES.get(cssText.toLowerCase()) || "IDENT");
    }

    cssText += this.scanner.read();

    if (cssText.toLowerCase() !== "url(") {
      return this._createToken(cssText, "FUNCTION");
    }

    // _readURI() hands back its argument unchanged when what follows is not a valid URI; in that case only
    // "url(" is consumed and it is reported as an ordinary FUNCTION.
    cssText = this._readURI(cssText);
    return this._createToken(cssText, (cssText.toLowerCase() === "url(") ? "FUNCTION" : "URI");
  }

  // @info
  //   Get IMPORTANT_SYM token, or CHAR "!" when the input does not continue with "important". Whitespace and
  //   comments are allowed between "!" and "important"; whitespace is kept in the token text while comment text
  //   is consumed but not included in it.
  // @type
  //   ("!") => Object
  _getImportantToken(firstChar) {
    let cssText = firstChar;

    this.scanner.storePosition();

    for (let c = this.scanner.read(); c; c = this.scanner.read()) {
      if (c === "/") {
        // Only a comment may appear here; anything else means this is not IMPORTANT_SYM.
        if (this.scanner.peek() !== "*" || this._readComment(c) === "") {
          break;
        }
      }
      else if (this._isWhitespace(c)) {
        cssText += c + this._readWhitespace();
      }
      else if (/^i$/i.test(c)) {
        const rest = this.scanner.read(8);

        if (/^mportant$/i.test(rest)) {
          return this._createToken(cssText + c + rest, "IMPORTANT_SYM");
        }
        break;
      }
      else {
        break;
      }
    }

    // Not IMPORTANT_SYM (including running out of input): rewind to just after "!" and emit it as CHAR.
    this.scanner.restorePosition();
    return this._createToken(firstChar, "CHAR");
  }

  // @info
  //   Get UNICODE_RANGE token, or CHAR for `firstChar` alone when "+" is not followed by a range.
  // @type
  //   ("u" || "U") => Object
  _getUnicodeRangeToken(firstChar) {
    // A unicode range must continue with "+".
    if (this.scanner.peek() !== "+") {
      return this._createToken(firstChar, "CHAR");
    }

    this.scanner.storePosition();

    // Read the "+" char and the first part of the range.
    let cssText = firstChar + this.scanner.read();
    cssText += this._readUnicodeRangePart(true);

    // Nothing followed "+": rewind so the "+" is tokenized on its own and report only the first char.
    if (cssText.length === 2) {
      this.scanner.restorePosition();
      return this._createToken(firstChar, "CHAR");
    }

    // A second part ("-" followed by hex digits) is only possible when the first part has no "?" wildcard.
    if (!cssText.includes("?") && this.scanner.peek() === "-") {
      this.scanner.storePosition();

      const secondPart = this.scanner.read() + this._readUnicodeRangePart(false);

      // Only the "-" was read: back up and keep just the first part.
      if (secondPart.length === 1) {
        this.scanner.restorePosition();
      }
      else {
        cssText += secondPart;
      }
    }

    return this._createToken(cssText, "UNICODE_RANGE");
  }

  // @info
  //   Get CHAR token.
  // @type
  //   (string) => Object
  _getCharToken(c) {
    return this._createToken(c, "CHAR");
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // @info
  //   Consume consecutive whitespace chars and return them (possibly an empty string).
  // @type
  //   () => string
  _readWhitespace() {
    let whitespace = "";

    while (this._isWhitespace(this.scanner.peek())) {
      whitespace += this.scanner.read();
    }

    return whitespace;
  }

  // @info
  //   Read a comment whose leading "/" was already consumed (and is passed as `first`). Returns the comment
  //   text up to and including the closing "*/", or up to the end of input when it is never closed. Returns an
  //   empty string when the next char is not "*" (that char is consumed regardless).
  // @type
  //   (string?) => string
  _readComment(first) {
    let comment = first || "";
    let c = this.scanner.read();

    if (c !== "*") {
      return "";
    }

    // Take the opening "*" before looking for the terminator so it can't double as the closing "*" ("/*/").
    comment += c;

    for (c = this.scanner.read(); c; c = this.scanner.read()) {
      comment += c;

      if (c === "*" && this.scanner.peek() === "/") {
        comment += this.scanner.read();
        break;
      }
    }

    return comment;
  }

  // @info
  //   Read the remaining digits (and at most one ".") of a number whose first char was already consumed.
  // @type
  //   (string) => string
  _readNumber(first) {
    let number = first;
    let hasDot = (first === ".");

    for (let c = this.scanner.peek(); c; c = this.scanner.peek()) {
      if (c === ".") {
        // A second dot ends the number.
        if (hasDot) {
          break;
        }
        hasDot = true;
      }
      else if (!this._isDigit(c)) {
        break;
      }
      number += this.scanner.read();
    }

    return number;
  }

  // @info
  //   Read a quoted string starting at the scanner position (the next char is the delimiter). Returns the string
  //   including both delimiters, or an empty string when an unescaped newline or the end of input is reached
  //   before the closing delimiter.
  // @type
  //   () => string
  _readString() {
    const delimiter = this.scanner.read();
    let cssText = delimiter;
    // True while the char about to be read is preceded by an unescaped backslash.
    let isEscaped = false;

    while (true) {
      const next = this.scanner.peek();

      // Out of input, or an unescaped newline: not a valid string.
      if (!next || (!isEscaped && this._isNewline(next))) {
        return "";
      }

      const c = this.scanner.read();
      cssText += c;

      if (isEscaped) {
        isEscaped = false;
      }
      else if (c === delimiter) {
        return cssText;
      }
      else if (c === "\\") {
        isEscaped = true;
      }
    }
  }

  // @info
  //   Read the rest of a URI token after "url(" (passed as `first`). Whitespace around the inner value is
  //   skipped and not included in the result. Returns `first` unchanged, with the scanner rewound, when the
  //   content is not a valid URI.
  // @type
  //   (string) => string
  _readURI(first) {
    this.scanner.storePosition();

    // Skip whitespace before the value.
    this._readWhitespace();

    const next = this.scanner.peek();
    const inner = (next === "'" || next === '"') ? this._readString() : this._readURL();

    // Skip whitespace after the value.
    this._readWhitespace();

    // No inner value, or no closing paren: it's not a URI.
    if (inner === "" || this.scanner.peek() !== ")") {
      this.scanner.restorePosition();
      return first;
    }

    return first + inner + this.scanner.read();
  }

  // @info
  //   Read an unquoted URL: the longest run of chars from the set [!#$%&\\*-~].
  // @type
  //   () => string
  _readURL() {
    let url = "";

    while (URL_CHAR_PATTERN.test(this.scanner.peek())) {
      url += this.scanner.read();
    }

    return url;
  }

  // @info
  //   Read name chars and escapes following the scanner position and append them to `firstChar`.
  // @type
  //   (string?) => string
  _readName(firstChar) {
    let ident = firstChar || "";

    for (let c = this.scanner.peek(); c; c = this.scanner.peek()) {
      if (c === "\\") {
        ident += this._readEscape(this.scanner.read());
      }
      else if (this._isNameChar(c)) {
        ident += this.scanner.read();
      }
      else {
        break;
      }
    }

    return ident;
  }

  // @info
  //   Read an escape sequence whose backslash was already consumed (and is passed as `firstChar`). Either up to
  //   six hex digits optionally followed by a single whitespace char, or - when no hex digit follows - the one
  //   char after the backslash. Nothing is appended when the input ends right after the backslash.
  // @type
  //   (string?) => string
  _readEscape(firstChar) {
    let cssEscape = firstChar || "";
    let hexCount = 0;

    while (hexCount < 6 && this._isHexDigit(this.scanner.peek())) {
      cssEscape += this.scanner.read();
      hexCount += 1;
    }

    const next = this.scanner.peek();

    if (hexCount > 0) {
      // A single whitespace char terminates a hex escape and belongs to it; any other char is left alone.
      if (next && this._isWhitespace(next)) {
        cssEscape += this.scanner.read();
      }
    }
    else if (next) {
      // Non-hex escape: the escaped char itself.
      cssEscape += this.scanner.read();
    }

    return cssEscape;
  }

  // @info
  //   Read one part of a unicode range: hex digits, optionally followed by "?" wildcards, at most six chars in
  //   total.
  // @type
  //   (boolean) => string
  _readUnicodeRangePart(allowQuestionMark) {
    let part = "";

    // First read hex digits
    while (part.length < 6 && this._isHexDigit(this.scanner.peek())) {
      part += this.scanner.read();
    }

    // Then read question marks if allowed
    if (allowQuestionMark) {
      while (part.length < 6 && this.scanner.peek() === "?") {
        part += this.scanner.read();
      }
    }

    return part;
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // @info
  //   Check if character could be a hexadecimal digit (0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f).
  // @type
  //   (string) => boolean
  _isHexDigit(c) {
    return HEX_DIGIT_PATTERN.test(c);
  }

  // @info
  //   Check if character could be a decimal digit (0,1,2,3,4,5,6,7,8,9).
  // @type
  //   (string) => boolean
  _isDigit(c) {
    return DIGIT_PATTERN.test(c);
  }

  // @info
  //   Check if character is a whitespace sign (tab, line feed, form feed, carriage return or space).
  // @type
  //   (string) => boolean
  _isWhitespace(c) {
    return WHITESPACE_PATTERN.test(c);
  }

  // @info
  //   Check if character is a new line sign (line feed, carriage return or form feed).
  // @type
  //   (string) => boolean
  _isNewline(c) {
    return NEWLINE_PATTERN.test(c);
  }

  // @info
  //   Check if character could be used as the first char of the name. Non-string values (such as the `null`
  //   returned at the end of input) are never name starts.
  // @type
  //   (string) => boolean
  _isNameStart(c) {
    return typeof c === "string" && NAME_START_PATTERN.test(c);
  }

  // @info
  //   Check if char could be used inside name.
  // @type
  //   (string) => boolean
  _isNameChar(c) {
    return Boolean(c) && (this._isNameStart(c) || NAME_EXTRA_PATTERN.test(c));
  }

  // @info
  //   Check if char could be used as the first char of an identifier.
  // @type
  //   (string) => boolean
  _isIdentStart(c) {
    return Boolean(c) && (this._isNameStart(c) || c === "-");
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment