URI Tokenizer
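A small state-machine tokenizer that splits a URI such as /users/{id}/posts into three kinds of tokens: separators (/), normal path segments, and special {...} placeholder segments.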
export const STATE_IDENTIFYING = "STATE_IDENTIFYING"
export const STATE_TOKENIZING_NORMAL = "STATE_TOKENIZING_NORMAL"
export const STATE_TOKENIZING_SPECIAL = "STATE_TOKENIZING_SPECIAL"

export const TOKEN_SEPARATOR = "/"
export const TOKEN_SPECIAL_PREFIX = "{"
export const TOKEN_SPECIAL_SUFFIX = "}"

export const TOKEN_TYPE_NORMAL = "normal"
export const TOKEN_TYPE_SPECIAL = "special"
export const TOKEN_TYPE_SEPARATOR = "separator"
/**
 * Create tokens out of a URI.
 * @param {string} text - The URI to tokenize, e.g. "/users/{id}/posts"
 * @returns {object[]} The list of tokens, each with a type, a buffer and a bufferIndex
 */
export const tokenize = function(text) {
	const tokens = []
	const characters = [...text]
	let state = STATE_IDENTIFYING
	// The token currently being accumulated
	let token = {
		"type": null,
		"buffer": "",
		"bufferIndex": null
	}
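	// A three-state machine (names taken from the constants above):
	//   STATE_IDENTIFYING        - decide what kind of token starts at the current character
	//   STATE_TOKENIZING_NORMAL  - accumulate a plain segment until a separator or "{"
	//   STATE_TOKENIZING_SPECIAL - accumulate a "{...}" placeholder until "}"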
	for (const [index, character] of characters.entries()) {
		if (state === STATE_IDENTIFYING) {
			if (character === TOKEN_SEPARATOR) {
				// A separator is always a single-character token
				tokens.push({
					"type": TOKEN_TYPE_SEPARATOR,
					"buffer": character,
					"bufferIndex": index
				})
			} else if (index === characters.length - 1) {
				// Last character of the input: flush it as a normal token
				token.bufferIndex = index
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else if (character === TOKEN_SPECIAL_PREFIX) {
				token.bufferIndex = index
				token.buffer += character
				state = STATE_TOKENIZING_SPECIAL
			} else {
				token.bufferIndex = index
				token.buffer += character
				state = STATE_TOKENIZING_NORMAL
			}
		} else if (state === STATE_TOKENIZING_NORMAL) {
			if (character === TOKEN_SEPARATOR) {
				// Flush the normal token, emit the separator, then reset
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
				tokens.push({
					"type": TOKEN_TYPE_SEPARATOR,
					"buffer": character,
					"bufferIndex": index
				})
				token.type = null
				token.buffer = ""
				token.bufferIndex = null
				state = STATE_IDENTIFYING
			} else if (index === characters.length - 1) {
				// Last character of the input: flush the buffer as a normal token
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else if (character === TOKEN_SPECIAL_PREFIX) {
				// A "{" interrupts the current token: flush it and start a special one
				if (token.buffer === TOKEN_SEPARATOR) {
					token.type = TOKEN_TYPE_SEPARATOR
				} else {
					token.type = TOKEN_TYPE_NORMAL
				}
				tokens.push({...token})
				// reset token
				token.buffer = character
				token.type = null
				token.bufferIndex = index
				state = STATE_TOKENIZING_SPECIAL
			} else {
				token.buffer += character
			}
		} else if (state === STATE_TOKENIZING_SPECIAL) {
			if (character === TOKEN_SPECIAL_SUFFIX) {
				// The closing "}" completes the special token
				token.buffer += character
				token.type = TOKEN_TYPE_SPECIAL
				tokens.push({...token})
				// reset token
				token.buffer = ""
				token.type = null
				token.bufferIndex = null
				state = STATE_IDENTIFYING
			} else if (index === characters.length - 1) {
				// An unterminated "{...}" at the end of the input degrades to a normal token
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else {
				token.buffer += character
			}
		}
	}
	return tokens
}
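For reference, a usage sketch. The module path tokenizer.js is a hypothetical name for this file; the commented output is what the state machine above produces for this input:

import { tokenize } from "./tokenizer.js" // hypothetical file name

console.log(tokenize("/users/{id}/posts"))
// [
//   { type: "separator", buffer: "/", bufferIndex: 0 },
//   { type: "normal", buffer: "users", bufferIndex: 1 },
//   { type: "separator", buffer: "/", bufferIndex: 6 },
//   { type: "special", buffer: "{id}", bufferIndex: 7 },
//   { type: "separator", buffer: "/", bufferIndex: 11 },
//   { type: "normal", buffer: "posts", bufferIndex: 12 }
// ]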