Adds line and column information to tokens created by the BigQuery tokenizer implemented in sql-formatter.
import { formatters } from "sql-formatter";
import type { Token as OrigToken } from "sql-formatter/lib/src/lexer/token";

// A token extended with its 0-based line and column position in the source query.
export type Token = OrigToken & {
  line: number;
  column: number;
};

export const createParser = () => {
  const tokenizer = formatters.bigquery.prototype.tokenizer();

  // Count the newlines in a token's preceding whitespace, and the number of
  // whitespace characters up to and including the last newline.
  const breaks = (precedingWhitespace: string) => {
    let newLines = 0;
    let charactersBeforeLastLine = 0;
    for (let i = 0; i < precedingWhitespace.length; i++) {
      const char = precedingWhitespace[i];
      if (char === "\n") {
        newLines += 1;
        charactersBeforeLastLine = i + 1;
      }
    }
    return { newLines, charactersBeforeLastLine };
  };

  return {
    parse(query: string): Array<Token> {
      const tokens = tokenizer.tokenize(query, {});
      let line = 0;      // current 0-based line number
      let caret = 0;     // absolute offset just past the last consumed token
      let lineStart = 0; // absolute offset of the first character of the current line
      return tokens.map((token) => {
        if (!token.precedingWhitespace) {
          // No whitespace before this token: it stays on the current line.
          const t = {
            ...token,
            line,
            column: token.start - lineStart,
          };
          caret += token.raw.length;
          return t;
        }
        const { newLines, charactersBeforeLastLine } = breaks(
          token.precedingWhitespace
        );
        line += newLines;
        if (newLines) {
          // The token begins on a new line, which starts right after the
          // last newline in the preceding whitespace.
          lineStart = caret + charactersBeforeLastLine;
        }
        const t = {
          ...token,
          line,
          column: token.start - lineStart,
        };
        caret += token.precedingWhitespace.length + token.raw.length;
        return t;
      });
    },
  };
};
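A minimal usage sketch (the query string and logging are illustrative, not part of the gist; it assumes `token.start` holds the token's absolute character offset in the query, which the column arithmetic above relies on):

const parser = createParser();
const tokens = parser.parse("SELECT\n  name\nFROM users");
for (const t of tokens) {
  console.log(t.line, t.column, t.raw);
}
// Tracing the logic above: `name` starts on the second line after two
// spaces, so it reports line 1, column 2 (both 0-based).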