Last active
January 5, 2021 18:22
-
-
Save dwighthouse/4999a379f3d7afc9d1c264683cfbb806 to your computer and use it in GitHub Desktop.
Second attempt at a high-quality SVG path parser. It should handle all valid and invalid SVG paths in a single pass, without modifying any of the original data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Metadata for every SVG path command, keyed by the uppercase command letter.
//   name:  human-readable command name, interpolated into ParseError messages
//   chunk: how many numbers make up one complete argument set for the command
const CommandData = {
    M: {
        name: 'MoveTo',
        chunk: 2,
    },
    L: {
        name: 'LineTo',
        chunk: 2,
    },
    H: {
        name: 'HorizontalLineTo',
        chunk: 1,
    },
    V: {
        name: 'VerticalLineTo',
        chunk: 1,
    },
    C: {
        name: 'CubicBézierCurve',
        chunk: 6,
    },
    S: {
        name: 'SmoothCubicBézierCurve',
        chunk: 4,
    },
    Q: {
        name: 'QuadraticBézierCurve',
        chunk: 4,
    },
    T: {
        name: 'SmoothQuadraticBézierCurve',
        chunk: 2,
    },
    A: {
        name: 'EllipticalArc',
        chunk: 7,
    },
    Z: {
        name: 'ClosePath',
        chunk: 0,
    },
};
// Every character that may appear in path data, paired index-for-index with
// its one-letter type code in `char_types`:
//   w = whitespace, s = sign, ',' = comma, '.' = period, d = digit,
//   c = command letter, e = exponent marker ("E" / "e")
const characters = '\t\n\r +,-.0123456789ACEHLMQSTVZacehlmqstvz';
const char_types = 'wwwws,s.ddddddddddcceccccccccccecccccccc';

// Maps a character code (0-127) to its type code. Codes that are not legal in
// path data stay `undefined`, which the parser treats as "invalid character".
// Built with one pass over `characters` rather than an indexOf scan per code.
const type_lookup = new Array(128).fill(undefined);
for (let i = 0; i < characters.length; i += 1) {
    type_lookup[characters.charCodeAt(i)] = char_types[i];
}
// Error thrown for any syntactic or structural problem found in path data.
// Its `name` is set to "ParseError" so callers can tell it apart from other
// errors by `instanceof` or by name.
class ParseError extends Error {
    constructor(message) {
        super(message);
        this.name = 'ParseError';
    }
}
// Decides whether `char` is one of the two single-character flags of an
// EllipticalArc command.
//
//   data: commands parsed so far, each shaped { c: commandLetter, n: numbers[] }
//   char: the character currently being examined
//   type: the type code of `char` (see `char_types`)
//
// Returns true when the most recent command is "A"/"a", the numbers already
// collected for it put the next value in slot 4 or 5 of a 7-number chunk, and
// `char` is "0" or "1". Returns false when no flag is expected here, or when
// `char` is only a separator (comma or whitespace) in front of the flag.
// Throws ParseError when a flag is expected but `char` is anything else.
const isFlag = (data, char, type) => {
    if (data.length === 0) {
        return false;
    }

    const last_item = data[data.length - 1];
    const is_arc = last_item.c === 'A' || last_item.c === 'a';
    if (!is_arc) {
        return false;
    }

    // With 3 or 4 numbers of the current chunk collected, the next value is
    // the large-arc flag or the sweep flag respectively.
    const data_modulus = last_item.n.length % 7;
    if (data_modulus !== 3 && data_modulus !== 4) {
        return false;
    }

    // Separators may precede the flag; they are not the flag itself.
    if (type === ',' || type === 'w') {
        return false;
    }

    if (type !== 'd') {
        throw new ParseError(`Invalid character used at ${CommandData.A.name} flag position: ${JSON.stringify(char)}`);
    }

    if (char !== '0' && char !== '1') {
        throw new ParseError(`Invalid flag used in ${CommandData.A.name}: ${JSON.stringify(char)}`);
    }

    return true;
};
// Checks that the most recently parsed command received a whole number of
// argument sets.
//
//   data: commands parsed so far, each shaped { c: commandLetter, n: numbers[] }
//
// Does nothing for an empty list. Throws ParseError when the count of numbers
// on the last command is not a multiple of that command's chunk size.
const validateChunkSize = (data) => {
    if (data.length === 0) {
        return;
    }

    const last_item = data[data.length - 1];
    const command = CommandData[last_item.c.toUpperCase()];

    // Z should have 0 numbers, H and V can have N because they have a chunk size of 1
    if (command.chunk < 2) {
        return;
    }

    if (last_item.n.length % command.chunk !== 0) {
        throw new ParseError(`Invalid quantity of numbers in Command ${command.name} ("${last_item.c}"); must be a multiple of ${command.chunk}. Instead found ${last_item.n.length}: "${last_item.c} ${last_item.n.join(' ')}"`);
    }
};
// Fully parses and verifies the path without modifying it or changing its format
// Path Data Grammar: https://www.w3.org/TR/SVG11/paths.html#PathData
//
//   path: string holding SVG path data
//
// Returns an array with one entry per command letter found, in order, each
// shaped { c: commandLetter, n: numbers[] }. The letter keeps its original
// case. An empty or whitespace-only path yields [].
// Throws ParseError on the first invalid character, malformed number,
// misplaced separator, bad EllipticalArc flag, or incomplete argument set.
const parsePath = (path) => {
    // Text of the number currently being assembled ('' when between numbers)
    let working_number = '';
    // True once the working number holds a period or an exponent; another
    // period after that point can only be the start of a new number
    let working_number_dot_or_e = false;
    // True once the working number holds an exponent marker
    let working_number_has_exponent = false;
    let prev_type = '';
    let prev_non_whitespace = '';
    let prev_non_whitespace_type = '';
    let prev_non_whitespace_index = 0;
    const data = [];

    // Appends `number` (if non-empty) to the latest command and resets the
    // working-number state.
    const tryFinishNumber = (number) => {
        if (number !== '') {
            data[data.length - 1].n.push(Number(number));
            working_number = '';
            working_number_dot_or_e = false;
            working_number_has_exponent = false;
        }
    };

    for (let p = 0; p < path.length; p += 1) {
        const char = path[p];
        const type = type_lookup[char.charCodeAt(0)];

        // Basic Path Text Validation
        // Validate path text at a basic level, mostly ensuring valid number syntax
        if (!type) {
            throw new ParseError(`Invalid character used in path: ${JSON.stringify(char)}`);
        }
        if (prev_type === '.' && type !== 'd') {
            throw new ParseError(`Invalid character followed Period: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 'e' && (type !== 's' && type !== 'd')) {
            throw new ParseError(`Invalid character followed Exponential: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 's' && (type !== '.' && type !== 'd')) {
            throw new ParseError(`Invalid character followed Sign: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 'w' && type === 'e') {
            throw new ParseError(`Invalid character followed Whitespace: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_non_whitespace === ',' && (type === ',' || type === 'e')) {
            throw new ParseError(`Invalid character(s) followed Comma: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        // If at the beginning of the path
        if (prev_non_whitespace === '' && (char !== 'M' && char !== 'm' && type !== 'w')) {
            throw new ParseError(`Path must begin with a ${CommandData.M.name} Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        const is_previous_non_whitespace_Z = prev_non_whitespace === 'Z' || prev_non_whitespace === 'z';
        if (is_previous_non_whitespace_Z && (type !== 'c' && type !== 'w')) {
            throw new ParseError(`Invalid character(s) following ${CommandData.Z.name} Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }
        if ((prev_non_whitespace_type === 'c' && !is_previous_non_whitespace_Z) && (type === ',' || type === 'e' || type === 'c')) {
            throw new ParseError(`Invalid character(s) followed Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        // If at the end of the path
        if (p === path.length - 1) {
            // Weird quirk: Commas are legal at the end of the path
            // Except when following a Z or z Command (and optional whitespace)
            // This would be caught by the above rule specifically for Z Commands
            if (type === 'w') {
                if (prev_non_whitespace === '') {
                    // The entire path was whitespace, return no data
                    return [];
                }
                if (prev_non_whitespace !== 'Z' && prev_non_whitespace !== 'z' && prev_non_whitespace_type !== ',' && prev_non_whitespace_type !== 'd') {
                    throw new ParseError(`Invalid character(s) at the end of the path: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
                }
            }
            else {
                if (char !== 'Z' && char !== 'z' && type !== ',' && type !== 'd') {
                    throw new ParseError(`Invalid character(s) at the end of the path: ${JSON.stringify(path.substring(p, p + 1))}`);
                }
            }
        }

        // Do something with the data
        // More detailed SVG validation is also done here
        if (isFlag(data, char, type)) {
            // A flag is a bare "0" or "1". A pending sign or period (left over
            // from terminating the previous number) would otherwise be
            // silently dropped, so reject it.
            if (working_number !== '') {
                throw new ParseError(`Invalid character(s) preceding ${CommandData.A.name} flag: ${JSON.stringify(working_number + char)}`);
            }
            tryFinishNumber(char);
        }
        else if (type === 's') {
            // A sign starts a new number unless it belongs to an exponent
            if (prev_type !== 'e') {
                tryFinishNumber(working_number);
            }
            working_number += char;
        }
        else if (type === 'd') {
            working_number += char;
        }
        else if (type === '.') {
            // A period after a period or exponent begins the next number
            if (working_number_dot_or_e) {
                tryFinishNumber(working_number);
            }
            working_number += char;
            working_number_dot_or_e = true;
        }
        else if (type === 'e') {
            // An exponent needs digits in front of it (there are none directly
            // after an arc flag) and may appear only once per number;
            // otherwise Number() would yield NaN.
            if (working_number === '' || working_number_has_exponent) {
                throw new ParseError(`Invalid use of Exponential: ${JSON.stringify(working_number + char)}`);
            }
            working_number += char;
            working_number_dot_or_e = true;
            working_number_has_exponent = true;
        }
        else if (type === 'c') {
            // Close out the previous command before starting the new one
            tryFinishNumber(working_number);
            validateChunkSize(data);
            data.push({ c: char, n: [] });
        }
        else {
            // Else occurs for whitespace or comma
            tryFinishNumber(working_number);
        }

        // Prep for next loop
        // Retain extra data to avoid unnecessary calculations in subsequent loops
        prev_type = type;
        if (type !== 'w') {
            prev_non_whitespace = char;
            prev_non_whitespace_type = type;
            prev_non_whitespace_index = p;
        }
    }

    // Finish out any remaining working number and validate final chunks
    tryFinishNumber(working_number);
    validateChunkSize(data);

    return data;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment