Skip to content

Instantly share code, notes, and snippets.

@dwighthouse
Last active January 5, 2021 18:22
Show Gist options
  • Save dwighthouse/4999a379f3d7afc9d1c264683cfbb806 to your computer and use it in GitHub Desktop.
Save dwighthouse/4999a379f3d7afc9d1c264683cfbb806 to your computer and use it in GitHub Desktop.
Second attempt at high quality SVG Path Parser. Should handle all valid and invalid SVG paths with a single pass, without modifying any of the original data.
/**
 * Metadata for every SVG path command, keyed by the uppercase command letter.
 *
 * - `name`:  human-readable command name; it is interpolated into error messages.
 * - `chunk`: how many numbers form one argument group of the command. Commands with a
 *            chunk of 2 or more must carry a multiple of that many numbers.
 */
const CommandData = {
    M: {
        name: 'MoveTo',
        chunk: 2,
    },
    L: {
        name: 'LineTo',
        chunk: 2,
    },
    H: {
        name: 'HorizontalLineTo',
        chunk: 1,
    },
    V: {
        name: 'VerticalLineTo',
        chunk: 1,
    },
    C: {
        name: 'CubicBézierCurve',
        chunk: 6,
    },
    S: {
        name: 'SmoothCubicBézierCurve',
        chunk: 4,
    },
    Q: {
        name: 'QuadraticBézierCurve',
        chunk: 4,
    },
    T: {
        name: 'SmoothQuadraticBézierCurve',
        chunk: 2,
    },
    A: {
        // Seven numbers per arc: two radii, a rotation, two flags and an end point.
        name: 'EllipticalArc',
        chunk: 7,
    },
    Z: {
        name: 'ClosePath',
        chunk: 0,
    },
};
// Every character that may appear in path data, listed in ascending character-code order.
const characters = '\t\n\r +,-.0123456789ACEHLMQSTVZacehlmqstvz';
// One type code per entry of `characters`, matched by position:
//   w = whitespace, s = sign, ',' = comma, '.' = period,
//   d = digit, c = command letter, e = exponent marker ("E" / "e").
const char_types = 'wwwws,s.ddddddddddcceccccccccccecccccccc';
// Character code (0-127) -> type code. Codes that are not listed in `characters` map to
// undefined, because indexOf yields -1 and indexing a string with -1 yields undefined.
// TODO: pop characters from front as they are encountered, remove indexof
const type_lookup = Array.from(
    { length: 128 },
    (_, code) => char_types[characters.indexOf(String.fromCharCode(code))],
);
/**
 * Error type thrown for every problem detected in path data, so callers can
 * tell parse failures apart from other errors via `instanceof` or `name`.
 */
class ParseError extends Error {
    /**
     * @param {string} message - Description of what is wrong with the path.
     */
    constructor(message) {
        super(message);
        this.name = 'ParseError';
    }
}
/**
 * Decides whether the current character is an elliptical-arc flag.
 *
 * A flag is expected when the latest command is "A" or "a" and the quantity of numbers
 * already stored for it, modulo 7, is 3 or 4 (the two flag slots of an arc group).
 *
 * @param {Array<{c: string, n: number[]}>} data - Commands parsed so far.
 * @param {string} char - The character being examined.
 * @param {string} type - The type code of `char`.
 * @returns {boolean} true when `char` is a flag ("0" or "1"); false when no flag is
 *     expected here or `char` is a comma or whitespace.
 * @throws {ParseError} When a flag is expected but `char` is neither a separator nor "0"/"1".
 */
const isFlag = (data, char, type) => {
    // No command yet means there is no arc that could own a flag.
    if (data.length === 0) {
        return false;
    }

    const { c: command, n: numbers } = data[data.length - 1];
    const insideArc = command === 'A' || command === 'a';
    if (!insideArc) {
        return false;
    }

    const slot = numbers.length % 7;
    const atFlagSlot = slot === 3 || slot === 4;
    if (!atFlagSlot) {
        return false;
    }

    // Separators may precede a flag; they are not the flag itself.
    const isSeparator = type === ',' || type === 'w';
    if (isSeparator) {
        return false;
    }

    if (type !== 'd') {
        throw new ParseError(`Invalid character used at ${CommandData.A.name} flag position: ${JSON.stringify(char)}`);
    }

    const isBinaryDigit = char === '0' || char === '1';
    if (!isBinaryDigit) {
        throw new ParseError(`Invalid flag used in ${CommandData.A.name}: ${JSON.stringify(char)}`);
    }

    return true;
};
/**
 * Verifies that the most recently parsed command holds a whole number of argument groups.
 *
 * Only the last entry of `data` is inspected. Commands whose chunk size is below 2 are
 * skipped: Z should have 0 numbers, and H and V accept any quantity because their chunk
 * size is 1.
 *
 * @param {Array<{c: string, n: number[]}>} data - Commands parsed so far.
 * @returns {void}
 * @throws {ParseError} When the quantity of numbers is not a multiple of the command's chunk size.
 */
const validateChunkSize = (data) => {
    if (data.length === 0) {
        return;
    }

    const last_item = data[data.length - 1];
    // Command metadata is keyed by the uppercase letter; relative commands share it.
    const { name, chunk } = CommandData[last_item.c.toUpperCase()];

    if (chunk < 2) {
        return;
    }

    if (last_item.n.length % chunk !== 0) {
        throw new ParseError(`Invalid quantity of numbers in Command ${name} ("${last_item.c}"); must be a multiple of ${chunk}. Instead found ${last_item.n.length}: "${last_item.c} ${last_item.n.join(' ')}"`);
    }
};
/**
 * Fully parses and verifies the path without modifying it or changing its format.
 *
 * Path Data Grammar: https://www.w3.org/TR/SVG11/paths.html#PathData
 *
 * The path is walked once, character by character. Each character is first checked against
 * the characters that precede it, then folded into the number currently being assembled or
 * used to start a new command.
 *
 * @param {string} path - SVG path data to parse.
 * @returns {Array<{c: string, n: number[]}>} One entry per command in source order: `c` is the
 *     command letter exactly as written and `n` holds its numbers (arc flags are stored as
 *     0 or 1). An empty or whitespace-only path yields an empty array.
 * @throws {ParseError} On an invalid character, a malformed number, a character in an illegal
 *     position, or a command with the wrong quantity of numbers.
 */
const parsePath = (path) => {
    let working_number = '';
    let working_number_dot_or_e = false;
    let prev_type = '';
    let prev_non_whitespace = '';
    let prev_non_whitespace_type = '';
    let prev_non_whitespace_index = 0;
    const data = [];

    // Converts the assembled token to a number, appends it to the latest command and
    // resets the working state. An empty token is ignored.
    const tryFinishNumber = (number) => {
        if (number === '') {
            return;
        }
        const value = Number(number);
        // The character-level checks below cannot see a malformed exponent such as
        // "1e3e4", "1e-" or "e5"; without this guard those tokens would be stored as NaN.
        if (Number.isNaN(value)) {
            throw new ParseError(`Invalid number used in path: ${JSON.stringify(number)}`);
        }
        data[data.length - 1].n.push(value);
        working_number = '';
        working_number_dot_or_e = false;
    };

    for (let p = 0; p < path.length; p += 1) {
        const char = path[p];
        const type = type_lookup[char.charCodeAt(0)];

        // Basic Path Text Validation
        // Validate path text at a basic level, mostly ensuring valid number syntax
        if (!type) {
            throw new ParseError(`Invalid character used in path: ${JSON.stringify(char)}`);
        }
        if (prev_type === '.' && type !== 'd') {
            throw new ParseError(`Invalid character followed Period: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 'e' && (type !== 's' && type !== 'd')) {
            throw new ParseError(`Invalid character followed Exponential: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 's' && (type !== '.' && type !== 'd')) {
            throw new ParseError(`Invalid character followed Sign: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_type === 'w' && type === 'e') {
            throw new ParseError(`Invalid character followed Whitespace: ${JSON.stringify(path.substring(p - 1, p + 1))}`);
        }
        if (prev_non_whitespace === ',' && (type === ',' || type === 'e')) {
            throw new ParseError(`Invalid character(s) followed Comma: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        // If at the beginning of the path
        if (prev_non_whitespace === '' && (char !== 'M' && char !== 'm' && type !== 'w')) {
            throw new ParseError(`Path must begin with a ${CommandData.M.name} Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        const is_previous_non_whitespace_Z = prev_non_whitespace === 'Z' || prev_non_whitespace === 'z';
        if (is_previous_non_whitespace_Z && (type !== 'c' && type !== 'w')) {
            throw new ParseError(`Invalid character(s) following ${CommandData.Z.name} Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }
        if ((prev_non_whitespace_type === 'c' && !is_previous_non_whitespace_Z) && (type === ',' || type === 'e' || type === 'c')) {
            throw new ParseError(`Invalid character(s) followed Command: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
        }

        // If at the end of the path
        if (p === path.length - 1) {
            // Weird quirk: Commas are legal at the end of the path
            // Except when following a Z or z Command (and optional whitespace)
            // This would be caught by the above rule specifically for Z Commands
            if (type === 'w') {
                if (prev_non_whitespace === '') {
                    // The entire path was whitespace, return no data
                    return [];
                }
                if (prev_non_whitespace !== 'Z' && prev_non_whitespace !== 'z' && prev_non_whitespace_type !== ',' && prev_non_whitespace_type !== 'd') {
                    throw new ParseError(`Invalid character(s) at the end of the path: ${JSON.stringify(path.substring(prev_non_whitespace_index, p + 1))}`);
                }
            }
            else {
                if (char !== 'Z' && char !== 'z' && type !== ',' && type !== 'd') {
                    throw new ParseError(`Invalid character(s) at the end of the path: ${JSON.stringify(path.substring(p, p + 1))}`);
                }
            }
        }

        // Do something with the data
        // More detailed SVG validation is also done here
        if (isFlag(data, char, type)) {
            // An arc flag is a complete single-character number; it needs no separator.
            tryFinishNumber(char);
        }
        else if (type === 's') {
            // A sign starts a new number unless it belongs to an exponent ("1e-5").
            if (prev_type !== 'e') {
                tryFinishNumber(working_number);
            }
            working_number += char;
        }
        else if (type === 'd') {
            working_number += char;
        }
        else if (type === '.') {
            // A second period, or a period after an exponent, starts a new number ("1.5.5").
            if (working_number_dot_or_e) {
                tryFinishNumber(working_number);
            }
            working_number += char;
            working_number_dot_or_e = true;
        }
        else if (type === 'e') {
            working_number += char;
            working_number_dot_or_e = true;
        }
        else if (type === 'c') {
            // A new command closes the previous one: flush its last number and check its size.
            tryFinishNumber(working_number);
            validateChunkSize(data);
            data.push({ c: char, n: [] });
        }
        else {
            // Else occurs for whitespace or comma
            tryFinishNumber(working_number);
        }

        // Prep for next loop
        // Retain extra data to avoid unnecessary calculations in subsequent loops
        prev_type = type;
        if (type !== 'w') {
            prev_non_whitespace = char;
            prev_non_whitespace_type = type;
            prev_non_whitespace_index = p;
        }
    }

    // Finish out any remaining working number and validate final chunks
    tryFinishNumber(working_number);
    validateChunkSize(data);
    return data;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment