Last active
August 7, 2022 15:35
-
-
Save sfengyuan/473e626dff10914ea274e4b6efa6e6f5 to your computer and use it in GitHub Desktop.
clean bad Unicode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
date: 2022/8/7 23:34
*/
// Space-like characters that are normalized to a plain ASCII space (U+0020):
//   U+00A0          no-break space
//   U+2000..U+200A  en quad, em quad, en, em, 3/em, 4/em, 6/em, figure,
//                   punctuation, thin and hair spaces
//   U+200B          zero width space
//   U+202F          narrow no-break space
//   U+205F          math space
//   U+3000          ideographic space
//   U+303F          ideographic half space
//   U+FEFF          zero width no-break space
//   U+E0020         tag space
// The `u` flag is required so that \u{E0020} is read as a single code point
// (it is a surrogate pair in UTF-16) instead of as two separate code units.
const GARBAGE_SPACES = /[\u00A0\u2000-\u200B\u202F\u205F\u3000\u303F\uFEFF\u{E0020}]/gu

// Fullwidth variants of the printable ASCII characters '!' (U+0021) through
// '~' (U+007E). They occupy U+FF01..U+FF5E in the same order.
const FULLWIDTH_ASCII = /[\uFF01-\uFF5E]/g

// Constant distance between a fullwidth form and its ASCII counterpart
// (0xFF01 - 0x0021).
const FULLWIDTH_OFFSET = 0xFEE0

/**
 * Replaces "bad" Unicode in a string with plain ASCII equivalents.
 *
 * - Every character matched by GARBAGE_SPACES becomes a single ASCII space.
 *   Zero-width characters are also turned into a space, not removed.
 * - Every fullwidth ASCII variant (U+FF01..U+FF5E) becomes the corresponding
 *   ASCII character (U+0021..U+007E).
 *
 * All other characters are left untouched. Each replacement runs as a single
 * pass over the input; the replacements only produce ASCII characters, so the
 * second pass can never be affected by the output of the first.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The cleaned text.
 */
export default function cleanBadUnicode (str) {
  return str
    .replace(GARBAGE_SPACES, ' ')
    .replace(FULLWIDTH_ASCII, ch => String.fromCharCode(ch.charCodeAt(0) - FULLWIDTH_OFFSET))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment