Last active
May 10, 2025 12:45
-
-
Save fabiolimace/495bd143e9add13a4656a52228d5bfe3 to your computer and use it in GitHub Desktop.
Corretor de caracteres especiais - ISO-8859-1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html> | |
| <!-- Yes, I know the jokes about long if-else chains. I just don't feel like using a hash table here. --> | |
| <head> | |
| <title>Corretor de Caracteres Especiais - v20220721</title> | |
| <meta charset="UTF-8"> | |
| <meta name="author" content="Fabio Lima"> | |
| <meta name="keywords" content="Codificação, Caracteres, ISO-8859-1, UTF-8, WINDOWS-1252"> | |
| <meta name="description" content="Substitui caracteres inválidos por caracteres equivalentes"> | |
| <script> | |
| var entities = [["&", "\u0026"], ["<", "\u003C"], [">", "\u003E"], [" ", "\u00A0"], ["¡", "\u00A1"], ["¢", "\u00A2"], ["£", "\u00A3"], ["¤", "\u00A4"], ["¥", "\u00A5"], ["¦", "\u00A6"], ["§", "\u00A7"], ["¨", "\u00A8"], ["©", "\u00A9"], ["ª", "\u00AA"], ["«", "\u00AB"], ["¬", "\u00AC"], ["­", "\u00AD"], ["®", "\u00AE"], ["¯", "\u00AF"], ["°", "\u00B0"], ["±", "\u00B1"], ["²", "\u00B2"], ["³", "\u00B3"], ["´", "\u00B4"], ["µ", "\u00B5"], ["¶", "\u00B6"], ["·", "\u00B7"], ["¸", "\u00B8"], ["¹", "\u00B9"], ["º", "\u00BA"], ["»", "\u00BB"], ["¼", "\u00BC"], ["½", "\u00BD"], ["¾", "\u00BE"], ["¿", "\u00BF"], ["À", "\u00C0"], ["Á", "\u00C1"], ["Â", "\u00C2"], ["Ã", "\u00C3"], ["Ä", "\u00C4"], ["Å", "\u00C5"], ["Æ", "\u00C6"], ["Ç", "\u00C7"], ["È", "\u00C8"], ["É", "\u00C9"], ["Ê", "\u00CA"], ["Ë", "\u00CB"], ["Ì", "\u00CC"], ["Í", "\u00CD"], ["Î", "\u00CE"], ["Ï", "\u00CF"], ["Ð", "\u00D0"], ["Ñ", "\u00D1"], ["Ò", "\u00D2"], ["Ó", "\u00D3"], ["Ô", "\u00D4"], ["Õ", "\u00D5"], ["Ö", "\u00D6"], ["×", "\u00D7"], ["Ø", "\u00D8"], ["Ù", "\u00D9"], ["Ú", "\u00DA"], ["Û", "\u00DB"], ["Ü", "\u00DC"], ["Ý", "\u00DD"], ["Þ", "\u00DE"], ["ß", "\u00DF"], ["à", "\u00E0"], ["á", "\u00E1"], ["â", "\u00E2"], ["ã", "\u00E3"], ["ä", "\u00E4"], ["å", "\u00E5"], ["æ", "\u00E6"], ["ç", "\u00E7"], ["è", "\u00E8"], ["é", "\u00E9"], ["ê", "\u00EA"], ["ë", "\u00EB"], ["ì", "\u00EC"], ["í", "\u00ED"], ["î", "\u00EE"], ["ï", "\u00EF"], ["ð", "\u00F0"], ["ñ", "\u00F1"], ["ò", "\u00F2"], ["ó", "\u00F3"], ["ô", "\u00F4"], ["õ", "\u00F5"], ["ö", "\u00F6"], ["÷", "\u00F7"], ["ø", "\u00F8"], ["ù", "\u00F9"], ["ú", "\u00FA"], ["û", "\u00FB"], ["ü", "\u00FC"], ["ý", "\u00FD"], ["þ", "\u00FE"], ["ÿ", "\u00FF"], ["ƒ", "\u0192"], ["Α", "\u0391"], ["Β", "\u0392"], ["Γ", "\u0393"], ["Δ", "\u0394"], ["Ε", "\u0395"], ["Ζ", "\u0396"], ["Η", "\u0397"], ["Θ", "\u0398"], ["Ι", "\u0399"], ["Κ", "\u039A"], ["Λ", "\u039B"], ["Μ", "\u039C"], ["Ν", "\u039D"], ["Ξ", "\u039E"], ["Ο", "\u039F"], ["Π", "\u03A0"], ["Ρ", 
"\u03A1"], ["Σ", "\u03A3"], ["Τ", "\u03A4"], ["Υ", "\u03A5"], ["Φ", "\u03A6"], ["Χ", "\u03A7"], ["Ψ", "\u03A8"], ["Ω", "\u03A9"], ["α", "\u03B1"], ["β", "\u03B2"], ["γ", "\u03B3"], ["δ", "\u03B4"], ["ε", "\u03B5"], ["ζ", "\u03B6"], ["η", "\u03B7"], ["θ", "\u03B8"], ["ι", "\u03B9"], ["κ", "\u03BA"], ["λ", "\u03BB"], ["μ", "\u03BC"], ["ν", "\u03BD"], ["ξ", "\u03BE"], ["ο", "\u03BF"], ["π", "\u03C0"], ["ρ", "\u03C1"], ["ς", "\u03C2"], ["σ", "\u03C3"], ["τ", "\u03C4"], ["υ", "\u03C5"], ["φ", "\u03C6"], ["χ", "\u03C7"], ["ψ", "\u03C8"], ["ω", "\u03C9"], ["ϑ", "\u03D1"], ["ϒ", "\u03D2"], ["ϖ", "\u03D6"], ["•", "\u2022"], ["…", "\u2026"], ["′", "\u2032"], ["″", "\u2033"], ["‾", "\u203E"], ["⁄", "\u2044"], ["℘", "\u2118"], ["ℑ", "\u2111"], ["ℜ", "\u211C"], ["™", "\u2122"], ["ℵ", "\u2135"], ["←", "\u2190"], ["↑", "\u2191"], ["→", "\u2192"], ["↓", "\u2193"], ["↔", "\u2194"], ["↵", "\u21B5"], ["⇐", "\u21D0"], ["⇑", "\u21D1"], ["⇒", "\u21D2"], ["⇓", "\u21D3"], ["⇔", "\u21D4"], ["∀", "\u2200"], ["∂", "\u2202"], ["∃", "\u2203"], ["∅", "\u2205"], ["∇", "\u2207"], ["∈", "\u2208"], ["∉", "\u2209"], ["∋", "\u220B"], ["∏", "\u220F"], ["∑", "\u2211"], ["−", "\u2212"], ["∗", "\u2217"], ["√", "\u221A"], ["∝", "\u221D"], ["∞", "\u221E"], ["∠", "\u2220"], ["∧", "\u2227"], ["∨", "\u2228"], ["∩", "\u2229"], ["∪", "\u222A"], ["∫", "\u222B"], ["∴", "\u2234"], ["∼", "\u223C"], ["≅", "\u2245"], ["≈", "\u2248"], ["≠", "\u2260"], ["≡", "\u2261"], ["≤", "\u2264"], ["≥", "\u2265"], ["⊂", "\u2282"], ["⊃", "\u2283"], ["⊄", "\u2284"], ["⊆", "\u2286"], ["⊇", "\u2287"], ["⊕", "\u2295"], ["⊗", "\u2297"], ["⊥", "\u22A5"], ["⋅", "\u22C5"], ["⌈", "\u2308"], ["⌉", "\u2309"], ["⌊", "\u230A"], ["⌋", "\u230B"], ["⟨", "\u2329"], ["⟩", "\u232A"], ["◊", "\u25CA"], ["♠", "\u2660"], ["♣", "\u2663"], ["♥", "\u2665"], ["♦", "\u2666"]]; | |
| function getAnchor(chr, index) { | |
| return '<span id="index' + index + '">' + chr + '</span>'; | |
| } | |
| function mensagem(chr, subst, index) { | |
| var texto = "Substituido o caractere [" + chr + "] por [" + subst + "]"; | |
| var elemento = document.getElementById("mensagens"); | |
| var anchor = "index" + index; | |
| elemento.innerHTML += '<br/><a href="javascript:jump(' + "'" + anchor + "'" + '); selectText(' + "'" + anchor + "'" + '); " >' + texto + '</span>'; | |
| } | |
| function limparMensagens() { | |
| var elemento = document.getElementById("mensagens"); | |
| elemento.innerHTML = ''; | |
| } | |
| function selectText(containerid) { | |
| if (document.selection) { | |
| var range = document.body.createTextRange(); | |
| range.moveToElementText(document.getElementById(containerid)); | |
| range.select(); | |
| } else if (window.getSelection) { | |
| var range = document.createRange(); | |
| range.selectNode(document.getElementById(containerid)); | |
| window.getSelection().addRange(range); | |
| } | |
| } | |
| function jump(anchor) { | |
| window.location.href = "#" + anchor; | |
| } | |
| function escape(text) { | |
| return text.replaceAll(/</g, '<').replaceAll(/>/g, '>'); | |
| } | |
| function removeComments(html) { | |
| var matches = html.match(/<!--.{0,15}/g); | |
| if (matches) { | |
| matches.forEach(match => { | |
| var elemento = document.getElementById("mensagens"); | |
| var texto = "Removido comentário HTML: " + escape(match); | |
| elemento.innerHTML += '<br/><span>' + texto + '...</span>'; | |
| }); | |
| // Remove all HTML comments, | |
| // such as those of Outlook: | |
| // <!--[if gte mso 9]><![endif]--> | |
| regexp = /<!--([^>]|[^-]>)*-->/ig; | |
| return html.replaceAll(regexp, ''); | |
| } | |
| return html; | |
| } | |
| function replaceEntitiesMessage(entity, subst) { | |
| var elemento = document.getElementById("mensagens"); | |
| var texto = "Substituido o HTML entity [" + entity.substring(1, entity.length - 1) + "] por [" + subst + "]"; | |
| elemento.innerHTML += '<br/><span>' + texto + '</span>'; | |
| } | |
| function replaceEntities(html) { | |
| var matches = html.match(/&#?[\w]+;/g); | |
| if (matches) { | |
| let uniq = matches.filter((element, index) => { | |
| return matches.indexOf(element) == index; | |
| }); | |
| uniq.forEach(m => { | |
| if (m.charAt(1) == '#') { | |
| if (m.charAt(2) == 'x') { | |
| // Formato hexadecimal | |
| var num = parseInt("0x" + m.substring(3, m.length - 1)); | |
| var chr = String.fromCharCode(num); | |
| replaceEntitiesMessage(m, chr); | |
| html = html.replaceAll(m, chr); | |
| } else { | |
| // Formato decimal | |
| var num = parseInt(m.substring(2, m.length - 1)); | |
| var char = String.fromCharCode(num); | |
| replaceEntitiesMessage(m, chr); | |
| html = html.replaceAll(m, chr); | |
| } | |
| } else { | |
| // Formato nomeado | |
| entities.forEach(e => { | |
| if (m == e[0]) { | |
| var chr = e[1]; | |
| replaceEntitiesMessage(m, chr); | |
| html = html.replaceAll(m, chr); | |
| } | |
| }); | |
| } | |
| }); | |
| } | |
| return html; | |
| } | |
| function corrigir() { | |
| limparMensagens(); | |
| var novoHtml = ""; | |
| var elemento = document.getElementById("texto"); | |
| var html = elemento.innerHTML; | |
| // Remove comments if any... | |
| html = removeComments(html); | |
| html = replaceEntities(html); | |
| for (var i = 0; i < html.length; i++) { | |
| t = ' '; | |
| c = html.charAt(i); | |
| // Basic Latin e Latin1 Supplement | |
| if (c > '\u00FF') { | |
| // Combining Diacritical Marks | |
| if ((c >= '\u0300') && (c <= '\u036F')) { | |
| var x = html.charAt(i - 1); | |
| novoHtml = novoHtml.substring(0, novoHtml.length - 1); | |
| t = translateDiacritic(x, c); | |
| t = getAnchor(t, i); | |
| mensagem(x + ' ' + c, t, i); | |
| } | |
| // General Punctuation | |
| else if ((c >= '\u2000') && (c <= '\u206F')) { | |
| t = translateGeneralPunctuation(c); | |
| t = getAnchor(t, i); | |
| mensagem(c, t, i); | |
| } | |
| // Other characters | |
| else { | |
| t = '\u00BF'; // INVERTED QUESTION MARK | |
| t = getAnchor(t, i); | |
| mensagem(c, t, i); | |
| } | |
| } else { | |
| // C1 Control | |
| if ((c >= '\u0080') && (c <= '\u009F')) { | |
| t = translateC1(c); | |
| t = getAnchor(t, i); | |
| mensagem(c, t, i); | |
| } | |
| // NO-BREAK SPACE (NBSP) | |
| else if (c == '\u00A0') { | |
| t = ' '; // invisible | |
| t = getAnchor(t, i); | |
| mensagem("nbsp", t, i); | |
| } | |
| // SOFT HYPHEN (SHY) | |
| else if (c == '\u00AD') { | |
| t = '-' // invisible | |
| t = getAnchor(t, i); | |
| mensagem("shy", t, i); | |
| } | |
| else { | |
| t = c; | |
| } | |
| } | |
| novoHtml += t; | |
| } | |
| elemento.innerHTML = novoHtml; | |
| alert("Finalizado. Confira as alterações."); | |
| } | |
| // Code points for diacritic marks which are always combined with letters | |
| function translateDiacritic(letter, diacritic) { | |
| var l = letter; | |
| var d = diacritic; | |
| var c = '\u00BF'; // INVERTED QUESTION MARK | |
| switch (d) { | |
| case '\u0300': // COMBINING GRAVE ACCENT | |
| if (l == 'a') c = '\u00E0'; | |
| else if (l == 'e') c = '\u00E8'; | |
| else if (l == 'i') c = '\u00EC'; | |
| else if (l == 'o') c = '\u00F2'; | |
| else if (l == 'u') c = '\u00F9'; | |
| else if (l == 'A') c = '\u00C0'; | |
| else if (l == 'E') c = '\u00C8'; | |
| else if (l == 'I') c = '\u00CC'; | |
| else if (l == 'O') c = '\u00D2'; | |
| else if (l == 'U') c = '\u00D9'; | |
| break; | |
| case '\u0301': // COMBINING ACUTE ACCENT | |
| if (l == 'a') c = '\u00E1'; | |
| else if (l == 'e') c = '\u00E9'; | |
| else if (l == 'i') c = '\u00ED'; | |
| else if (l == 'o') c = '\u00F3'; | |
| else if (l == 'u') c = '\u00FA'; | |
| else if (l == 'A') c = '\u00C1'; | |
| else if (l == 'E') c = '\u00C9'; | |
| else if (l == 'I') c = '\u00CD'; | |
| else if (l == 'O') c = '\u00D3'; | |
| else if (l == 'U') c = '\u00DA'; | |
| // LATIN LETTER Y WITH ACUTE | |
| else if (l == 'y') c = '\u00FD'; | |
| else if (l == 'Y') c = '\u00DD'; | |
| break; | |
| case '\u0302': // COMBINING CIRCUMFLEX ACCENT | |
| if (l == 'a') c = '\u00E2'; | |
| else if (l == 'e') c = '\u00EA'; | |
| else if (l == 'i') c = '\u00EE'; | |
| else if (l == 'o') c = '\u00F4'; | |
| else if (l == 'u') c = '\u00FB'; | |
| else if (l == 'A') c = '\u00C2'; | |
| else if (l == 'E') c = '\u00CA'; | |
| else if (l == 'I') c = '\u00CE'; | |
| else if (l == 'O') c = '\u00D4'; | |
| else if (l == 'U') c = '\u00DB'; | |
| break; | |
| case '\u0303': // COMBINING TILDE | |
| if (l == 'a') c = '\u00E3'; | |
| else if (l == 'o') c = '\u00F5'; | |
| else if (l == 'n') c = '\u00F1'; | |
| else if (l == 'A') c = '\u00C3'; | |
| else if (l == 'O') c = '\u00D5'; | |
| else if (l == 'N') c = '\u00D1'; | |
| break; | |
| case '\u0308': //COMBINING DIAERESIS | |
| if (l == 'a') c = '\u00E4'; | |
| else if (l == 'e') c = '\u00EB'; | |
| else if (l == 'i') c = '\u00EF'; | |
| else if (l == 'o') c = '\u00F6'; | |
| else if (l == 'u') c = '\u00FC'; | |
| else if (l == 'A') c = '\u00C4'; | |
| else if (l == 'E') c = '\u00CB'; | |
| else if (l == 'I') c = '\u00CF'; | |
| else if (l == 'O') c = '\u00D6'; | |
| else if (l == 'U') c = '\u00DC'; | |
| // LATIN LETTER Y WITH DIAERESIS | |
| else if (l == 'y') c = '\u00FF'; | |
| break; | |
| case '\u0327': // COMBINING CEDILLA | |
| if (l == 'c') c = '\u00E7'; | |
| else if (l == 'C') c = '\u00C7'; | |
| break; | |
| case '\u030A': // CCOMBINING RING ABOVE | |
| if (l == 'a') c = '\u00E5'; | |
| else if (l == 'A') c = '\u00C5'; | |
| break; | |
| default: | |
| break; | |
| } | |
| return c; | |
| } | |
| // Code points from Unicode's general punctuation | |
| function translateGeneralPunctuation(punctuation) { | |
| var p = punctuation; | |
| var c = '\u00BF'; // INVERTED QUESTION MARK | |
| switch (p) { | |
| case '\u2010': // HYPHEN | |
| c = '-'; | |
| break; | |
| case '\u2011': // NON-BREAKING HYPHEN | |
| c = '-'; | |
| break; | |
| case '\u2012': // FIGURE DASH (used to separate digits in telephone numbers) | |
| c = '-'; | |
| break; | |
| case '\u2013': // EN DASH (used, for instance, to indicate a range) | |
| c = '-'; | |
| break; | |
| case '\u2014': // EM DASH (used to demarcate a parenthetical thought or to indicate a break, or for emphasis) | |
| c = '--'; | |
| break; | |
| case '\u2015': // HORIZONTAL BAR (introduces quoted text) | |
| c = '--'; | |
| break; | |
| case '\u2018': // LEFT SINGLE QUOTATION MARK | |
| c = '\''; | |
| break; | |
| case '\u2019': // RIGHT SINGLE QUOTATION MARK | |
| c = '\''; | |
| break; | |
| case '\u201A': // SINGLE LOW-9 QUOTATION MARK | |
| c = '\''; | |
| break; | |
| case '\u201B': // SINGLE HIGH-REVERSED-9 QUOTATION MARK | |
| c = '\''; | |
| break; | |
| case '\u201C': // LEFT DOUBLE QUOTATION MARK | |
| c = '\"'; | |
| break; | |
| case '\u201D': // RIGHT DOUBLE QUOTATION MARK | |
| c = '\"'; | |
| break; | |
| case '\u201E': // DOUBLE LOW-9 QUOTATION MARK | |
| c = '\"'; | |
| break; | |
| case '\u201F': // DOUBLE HIGH-REVERSED-9 QUOTATION MARK | |
| c = '\"'; | |
| break; | |
| case '\u2022': // BULLET | |
| c = '*'; | |
| break; | |
| case '\u2023': // TRIANGULAR BULLET | |
| c = '-'; | |
| break; | |
| case '\u2024': // ONE DOT LEADER | |
| c = '.'; | |
| break; | |
| case '\u2025': // TWO DOT LEADER | |
| c = '..'; | |
| break; | |
| case '\u2026': // HORIZONTAL ELLIPSIS | |
| c = '...'; | |
| break; | |
| case '\u2027': // HYPHENATION POINT | |
| c = '\u00B7'; // MIDDLE DOT | |
| break; | |
| } | |
| return c; | |
| } | |
| // C1 Control (used by WINDOWS-1252) | |
| function translateC1(punctuation) { | |
| var p = punctuation; | |
| var c = '\u00BF'; // INVERTED QUESTION MARK | |
| switch (p) { | |
| case '\u0081': // <control> | |
| c = 'EUR'; | |
| break; | |
| case '\u0081': // <control> | |
| break; | |
| case '\u0082': // BREAK PERMITTED HERE | |
| c = '\''; | |
| break; | |
| case '\u0083': // NO BREAK HERE | |
| c = 'f'; | |
| break; | |
| case '\u0084': // <control> | |
| c = '\"'; | |
| break; | |
| case '\u0085': // NEXT LINE (NEL) | |
| c = '...'; | |
| break; | |
| case '\u0086': // END OF SELECTED AREA | |
| break; | |
| case '\u0087': // END OF SELECTED AREA | |
| break; | |
| case '\u0088': // CHARACTER TABULATION SET | |
| c = '^'; | |
| break; | |
| case '\u0089': // CHARACTER TABULATION WITH JUSTIFICATION | |
| break; | |
| case '\u008A': // LINE TABULATION SET | |
| c = 'S'; | |
| break; | |
| case '\u008B': // PARTIAL LINE FORWARD | |
| break; | |
| case '\u008C': // PARTIAL LINE BACKWARD | |
| c = 'OE'; | |
| break; | |
| case '\u008D': // REVERSE LINE FEED | |
| break; | |
| case '\u008E': // SINGLE SHIFT TWO | |
| c = 'Z'; | |
| break; | |
| case '\u008F': // SINGLE SHIFT THREE | |
| break; | |
| case '\u0090': // DEVICE CONTROL STRING | |
| break; | |
| case '\u0091': // PRIVATE USE ONE | |
| c = '\''; | |
| break; | |
| case '\u0092': // PRIVATE USE TWO | |
| c = '\''; | |
| break; | |
| case '\u0093': // SET TRANSMIT STATE | |
| c = '\"'; | |
| break; | |
| case '\u0094': // CANCEL CHARACTER | |
| c = '\"'; | |
| break; | |
| case '\u0095': // MESSAGE WAITING | |
| c = '*'; | |
| break; | |
| case '\u0096': // START OF GUARDED AREA | |
| c = '-'; | |
| break; | |
| case '\u0097': // END OF GUARDED AREA | |
| c = '--'; | |
| break; | |
| case '\u0098': // START OF STRING | |
| c = '~'; | |
| break; | |
| case '\u0099': // <control> | |
| c = 'TM'; | |
| break; | |
| case '\u009A': // SINGLE CHARACTER INTRODUCER | |
| c = 's'; | |
| break; | |
| case '\u009B': // CONTROL SEQUENCE INTRODUCER | |
| break; | |
| case '\u009C': // STRING TERMINATOR | |
| c = 'oe'; | |
| break; | |
| case '\u009D': // OPERATING SYSTEM COMMAND | |
| break; | |
| case '\u009E': // PRIVACY MESSAGE | |
| c = 'z'; | |
| break; | |
| case '\u009F': // APPLICATION PROGRAM COMMAND | |
| c = 'Y'; | |
| break; | |
| } | |
| return c; | |
| } | |
| </script> | |
| </head> | |
| <body> | |
| <h1>Corretor de caracteres especiais</h1> | |
| <p>Esta ferramenta procura caracteres inválidos para a codificação ISO-8859-1 e os substitui por caracteres | |
| compatíveis.</p> | |
| <p>Siga os passos abaixo para corrigir o texto com caracteres especiais:</p> | |
| <ol> | |
| <li>Cole o texto na caixa de texto abaixo;</li> | |
| <li>Clique no botão "Corrigir";</li> | |
| <li>Clique em cada item da caixa "Alterações realizadas" para conferir as correções;</li> | |
| <li>Para finalizar, copie o texto corrigido para o destino desejado.</li> | |
| </ol> | |
| <div style="min-height:200px; border: solid 1px;" id="texto" contenteditable="true"></div> | |
| <button type="button" onclick="corrigir();">Corrigir</button> | |
| <br> | |
| <br> | |
| <span style="color: red; font-weight: bold; font-size: 150%;">Alterações realizadas</span><br> | |
| <span>(Clique em uma mensagem abaixo para ir para a linha correspondente)</span> <br> | |
| <div id="mensagens" style="min-height:100px; border: dashed 1px;" contenteditable="false"></div> | |
| </body> | |
| </html> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment