Skip to content

Instantly share code, notes, and snippets.

@Luiz-Monad
Created January 22, 2020 21:33
Show Gist options
  • Save Luiz-Monad/7ddff82fbc928227333d580876cddd4d to your computer and use it in GitHub Desktop.
Save Luiz-Monad/7ddff82fbc928227333d580876cddd4d to your computer and use it in GitHub Desktop.
// FORK from : https://github.com/fsharp/FSharp.Data
// --------------------------------------------------------------------------------------
// Helper operations for converting string values to other types
// --------------------------------------------------------------------------------------
open System
open System.Globalization
open System.Text.RegularExpressions
// --------------------------------------------------------------------------------------
module UnicodeHelper =

    /// Encodes a supplementary-plane code point as a UTF-16 surrogate pair.
    ///
    /// The arithmetic follows the scheme described at
    /// http://en.wikipedia.org/wiki/UTF-16#Code_points_U.2B010000_to_U.2B10FFFF
    ///
    /// `num` is an unsigned 32-bit code point. Only U+010000 to U+10FFFF is
    /// supported; no range check is performed here, so the caller must
    /// guarantee the input lies in that interval.
    ///
    /// Returns the pair (lead surrogate, trail surrogate) as chars.
    let getUnicodeSurrogatePair num =
        // Distance from the start of the supplementary planes
        // (a 20-bit quantity for in-range input).
        let offset = num - 0x010000u
        // Bits 10..19 of the offset, rebased onto the lead-surrogate range.
        let lead = ((offset >>> 10) &&& 0x003FFu) + 0xD800u
        // Bits 0..9 of the offset, rebased onto the trail-surrogate range.
        let trail = (offset &&& 0x003FFu) + 0xDC00u
        char lead, char trail
open System
open System.Globalization
module HtmlCharRefs =
let private refs =
[|
"Á", "\u00C1";
"&Aacute", "\u00C1";
"á", "\u00E1";
"&aacute", "\u00E1";
"Ă", "\u0102";
"ă", "\u0103";
"∾", "\u223E";
"∿", "\u223F";
"∾̳", "\u223E\u0333";
"Â", "\u00C2";
"&Acirc", "\u00C2";
"â", "\u00E2";
"&acirc", "\u00E2";
"´", "\u00B4";
"&acute", "\u00B4";
"А", "\u0410";
"а", "\u0430";
"Æ", "\u00C6";
"&AElig", "\u00C6";
"æ", "\u00E6";
"&aelig", "\u00E6";
"⁡", "\u2061";
"𝔄", "\uD835\uDD04";
"𝔞", "\uD835\uDD1E";
"À", "\u00C0";
"&Agrave", "\u00C0";
"à", "\u00E0";
"&agrave", "\u00E0";
"ℵ", "\u2135";
"ℵ", "\u2135";
"Α", "\u0391";
"α", "\u03B1";
"Ā", "\u0100";
"ā", "\u0101";
"⨿", "\u2A3F";
"&", "\u0026";
"&amp", "\u0026";
"&", "\u0026";
"&AMP", "\u0026";
"⩕", "\u2A55";
"⩓", "\u2A53";
"∧", "\u2227";
"⩜", "\u2A5C";
"⩘", "\u2A58";
"⩚", "\u2A5A";
"∠", "\u2220";
"⦤", "\u29A4";
"∠", "\u2220";
"⦨", "\u29A8";
"⦩", "\u29A9";
"⦪", "\u29AA";
"⦫", "\u29AB";
"⦬", "\u29AC";
"⦭", "\u29AD";
"⦮", "\u29AE";
"⦯", "\u29AF";
"∡", "\u2221";
"∟", "\u221F";
"⊾", "\u22BE";
"⦝", "\u299D";
"∢", "\u2222";
"Å", "\u00C5";
"⍼", "\u237C";
"Ą", "\u0104";
"ą", "\u0105";
"𝔸", "\uD835\uDD38";
"𝕒", "\uD835\uDD52";
"⩯", "\u2A6F";
"≈", "\u2248";
"⩰", "\u2A70";
"≊", "\u224A";
"≋", "\u224B";
"'", "\u0027";
"⁡", "\u2061";
"≈", "\u2248";
"≊", "\u224A";
"Å", "\u00C5";
"&Aring", "\u00C5";
"å", "\u00E5";
"&aring", "\u00E5";
"𝒜", "\uD835\uDC9C";
"𝒶", "\uD835\uDCB6";
"≔", "\u2254";
"*", "\u002A";
"≈", "\u2248";
"≍", "\u224D";
"Ã", "\u00C3";
"&Atilde", "\u00C3";
"ã", "\u00E3";
"&atilde", "\u00E3";
"Ä", "\u00C4";
"&Auml", "\u00C4";
"ä", "\u00E4";
"&auml", "\u00E4";
"∳", "\u2233";
"⨑", "\u2A11";
"≌", "\u224C";
"϶", "\u03F6";
"‵", "\u2035";
"∽", "\u223D";
"⋍", "\u22CD";
"∖", "\u2216";
"⫧", "\u2AE7";
"⊽", "\u22BD";
"⌅", "\u2305";
"⌆", "\u2306";
"⌅", "\u2305";
"⎵", "\u23B5";
"⎶", "\u23B6";
"≌", "\u224C";
"Б", "\u0411";
"б", "\u0431";
"„", "\u201E";
"∵", "\u2235";
"∵", "\u2235";
"∵", "\u2235";
"⦰", "\u29B0";
"϶", "\u03F6";
"ℬ", "\u212C";
"ℬ", "\u212C";
"Β", "\u0392";
"β", "\u03B2";
"ℶ", "\u2136";
"≬", "\u226C";
"𝔅", "\uD835\uDD05";
"𝔟", "\uD835\uDD1F";
"⋂", "\u22C2";
"◯", "\u25EF";
"⋃", "\u22C3";
"⨀", "\u2A00";
"⨁", "\u2A01";
"⨂", "\u2A02";
"⨆", "\u2A06";
"★", "\u2605";
"▽", "\u25BD";
"△", "\u25B3";
"⨄", "\u2A04";
"⋁", "\u22C1";
"⋀", "\u22C0";
"⤍", "\u290D";
"⧫", "\u29EB";
"▪", "\u25AA";
"▴", "\u25B4";
"▾", "\u25BE";
"◂", "\u25C2";
"▸", "\u25B8";
"␣", "\u2423";
"▒", "\u2592";
"░", "\u2591";
"▓", "\u2593";
"█", "\u2588";
"=⃥", "\u003D\u20E5";
"≡⃥", "\u2261\u20E5";
"⫭", "\u2AED";
"⌐", "\u2310";
"𝔹", "\uD835\uDD39";
"𝕓", "\uD835\uDD53";
"⊥", "\u22A5";
"⊥", "\u22A5";
"⋈", "\u22C8";
"⧉", "\u29C9";
"┐", "\u2510";
"╕", "\u2555";
"╖", "\u2556";
"╗", "\u2557";
"┌", "\u250C";
"╒", "\u2552";
"╓", "\u2553";
"╔", "\u2554";
"─", "\u2500";
"═", "\u2550";
"┬", "\u252C";
"╤", "\u2564";
"╥", "\u2565";
"╦", "\u2566";
"┴", "\u2534";
"╧", "\u2567";
"╨", "\u2568";
"╩", "\u2569";
"⊟", "\u229F";
"⊞", "\u229E";
"⊠", "\u22A0";
"┘", "\u2518";
"╛", "\u255B";
"╜", "\u255C";
"╝", "\u255D";
"└", "\u2514";
"╘", "\u2558";
"╙", "\u2559";
"╚", "\u255A";
"│", "\u2502";
"║", "\u2551";
"┼", "\u253C";
"╪", "\u256A";
"╫", "\u256B";
"╬", "\u256C";
"┤", "\u2524";
"╡", "\u2561";
"╢", "\u2562";
"╣", "\u2563";
"├", "\u251C";
"╞", "\u255E";
"╟", "\u255F";
"╠", "\u2560";
"‵", "\u2035";
"˘", "\u02D8";
"˘", "\u02D8";
"¦", "\u00A6";
"&brvbar", "\u00A6";
"𝒷", "\uD835\uDCB7";
"ℬ", "\u212C";
"⁏", "\u204F";
"∽", "\u223D";
"⋍", "\u22CD";
"⧅", "\u29C5";
"\", "\u005C";
"⟈", "\u27C8";
"•", "\u2022";
"•", "\u2022";
"≎", "\u224E";
"⪮", "\u2AAE";
"≏", "\u224F";
"≎", "\u224E";
"≏", "\u224F";
"Ć", "\u0106";
"ć", "\u0107";
"⩄", "\u2A44";
"⩉", "\u2A49";
"⩋", "\u2A4B";
"∩", "\u2229";
"⋒", "\u22D2";
"⩇", "\u2A47";
"⩀", "\u2A40";
"ⅅ", "\u2145";
"∩︀", "\u2229\uFE00";
"⁁", "\u2041";
"ˇ", "\u02C7";
"ℭ", "\u212D";
"⩍", "\u2A4D";
"Č", "\u010C";
"č", "\u010D";
"Ç", "\u00C7";
"&Ccedil", "\u00C7";
"ç", "\u00E7";
"&ccedil", "\u00E7";
"Ĉ", "\u0108";
"ĉ", "\u0109";
"∰", "\u2230";
"⩌", "\u2A4C";
"⩐", "\u2A50";
"Ċ", "\u010A";
"ċ", "\u010B";
"¸", "\u00B8";
"&cedil", "\u00B8";
"¸", "\u00B8";
"⦲", "\u29B2";
"¢", "\u00A2";
"&cent", "\u00A2";
"·", "\u00B7";
"·", "\u00B7";
"𝔠", "\uD835\uDD20";
"ℭ", "\u212D";
"Ч", "\u0427";
"ч", "\u0447";
"✓", "\u2713";
"✓", "\u2713";
"Χ", "\u03A7";
"χ", "\u03C7";
"ˆ", "\u02C6";
"≗", "\u2257";
"↺", "\u21BA";
"↻", "\u21BB";
"⊛", "\u229B";
"⊚", "\u229A";
"⊝", "\u229D";
"⊙", "\u2299";
"®", "\u00AE";
"Ⓢ", "\u24C8";
"⊖", "\u2296";
"⊕", "\u2295";
"⊗", "\u2297";
"○", "\u25CB";
"⧃", "\u29C3";
"≗", "\u2257";
"⨐", "\u2A10";
"⫯", "\u2AEF";
"⧂", "\u29C2";
"∲", "\u2232";
"”", "\u201D";
"’", "\u2019";
"♣", "\u2663";
"♣", "\u2663";
":", "\u003A";
"∷", "\u2237";
"⩴", "\u2A74";
"≔", "\u2254";
"≔", "\u2254";
",", "\u002C";
"@", "\u0040";
"∁", "\u2201";
"∘", "\u2218";
"∁", "\u2201";
"ℂ", "\u2102";
"≅", "\u2245";
"⩭", "\u2A6D";
"≡", "\u2261";
"∮", "\u222E";
"∯", "\u222F";
"∮", "\u222E";
"𝕔", "\uD835\uDD54";
"ℂ", "\u2102";
"∐", "\u2210";
"∐", "\u2210";
"©", "\u00A9";
"&copy", "\u00A9";
"©", "\u00A9";
"&COPY", "\u00A9";
"℗", "\u2117";
"&CounterClockwiseContourIntegral;", "\u2233";
"↵", "\u21B5";
"✗", "\u2717";
"⨯", "\u2A2F";
"𝒞", "\uD835\uDC9E";
"𝒸", "\uD835\uDCB8";
"⫏", "\u2ACF";
"⫑", "\u2AD1";
"⫐", "\u2AD0";
"⫒", "\u2AD2";
"⋯", "\u22EF";
"⤸", "\u2938";
"⤵", "\u2935";
"⋞", "\u22DE";
"⋟", "\u22DF";
"↶", "\u21B6";
"⤽", "\u293D";
"⩈", "\u2A48";
"⩆", "\u2A46";
"≍", "\u224D";
"∪", "\u222A";
"⋓", "\u22D3";
"⩊", "\u2A4A";
"⊍", "\u228D";
"⩅", "\u2A45";
"∪︀", "\u222A\uFE00";
"↷", "\u21B7";
"⤼", "\u293C";
"⋞", "\u22DE";
"⋟", "\u22DF";
"⋎", "\u22CE";
"⋏", "\u22CF";
"¤", "\u00A4";
"&curren", "\u00A4";
"↶", "\u21B6";
"↷", "\u21B7";
"⋎", "\u22CE";
"⋏", "\u22CF";
"∲", "\u2232";
"∱", "\u2231";
"⌭", "\u232D";
"†", "\u2020";
"‡", "\u2021";
"ℸ", "\u2138";
"↓", "\u2193";
"↡", "\u21A1";
"⇓", "\u21D3";
"‐", "\u2010";
"⫤", "\u2AE4";
"⊣", "\u22A3";
"⤏", "\u290F";
"˝", "\u02DD";
"Ď", "\u010E";
"ď", "\u010F";
"Д", "\u0414";
"д", "\u0434";
"‡", "\u2021";
"⇊", "\u21CA";
"ⅅ", "\u2145";
"ⅆ", "\u2146";
"⤑", "\u2911";
"⩷", "\u2A77";
"°", "\u00B0";
"&deg", "\u00B0";
"∇", "\u2207";
"Δ", "\u0394";
"δ", "\u03B4";
"⦱", "\u29B1";
"⥿", "\u297F";
"𝔇", "\uD835\uDD07";
"𝔡", "\uD835\uDD21";
"⥥", "\u2965";
"⇃", "\u21C3";
"⇂", "\u21C2";
"´", "\u00B4";
"˙", "\u02D9";
"˝", "\u02DD";
"`", "\u0060";
"˜", "\u02DC";
"⋄", "\u22C4";
"⋄", "\u22C4";
"⋄", "\u22C4";
"♦", "\u2666";
"♦", "\u2666";
"¨", "\u00A8";
"ⅆ", "\u2146";
"ϝ", "\u03DD";
"⋲", "\u22F2";
"÷", "\u00F7";
"÷", "\u00F7";
"&divide", "\u00F7";
"⋇", "\u22C7";
"⋇", "\u22C7";
"Ђ", "\u0402";
"ђ", "\u0452";
"⌞", "\u231E";
"⌍", "\u230D";
"$", "\u0024";
"𝔻", "\uD835\uDD3B";
"𝕕", "\uD835\uDD55";
"¨", "\u00A8";
"˙", "\u02D9";
"⃜", "\u20DC";
"≐", "\u2250";
"≑", "\u2251";
"≐", "\u2250";
"∸", "\u2238";
"∔", "\u2214";
"⊡", "\u22A1";
"⌆", "\u2306";
"∯", "\u222F";
"¨", "\u00A8";
"⇓", "\u21D3";
"⇐", "\u21D0";
"⇔", "\u21D4";
"⫤", "\u2AE4";
"⟸", "\u27F8";
"⟺", "\u27FA";
"⟹", "\u27F9";
"⇒", "\u21D2";
"⊨", "\u22A8";
"⇑", "\u21D1";
"⇕", "\u21D5";
"∥", "\u2225";
"⤓", "\u2913";
"↓", "\u2193";
"↓", "\u2193";
"⇓", "\u21D3";
"⇵", "\u21F5";
"̑", "\u0311";
"⇊", "\u21CA";
"⇃", "\u21C3";
"⇂", "\u21C2";
"⥐", "\u2950";
"⥞", "\u295E";
"⥖", "\u2956";
"↽", "\u21BD";
"⥟", "\u295F";
"⥗", "\u2957";
"⇁", "\u21C1";
"↧", "\u21A7";
"⊤", "\u22A4";
"⤐", "\u2910";
"⌟", "\u231F";
"⌌", "\u230C";
"𝒟", "\uD835\uDC9F";
"𝒹", "\uD835\uDCB9";
"Ѕ", "\u0405";
"ѕ", "\u0455";
"⧶", "\u29F6";
"Đ", "\u0110";
"đ", "\u0111";
"⋱", "\u22F1";
"▿", "\u25BF";
"▾", "\u25BE";
"⇵", "\u21F5";
"⥯", "\u296F";
"⦦", "\u29A6";
"Џ", "\u040F";
"џ", "\u045F";
"⟿", "\u27FF";
"É", "\u00C9";
"&Eacute", "\u00C9";
"é", "\u00E9";
"&eacute", "\u00E9";
"⩮", "\u2A6E";
"Ě", "\u011A";
"ě", "\u011B";
"Ê", "\u00CA";
"&Ecirc", "\u00CA";
"ê", "\u00EA";
"&ecirc", "\u00EA";
"≖", "\u2256";
"≕", "\u2255";
"Э", "\u042D";
"э", "\u044D";
"⩷", "\u2A77";
"Ė", "\u0116";
"ė", "\u0117";
"≑", "\u2251";
"ⅇ", "\u2147";
"≒", "\u2252";
"𝔈", "\uD835\uDD08";
"𝔢", "\uD835\uDD22";
"⪚", "\u2A9A";
"È", "\u00C8";
"&Egrave", "\u00C8";
"è", "\u00E8";
"&egrave", "\u00E8";
"⪖", "\u2A96";
"⪘", "\u2A98";
"⪙", "\u2A99";
"∈", "\u2208";
"⏧", "\u23E7";
"ℓ", "\u2113";
"⪕", "\u2A95";
"⪗", "\u2A97";
"Ē", "\u0112";
"ē", "\u0113";
"∅", "\u2205";
"∅", "\u2205";
"◻", "\u25FB";
"∅", "\u2205";
"▫", "\u25AB";
" ", "\u2004";
" ", "\u2005";
" ", "\u2003";
"Ŋ", "\u014A";
"ŋ", "\u014B";
" ", "\u2002";
"Ę", "\u0118";
"ę", "\u0119";
"𝔼", "\uD835\uDD3C";
"𝕖", "\uD835\uDD56";
"⋕", "\u22D5";
"⧣", "\u29E3";
"⩱", "\u2A71";
"ε", "\u03B5";
"Ε", "\u0395";
"ε", "\u03B5";
"ϵ", "\u03F5";
"≖", "\u2256";
"≕", "\u2255";
"≂", "\u2242";
"⪖", "\u2A96";
"⪕", "\u2A95";
"⩵", "\u2A75";
"=", "\u003D";
"≂", "\u2242";
"≟", "\u225F";
"⇌", "\u21CC";
"≡", "\u2261";
"⩸", "\u2A78";
"⧥", "\u29E5";
"⥱", "\u2971";
"≓", "\u2253";
"ℯ", "\u212F";
"ℰ", "\u2130";
"≐", "\u2250";
"⩳", "\u2A73";
"≂", "\u2242";
"Η", "\u0397";
"η", "\u03B7";
"Ð", "\u00D0";
"&ETH", "\u00D0";
"ð", "\u00F0";
"&eth", "\u00F0";
"Ë", "\u00CB";
"&Euml", "\u00CB";
"ë", "\u00EB";
"&euml", "\u00EB";
"€", "\u20AC";
"!", "\u0021";
"∃", "\u2203";
"∃", "\u2203";
"ℰ", "\u2130";
"ⅇ", "\u2147";
"ⅇ", "\u2147";
"≒", "\u2252";
"Ф", "\u0424";
"ф", "\u0444";
"♀", "\u2640";
"ffi", "\uFB03";
"ff", "\uFB00";
"ffl", "\uFB04";
"𝔉", "\uD835\uDD09";
"𝔣", "\uD835\uDD23";
"fi", "\uFB01";
"◼", "\u25FC";
"▪", "\u25AA";
"fj", "\u0066\u006A";
"♭", "\u266D";
"fl", "\uFB02";
"▱", "\u25B1";
"ƒ", "\u0192";
"𝔽", "\uD835\uDD3D";
"𝕗", "\uD835\uDD57";
"∀", "\u2200";
"∀", "\u2200";
"⋔", "\u22D4";
"⫙", "\u2AD9";
"ℱ", "\u2131";
"⨍", "\u2A0D";
"½", "\u00BD";
"&frac12", "\u00BD";
"⅓", "\u2153";
"¼", "\u00BC";
"&frac14", "\u00BC";
"⅕", "\u2155";
"⅙", "\u2159";
"⅛", "\u215B";
"⅔", "\u2154";
"⅖", "\u2156";
"¾", "\u00BE";
"&frac34", "\u00BE";
"⅗", "\u2157";
"⅜", "\u215C";
"⅘", "\u2158";
"⅚", "\u215A";
"⅝", "\u215D";
"⅞", "\u215E";
"⁄", "\u2044";
"⌢", "\u2322";
"𝒻", "\uD835\uDCBB";
"ℱ", "\u2131";
"ǵ", "\u01F5";
"Γ", "\u0393";
"γ", "\u03B3";
"Ϝ", "\u03DC";
"ϝ", "\u03DD";
"⪆", "\u2A86";
"Ğ", "\u011E";
"ğ", "\u011F";
"Ģ", "\u0122";
"Ĝ", "\u011C";
"ĝ", "\u011D";
"Г", "\u0413";
"г", "\u0433";
"Ġ", "\u0120";
"ġ", "\u0121";
"≥", "\u2265";
"≧", "\u2267";
"⪌", "\u2A8C";
"⋛", "\u22DB";
"≥", "\u2265";
"≧", "\u2267";
"⩾", "\u2A7E";
"⪩", "\u2AA9";
"⩾", "\u2A7E";
"⪀", "\u2A80";
"⪂", "\u2A82";
"⪄", "\u2A84";
"⋛︀", "\u22DB\uFE00";
"⪔", "\u2A94";
"𝔊", "\uD835\uDD0A";
"𝔤", "\uD835\uDD24";
"≫", "\u226B";
"⋙", "\u22D9";
"⋙", "\u22D9";
"ℷ", "\u2137";
"Ѓ", "\u0403";
"ѓ", "\u0453";
"⪥", "\u2AA5";
"≷", "\u2277";
"⪒", "\u2A92";
"⪤", "\u2AA4";
"⪊", "\u2A8A";
"⪊", "\u2A8A";
"⪈", "\u2A88";
"≩", "\u2269";
"⪈", "\u2A88";
"≩", "\u2269";
"⋧", "\u22E7";
"𝔾", "\uD835\uDD3E";
"𝕘", "\uD835\uDD58";
"`", "\u0060";
"≥", "\u2265";
"⋛", "\u22DB";
"≧", "\u2267";
"⪢", "\u2AA2";
"≷", "\u2277";
"⩾", "\u2A7E";
"≳", "\u2273";
"𝒢", "\uD835\uDCA2";
"ℊ", "\u210A";
"≳", "\u2273";
"⪎", "\u2A8E";
"⪐", "\u2A90";
"⪧", "\u2AA7";
"⩺", "\u2A7A";
">", "\u003E";
"&gt", "\u003E";
">", "\u003E";
"&GT", "\u003E";
"≫", "\u226B";
"⋗", "\u22D7";
"⦕", "\u2995";
"⩼", "\u2A7C";
"⪆", "\u2A86";
"⥸", "\u2978";
"⋗", "\u22D7";
"⋛", "\u22DB";
"⪌", "\u2A8C";
"≷", "\u2277";
"≳", "\u2273";
"≩︀", "\u2269\uFE00";
"≩︀", "\u2269\uFE00";
"ˇ", "\u02C7";
" ", "\u200A";
"½", "\u00BD";
"ℋ", "\u210B";
"Ъ", "\u042A";
"ъ", "\u044A";
"⥈", "\u2948";
"↔", "\u2194";
"⇔", "\u21D4";
"↭", "\u21AD";
"^", "\u005E";
"ℏ", "\u210F";
"Ĥ", "\u0124";
"ĥ", "\u0125";
"♥", "\u2665";
"♥", "\u2665";
"…", "\u2026";
"⊹", "\u22B9";
"𝔥", "\uD835\uDD25";
"ℌ", "\u210C";
"ℋ", "\u210B";
"⤥", "\u2925";
"⤦", "\u2926";
"⇿", "\u21FF";
"∻", "\u223B";
"↩", "\u21A9";
"↪", "\u21AA";
"𝕙", "\uD835\uDD59";
"ℍ", "\u210D";
"―", "\u2015";
"─", "\u2500";
"𝒽", "\uD835\uDCBD";
"ℋ", "\u210B";
"ℏ", "\u210F";
"Ħ", "\u0126";
"ħ", "\u0127";
"≎", "\u224E";
"≏", "\u224F";
"⁃", "\u2043";
"‐", "\u2010";
"Í", "\u00CD";
"&Iacute", "\u00CD";
"í", "\u00ED";
"&iacute", "\u00ED";
"⁣", "\u2063";
"Î", "\u00CE";
"&Icirc", "\u00CE";
"î", "\u00EE";
"&icirc", "\u00EE";
"И", "\u0418";
"и", "\u0438";
"İ", "\u0130";
"Е", "\u0415";
"е", "\u0435";
"¡", "\u00A1";
"&iexcl", "\u00A1";
"⇔", "\u21D4";
"𝔦", "\uD835\uDD26";
"ℑ", "\u2111";
"Ì", "\u00CC";
"&Igrave", "\u00CC";
"ì", "\u00EC";
"&igrave", "\u00EC";
"ⅈ", "\u2148";
"⨌", "\u2A0C";
"∭", "\u222D";
"⧜", "\u29DC";
"℩", "\u2129";
"IJ", "\u0132";
"ij", "\u0133";
"Ī", "\u012A";
"ī", "\u012B";
"ℑ", "\u2111";
"ⅈ", "\u2148";
"ℐ", "\u2110";
"ℑ", "\u2111";
"ı", "\u0131";
"ℑ", "\u2111";
"⊷", "\u22B7";
"Ƶ", "\u01B5";
"⇒", "\u21D2";
"℅", "\u2105";
"∈", "\u2208";
"∞", "\u221E";
"⧝", "\u29DD";
"ı", "\u0131";
"⊺", "\u22BA";
"∫", "\u222B";
"∬", "\u222C";
"ℤ", "\u2124";
"∫", "\u222B";
"⊺", "\u22BA";
"⋂", "\u22C2";
"⨗", "\u2A17";
"⨼", "\u2A3C";
"⁣", "\u2063";
"⁢", "\u2062";
"Ё", "\u0401";
"ё", "\u0451";
"Į", "\u012E";
"į", "\u012F";
"𝕀", "\uD835\uDD40";
"𝕚", "\uD835\uDD5A";
"Ι", "\u0399";
"ι", "\u03B9";
"⨼", "\u2A3C";
"¿", "\u00BF";
"&iquest", "\u00BF";
"𝒾", "\uD835\uDCBE";
"ℐ", "\u2110";
"∈", "\u2208";
"⋵", "\u22F5";
"⋹", "\u22F9";
"⋴", "\u22F4";
"⋳", "\u22F3";
"∈", "\u2208";
"⁢", "\u2062";
"Ĩ", "\u0128";
"ĩ", "\u0129";
"І", "\u0406";
"і", "\u0456";
"Ï", "\u00CF";
"&Iuml", "\u00CF";
"ï", "\u00EF";
"&iuml", "\u00EF";
"Ĵ", "\u0134";
"ĵ", "\u0135";
"Й", "\u0419";
"й", "\u0439";
"𝔍", "\uD835\uDD0D";
"𝔧", "\uD835\uDD27";
"ȷ", "\u0237";
"𝕁", "\uD835\uDD41";
"𝕛", "\uD835\uDD5B";
"𝒥", "\uD835\uDCA5";
"𝒿", "\uD835\uDCBF";
"Ј", "\u0408";
"ј", "\u0458";
"Є", "\u0404";
"є", "\u0454";
"Κ", "\u039A";
"κ", "\u03BA";
"ϰ", "\u03F0";
"Ķ", "\u0136";
"ķ", "\u0137";
"К", "\u041A";
"к", "\u043A";
"𝔎", "\uD835\uDD0E";
"𝔨", "\uD835\uDD28";
"ĸ", "\u0138";
"Х", "\u0425";
"х", "\u0445";
"Ќ", "\u040C";
"ќ", "\u045C";
"𝕂", "\uD835\uDD42";
"𝕜", "\uD835\uDD5C";
"𝒦", "\uD835\uDCA6";
"𝓀", "\uD835\uDCC0";
"⇚", "\u21DA";
"Ĺ", "\u0139";
"ĺ", "\u013A";
"⦴", "\u29B4";
"ℒ", "\u2112";
"Λ", "\u039B";
"λ", "\u03BB";
"⟨", "\u27E8";
"⟪", "\u27EA";
"⦑", "\u2991";
"⟨", "\u27E8";
"⪅", "\u2A85";
"ℒ", "\u2112";
"«", "\u00AB";
"&laquo", "\u00AB";
"⇤", "\u21E4";
"⤟", "\u291F";
"←", "\u2190";
"↞", "\u219E";
"⇐", "\u21D0";
"⤝", "\u291D";
"↩", "\u21A9";
"↫", "\u21AB";
"⤹", "\u2939";
"⥳", "\u2973";
"↢", "\u21A2";
"⤙", "\u2919";
"⤛", "\u291B";
"⪫", "\u2AAB";
"⪭", "\u2AAD";
"⪭︀", "\u2AAD\uFE00";
"⤌", "\u290C";
"⤎", "\u290E";
"❲", "\u2772";
"{", "\u007B";
"[", "\u005B";
"⦋", "\u298B";
"⦏", "\u298F";
"⦍", "\u298D";
"Ľ", "\u013D";
"ľ", "\u013E";
"Ļ", "\u013B";
"ļ", "\u013C";
"⌈", "\u2308";
"{", "\u007B";
"Л", "\u041B";
"л", "\u043B";
"⤶", "\u2936";
"“", "\u201C";
"„", "\u201E";
"⥧", "\u2967";
"⥋", "\u294B";
"↲", "\u21B2";
"≤", "\u2264";
"≦", "\u2266";
"⟨", "\u27E8";
"⇤", "\u21E4";
"←", "\u2190";
"←", "\u2190";
"⇐", "\u21D0";
"⇆", "\u21C6";
"↢", "\u21A2";
"⌈", "\u2308";
"⟦", "\u27E6";
"⥡", "\u2961";
"⥙", "\u2959";
"⇃", "\u21C3";
"⌊", "\u230A";
"↽", "\u21BD";
"↼", "\u21BC";
"⇇", "\u21C7";
"↔", "\u2194";
"↔", "\u2194";
"⇔", "\u21D4";
"⇆", "\u21C6";
"⇋", "\u21CB";
"↭", "\u21AD";
"⥎", "\u294E";
"↤", "\u21A4";
"⊣", "\u22A3";
"⥚", "\u295A";
"⋋", "\u22CB";
"⧏", "\u29CF";
"⊲", "\u22B2";
"⊴", "\u22B4";
"⥑", "\u2951";
"⥠", "\u2960";
"⥘", "\u2958";
"↿", "\u21BF";
"⥒", "\u2952";
"↼", "\u21BC";
"⪋", "\u2A8B";
"⋚", "\u22DA";
"≤", "\u2264";
"≦", "\u2266";
"⩽", "\u2A7D";
"⪨", "\u2AA8";
"⩽", "\u2A7D";
"⩿", "\u2A7F";
"⪁", "\u2A81";
"⪃", "\u2A83";
"⋚︀", "\u22DA\uFE00";
"⪓", "\u2A93";
"⪅", "\u2A85";
"⋖", "\u22D6";
"⋚", "\u22DA";
"⪋", "\u2A8B";
"⋚", "\u22DA";
"≦", "\u2266";
"≶", "\u2276";
"≶", "\u2276";
"⪡", "\u2AA1";
"≲", "\u2272";
"⩽", "\u2A7D";
"≲", "\u2272";
"⥼", "\u297C";
"⌊", "\u230A";
"𝔏", "\uD835\uDD0F";
"𝔩", "\uD835\uDD29";
"≶", "\u2276";
"⪑", "\u2A91";
"⥢", "\u2962";
"↽", "\u21BD";
"↼", "\u21BC";
"⥪", "\u296A";
"▄", "\u2584";
"Љ", "\u0409";
"љ", "\u0459";
"⇇", "\u21C7";
"≪", "\u226A";
"⋘", "\u22D8";
"⌞", "\u231E";
"⇚", "\u21DA";
"⥫", "\u296B";
"◺", "\u25FA";
"Ŀ", "\u013F";
"ŀ", "\u0140";
"⎰", "\u23B0";
"⎰", "\u23B0";
"⪉", "\u2A89";
"⪉", "\u2A89";
"⪇", "\u2A87";
"≨", "\u2268";
"⪇", "\u2A87";
"≨", "\u2268";
"⋦", "\u22E6";
"⟬", "\u27EC";
"⇽", "\u21FD";
"⟦", "\u27E6";
"⟵", "\u27F5";
"⟵", "\u27F5";
"⟸", "\u27F8";
"⟷", "\u27F7";
"⟷", "\u27F7";
"⟺", "\u27FA";
"⟼", "\u27FC";
"⟶", "\u27F6";
"⟶", "\u27F6";
"⟹", "\u27F9";
"↫", "\u21AB";
"↬", "\u21AC";
"⦅", "\u2985";
"𝕃", "\uD835\uDD43";
"𝕝", "\uD835\uDD5D";
"⨭", "\u2A2D";
"⨴", "\u2A34";
"∗", "\u2217";
"_", "\u005F";
"↙", "\u2199";
"↘", "\u2198";
"◊", "\u25CA";
"◊", "\u25CA";
"⧫", "\u29EB";
"(", "\u0028";
"⦓", "\u2993";
"⇆", "\u21C6";
"⌟", "\u231F";
"⇋", "\u21CB";
"⥭", "\u296D";
"‎", "\u200E";
"⊿", "\u22BF";
"‹", "\u2039";
"𝓁", "\uD835\uDCC1";
"ℒ", "\u2112";
"↰", "\u21B0";
"↰", "\u21B0";
"≲", "\u2272";
"⪍", "\u2A8D";
"⪏", "\u2A8F";
"[", "\u005B";
"‘", "\u2018";
"‚", "\u201A";
"Ł", "\u0141";
"ł", "\u0142";
"⪦", "\u2AA6";
"⩹", "\u2A79";
"<", "\u003C";
"&lt", "\u003C";
"<", "\u003C";
"&LT", "\u003C";
"≪", "\u226A";
"⋖", "\u22D6";
"⋋", "\u22CB";
"⋉", "\u22C9";
"⥶", "\u2976";
"⩻", "\u2A7B";
"◃", "\u25C3";
"⊴", "\u22B4";
"◂", "\u25C2";
"⦖", "\u2996";
"⥊", "\u294A";
"⥦", "\u2966";
"≨︀", "\u2268\uFE00";
"≨︀", "\u2268\uFE00";
"¯", "\u00AF";
"&macr", "\u00AF";
"♂", "\u2642";
"✠", "\u2720";
"✠", "\u2720";
"⤅", "\u2905";
"↦", "\u21A6";
"↦", "\u21A6";
"↧", "\u21A7";
"↤", "\u21A4";
"↥", "\u21A5";
"▮", "\u25AE";
"⨩", "\u2A29";
"М", "\u041C";
"м", "\u043C";
"—", "\u2014";
"∺", "\u223A";
"∡", "\u2221";
" ", "\u205F";
"ℳ", "\u2133";
"𝔐", "\uD835\uDD10";
"𝔪", "\uD835\uDD2A";
"℧", "\u2127";
"µ", "\u00B5";
"&micro", "\u00B5";
"*", "\u002A";
"⫰", "\u2AF0";
"∣", "\u2223";
"·", "\u00B7";
"&middot", "\u00B7";
"⊟", "\u229F";
"−", "\u2212";
"∸", "\u2238";
"⨪", "\u2A2A";
"∓", "\u2213";
"⫛", "\u2ADB";
"…", "\u2026";
"∓", "\u2213";
"⊧", "\u22A7";
"𝕄", "\uD835\uDD44";
"𝕞", "\uD835\uDD5E";
"∓", "\u2213";
"𝓂", "\uD835\uDCC2";
"ℳ", "\u2133";
"∾", "\u223E";
"Μ", "\u039C";
"μ", "\u03BC";
"⊸", "\u22B8";
"⊸", "\u22B8";
"∇", "\u2207";
"Ń", "\u0143";
"ń", "\u0144";
"∠⃒", "\u2220\u20D2";
"≉", "\u2249";
"⩰̸", "\u2A70\u0338";
"≋̸", "\u224B\u0338";
"ʼn", "\u0149";
"≉", "\u2249";
"♮", "\u266E";
"ℕ", "\u2115";
"♮", "\u266E";
" ", "\u00A0";
"&nbsp", "\u00A0";
"≎̸", "\u224E\u0338";
"≏̸", "\u224F\u0338";
"⩃", "\u2A43";
"Ň", "\u0147";
"ň", "\u0148";
"Ņ", "\u0145";
"ņ", "\u0146";
"≇", "\u2247";
"⩭̸", "\u2A6D\u0338";
"⩂", "\u2A42";
"Н", "\u041D";
"н", "\u043D";
"–", "\u2013";
"⤤", "\u2924";
"↗", "\u2197";
"⇗", "\u21D7";
"↗", "\u2197";
"≠", "\u2260";
"≐̸", "\u2250\u0338";
"​", "\u200B";
"​", "\u200B";
"​", "\u200B";
"​", "\u200B";
"≢", "\u2262";
"⤨", "\u2928";
"≂̸", "\u2242\u0338";
"≫", "\u226B";
"≪", "\u226A";
"
", "\u000A";
"∄", "\u2204";
"∄", "\u2204";
"𝔑", "\uD835\uDD11";
"𝔫", "\uD835\uDD2B";
"≧̸", "\u2267\u0338";
"≱", "\u2271";
"≱", "\u2271";
"≧̸", "\u2267\u0338";
"⩾̸", "\u2A7E\u0338";
"⩾̸", "\u2A7E\u0338";
"⋙̸", "\u22D9\u0338";
"≵", "\u2275";
"≫⃒", "\u226B\u20D2";
"≯", "\u226F";
"≯", "\u226F";
"≫̸", "\u226B\u0338";
"↮", "\u21AE";
"⇎", "\u21CE";
"⫲", "\u2AF2";
"∋", "\u220B";
"⋼", "\u22FC";
"⋺", "\u22FA";
"∋", "\u220B";
"Њ", "\u040A";
"њ", "\u045A";
"↚", "\u219A";
"⇍", "\u21CD";
"‥", "\u2025";
"≦̸", "\u2266\u0338";
"≰", "\u2270";
"↚", "\u219A";
"⇍", "\u21CD";
"↮", "\u21AE";
"⇎", "\u21CE";
"≰", "\u2270";
"≦̸", "\u2266\u0338";
"⩽̸", "\u2A7D\u0338";
"⩽̸", "\u2A7D\u0338";
"≮", "\u226E";
"⋘̸", "\u22D8\u0338";
"≴", "\u2274";
"≪⃒", "\u226A\u20D2";
"≮", "\u226E";
"⋪", "\u22EA";
"⋬", "\u22EC";
"≪̸", "\u226A\u0338";
"∤", "\u2224";
"⁠", "\u2060";
" ", "\u00A0";
"𝕟", "\uD835\uDD5F";
"ℕ", "\u2115";
"⫬", "\u2AEC";
"¬", "\u00AC";
"&not", "\u00AC";
"≢", "\u2262";
"≭", "\u226D";
"∦", "\u2226";
"∉", "\u2209";
"≠", "\u2260";
"≂̸", "\u2242\u0338";
"∄", "\u2204";
"≯", "\u226F";
"≱", "\u2271";
"≧̸", "\u2267\u0338";
"≫̸", "\u226B\u0338";
"≹", "\u2279";
"⩾̸", "\u2A7E\u0338";
"≵", "\u2275";
"≎̸", "\u224E\u0338";
"≏̸", "\u224F\u0338";
"∉", "\u2209";
"⋵̸", "\u22F5\u0338";
"⋹̸", "\u22F9\u0338";
"∉", "\u2209";
"⋷", "\u22F7";
"⋶", "\u22F6";
"⧏̸", "\u29CF\u0338";
"⋪", "\u22EA";
"⋬", "\u22EC";
"≮", "\u226E";
"≰", "\u2270";
"≸", "\u2278";
"≪̸", "\u226A\u0338";
"⩽̸", "\u2A7D\u0338";
"≴", "\u2274";
"⪢̸", "\u2AA2\u0338";
"⪡̸", "\u2AA1\u0338";
"∌", "\u220C";
"∌", "\u220C";
"⋾", "\u22FE";
"⋽", "\u22FD";
"⊀", "\u2280";
"⪯̸", "\u2AAF\u0338";
"⋠", "\u22E0";
"∌", "\u220C";
"⧐̸", "\u29D0\u0338";
"⋫", "\u22EB";
"⋭", "\u22ED";
"⊏̸", "\u228F\u0338";
"⋢", "\u22E2";
"⊐̸", "\u2290\u0338";
"⋣", "\u22E3";
"⊂⃒", "\u2282\u20D2";
"⊈", "\u2288";
"⊁", "\u2281";
"⪰̸", "\u2AB0\u0338";
"⋡", "\u22E1";
"≿̸", "\u227F\u0338";
"⊃⃒", "\u2283\u20D2";
"⊉", "\u2289";
"≁", "\u2241";
"≄", "\u2244";
"≇", "\u2247";
"≉", "\u2249";
"∤", "\u2224";
"∦", "\u2226";
"∦", "\u2226";
"⫽⃥", "\u2AFD\u20E5";
"∂̸", "\u2202\u0338";
"⨔", "\u2A14";
"⊀", "\u2280";
"⋠", "\u22E0";
"⊀", "\u2280";
"⪯̸", "\u2AAF\u0338";
"⪯̸", "\u2AAF\u0338";
"⤳̸", "\u2933\u0338";
"↛", "\u219B";
"⇏", "\u21CF";
"↝̸", "\u219D\u0338";
"↛", "\u219B";
"⇏", "\u21CF";
"⋫", "\u22EB";
"⋭", "\u22ED";
"⊁", "\u2281";
"⋡", "\u22E1";
"⪰̸", "\u2AB0\u0338";
"𝒩", "\uD835\uDCA9";
"𝓃", "\uD835\uDCC3";
"∤", "\u2224";
"∦", "\u2226";
"≁", "\u2241";
"≄", "\u2244";
"≄", "\u2244";
"∤", "\u2224";
"∦", "\u2226";
"⋢", "\u22E2";
"⋣", "\u22E3";
"⊄", "\u2284";
"⫅̸", "\u2AC5\u0338";
"⊈", "\u2288";
"⊂⃒", "\u2282\u20D2";
"⊈", "\u2288";
"⫅̸", "\u2AC5\u0338";
"⊁", "\u2281";
"⪰̸", "\u2AB0\u0338";
"⊅", "\u2285";
"⫆̸", "\u2AC6\u0338";
"⊉", "\u2289";
"⊃⃒", "\u2283\u20D2";
"⊉", "\u2289";
"⫆̸", "\u2AC6\u0338";
"≹", "\u2279";
"Ñ", "\u00D1";
"&Ntilde", "\u00D1";
"ñ", "\u00F1";
"&ntilde", "\u00F1";
"≸", "\u2278";
"⋪", "\u22EA";
"⋬", "\u22EC";
"⋫", "\u22EB";
"⋭", "\u22ED";
"Ν", "\u039D";
"ν", "\u03BD";
"#", "\u0023";
"№", "\u2116";
" ", "\u2007";
"≍⃒", "\u224D\u20D2";
"⊬", "\u22AC";
"⊭", "\u22AD";
"⊮", "\u22AE";
"⊯", "\u22AF";
"≥⃒", "\u2265\u20D2";
">⃒", "\u003E\u20D2";
"⤄", "\u2904";
"⧞", "\u29DE";
"⤂", "\u2902";
"≤⃒", "\u2264\u20D2";
"<⃒", "\u003C\u20D2";
"⊴⃒", "\u22B4\u20D2";
"⤃", "\u2903";
"⊵⃒", "\u22B5\u20D2";
"∼⃒", "\u223C\u20D2";
"⤣", "\u2923";
"↖", "\u2196";
"⇖", "\u21D6";
"↖", "\u2196";
"⤧", "\u2927";
"Ó", "\u00D3";
"&Oacute", "\u00D3";
"ó", "\u00F3";
"&oacute", "\u00F3";
"⊛", "\u229B";
"Ô", "\u00D4";
"&Ocirc", "\u00D4";
"ô", "\u00F4";
"&ocirc", "\u00F4";
"⊚", "\u229A";
"О", "\u041E";
"о", "\u043E";
"⊝", "\u229D";
"Ő", "\u0150";
"ő", "\u0151";
"⨸", "\u2A38";
"⊙", "\u2299";
"⦼", "\u29BC";
"Œ", "\u0152";
"œ", "\u0153";
"⦿", "\u29BF";
"𝔒", "\uD835\uDD12";
"𝔬", "\uD835\uDD2C";
"˛", "\u02DB";
"Ò", "\u00D2";
"&Ograve", "\u00D2";
"ò", "\u00F2";
"&ograve", "\u00F2";
"⧁", "\u29C1";
"⦵", "\u29B5";
"Ω", "\u03A9";
"∮", "\u222E";
"↺", "\u21BA";
"⦾", "\u29BE";
"⦻", "\u29BB";
"‾", "\u203E";
"⧀", "\u29C0";
"Ō", "\u014C";
"ō", "\u014D";
"Ω", "\u03A9";
"ω", "\u03C9";
"Ο", "\u039F";
"ο", "\u03BF";
"⦶", "\u29B6";
"⊖", "\u2296";
"𝕆", "\uD835\uDD46";
"𝕠", "\uD835\uDD60";
"⦷", "\u29B7";
"“", "\u201C";
"‘", "\u2018";
"⦹", "\u29B9";
"⊕", "\u2295";
"↻", "\u21BB";
"⩔", "\u2A54";
"∨", "\u2228";
"⩝", "\u2A5D";
"ℴ", "\u2134";
"ℴ", "\u2134";
"ª", "\u00AA";
"&ordf", "\u00AA";
"º", "\u00BA";
"&ordm", "\u00BA";
"⊶", "\u22B6";
"⩖", "\u2A56";
"⩗", "\u2A57";
"⩛", "\u2A5B";
"Ⓢ", "\u24C8";
"𝒪", "\uD835\uDCAA";
"ℴ", "\u2134";
"Ø", "\u00D8";
"&Oslash", "\u00D8";
"ø", "\u00F8";
"&oslash", "\u00F8";
"⊘", "\u2298";
"Õ", "\u00D5";
"&Otilde", "\u00D5";
"õ", "\u00F5";
"&otilde", "\u00F5";
"⨶", "\u2A36";
"⨷", "\u2A37";
"⊗", "\u2297";
"Ö", "\u00D6";
"&Ouml", "\u00D6";
"ö", "\u00F6";
"&ouml", "\u00F6";
"⌽", "\u233D";
"‾", "\u203E";
"⏞", "\u23DE";
"⎴", "\u23B4";
"⏜", "\u23DC";
"¶", "\u00B6";
"&para", "\u00B6";
"∥", "\u2225";
"∥", "\u2225";
"⫳", "\u2AF3";
"⫽", "\u2AFD";
"∂", "\u2202";
"∂", "\u2202";
"П", "\u041F";
"п", "\u043F";
"%", "\u0025";
".", "\u002E";
"‰", "\u2030";
"⊥", "\u22A5";
"‱", "\u2031";
"𝔓", "\uD835\uDD13";
"𝔭", "\uD835\uDD2D";
"Φ", "\u03A6";
"φ", "\u03C6";
"ϕ", "\u03D5";
"ℳ", "\u2133";
"☎", "\u260E";
"Π", "\u03A0";
"π", "\u03C0";
"⋔", "\u22D4";
"ϖ", "\u03D6";
"ℏ", "\u210F";
"ℎ", "\u210E";
"ℏ", "\u210F";
"⨣", "\u2A23";
"⊞", "\u229E";
"⨢", "\u2A22";
"+", "\u002B";
"∔", "\u2214";
"⨥", "\u2A25";
"⩲", "\u2A72";
"±", "\u00B1";
"±", "\u00B1";
"&plusmn", "\u00B1";
"⨦", "\u2A26";
"⨧", "\u2A27";
"±", "\u00B1";
"ℌ", "\u210C";
"⨕", "\u2A15";
"𝕡", "\uD835\uDD61";
"ℙ", "\u2119";
"£", "\u00A3";
"&pound", "\u00A3";
"⪷", "\u2AB7";
"⪻", "\u2ABB";
"≺", "\u227A";
"≼", "\u227C";
"⪷", "\u2AB7";
"≺", "\u227A";
"≼", "\u227C";
"≺", "\u227A";
"⪯", "\u2AAF";
"≼", "\u227C";
"≾", "\u227E";
"⪯", "\u2AAF";
"⪹", "\u2AB9";
"⪵", "\u2AB5";
"⋨", "\u22E8";
"⪯", "\u2AAF";
"⪳", "\u2AB3";
"≾", "\u227E";
"′", "\u2032";
"″", "\u2033";
"ℙ", "\u2119";
"⪹", "\u2AB9";
"⪵", "\u2AB5";
"⋨", "\u22E8";
"∏", "\u220F";
"∏", "\u220F";
"⌮", "\u232E";
"⌒", "\u2312";
"⌓", "\u2313";
"∝", "\u221D";
"∝", "\u221D";
"∷", "\u2237";
"∝", "\u221D";
"≾", "\u227E";
"⊰", "\u22B0";
"𝒫", "\uD835\uDCAB";
"𝓅", "\uD835\uDCC5";
"Ψ", "\u03A8";
"ψ", "\u03C8";
" ", "\u2008";
"𝔔", "\uD835\uDD14";
"𝔮", "\uD835\uDD2E";
"⨌", "\u2A0C";
"𝕢", "\uD835\uDD62";
"ℚ", "\u211A";
"⁗", "\u2057";
"𝒬", "\uD835\uDCAC";
"𝓆", "\uD835\uDCC6";
"ℍ", "\u210D";
"⨖", "\u2A16";
"?", "\u003F";
"≟", "\u225F";
""", "\u0022";
"&quot", "\u0022";
""", "\u0022";
"&QUOT", "\u0022";
"⇛", "\u21DB";
"∽̱", "\u223D\u0331";
"Ŕ", "\u0154";
"ŕ", "\u0155";
"√", "\u221A";
"⦳", "\u29B3";
"⟩", "\u27E9";
"⟫", "\u27EB";
"⦒", "\u2992";
"⦥", "\u29A5";
"⟩", "\u27E9";
"»", "\u00BB";
"&raquo", "\u00BB";
"⥵", "\u2975";
"⇥", "\u21E5";
"⤠", "\u2920";
"⤳", "\u2933";
"→", "\u2192";
"↠", "\u21A0";
"⇒", "\u21D2";
"⤞", "\u291E";
"↪", "\u21AA";
"↬", "\u21AC";
"⥅", "\u2945";
"⥴", "\u2974";
"⤖", "\u2916";
"↣", "\u21A3";
"↝", "\u219D";
"⤚", "\u291A";
"⤜", "\u291C";
"∶", "\u2236";
"ℚ", "\u211A";
"⤍", "\u290D";
"⤏", "\u290F";
"⤐", "\u2910";
"❳", "\u2773";
"}", "\u007D";
"]", "\u005D";
"⦌", "\u298C";
"⦎", "\u298E";
"⦐", "\u2990";
"Ř", "\u0158";
"ř", "\u0159";
"Ŗ", "\u0156";
"ŗ", "\u0157";
"⌉", "\u2309";
"}", "\u007D";
"Р", "\u0420";
"р", "\u0440";
"⤷", "\u2937";
"⥩", "\u2969";
"”", "\u201D";
"”", "\u201D";
"↳", "\u21B3";
"ℜ", "\u211C";
"ℛ", "\u211B";
"ℜ", "\u211C";
"ℝ", "\u211D";
"ℜ", "\u211C";
"▭", "\u25AD";
"®", "\u00AE";
"&reg", "\u00AE";
"®", "\u00AE";
"&REG", "\u00AE";
"∋", "\u220B";
"⇋", "\u21CB";
"⥯", "\u296F";
"⥽", "\u297D";
"⌋", "\u230B";
"𝔯", "\uD835\uDD2F";
"ℜ", "\u211C";
"⥤", "\u2964";
"⇁", "\u21C1";
"⇀", "\u21C0";
"⥬", "\u296C";
"Ρ", "\u03A1";
"ρ", "\u03C1";
"ϱ", "\u03F1";
"⟩", "\u27E9";
"⇥", "\u21E5";
"→", "\u2192";
"→", "\u2192";
"⇒", "\u21D2";
"⇄", "\u21C4";
"↣", "\u21A3";
"⌉", "\u2309";
"⟧", "\u27E7";
"⥝", "\u295D";
"⥕", "\u2955";
"⇂", "\u21C2";
"⌋", "\u230B";
"⇁", "\u21C1";
"⇀", "\u21C0";
"⇄", "\u21C4";
"⇌", "\u21CC";
"⇉", "\u21C9";
"↝", "\u219D";
"↦", "\u21A6";
"⊢", "\u22A2";
"⥛", "\u295B";
"⋌", "\u22CC";
"⧐", "\u29D0";
"⊳", "\u22B3";
"⊵", "\u22B5";
"⥏", "\u294F";
"⥜", "\u295C";
"⥔", "\u2954";
"↾", "\u21BE";
"⥓", "\u2953";
"⇀", "\u21C0";
"˚", "\u02DA";
"≓", "\u2253";
"⇄", "\u21C4";
"⇌", "\u21CC";
"‏", "\u200F";
"⎱", "\u23B1";
"⎱", "\u23B1";
"⫮", "\u2AEE";
"⟭", "\u27ED";
"⇾", "\u21FE";
"⟧", "\u27E7";
"⦆", "\u2986";
"𝕣", "\uD835\uDD63";
"ℝ", "\u211D";
"⨮", "\u2A2E";
"⨵", "\u2A35";
"⥰", "\u2970";
")", "\u0029";
"⦔", "\u2994";
"⨒", "\u2A12";
"⇉", "\u21C9";
"⇛", "\u21DB";
"›", "\u203A";
"𝓇", "\uD835\uDCC7";
"ℛ", "\u211B";
"↱", "\u21B1";
"↱", "\u21B1";
"]", "\u005D";
"’", "\u2019";
"’", "\u2019";
"⋌", "\u22CC";
"⋊", "\u22CA";
"▹", "\u25B9";
"⊵", "\u22B5";
"▸", "\u25B8";
"⧎", "\u29CE";
"⧴", "\u29F4";
"⥨", "\u2968";
"℞", "\u211E";
"Ś", "\u015A";
"ś", "\u015B";
"‚", "\u201A";
"⪸", "\u2AB8";
"Š", "\u0160";
"š", "\u0161";
"⪼", "\u2ABC";
"≻", "\u227B";
"≽", "\u227D";
"⪰", "\u2AB0";
"⪴", "\u2AB4";
"Ş", "\u015E";
"ş", "\u015F";
"Ŝ", "\u015C";
"ŝ", "\u015D";
"⪺", "\u2ABA";
"⪶", "\u2AB6";
"⋩", "\u22E9";
"⨓", "\u2A13";
"≿", "\u227F";
"С", "\u0421";
"с", "\u0441";
"⊡", "\u22A1";
"⋅", "\u22C5";
"⩦", "\u2A66";
"⤥", "\u2925";
"↘", "\u2198";
"⇘", "\u21D8";
"↘", "\u2198";
"§", "\u00A7";
"&sect", "\u00A7";
";", "\u003B";
"⤩", "\u2929";
"∖", "\u2216";
"∖", "\u2216";
"✶", "\u2736";
"𝔖", "\uD835\uDD16";
"𝔰", "\uD835\uDD30";
"⌢", "\u2322";
"♯", "\u266F";
"Щ", "\u0429";
"щ", "\u0449";
"Ш", "\u0428";
"ш", "\u0448";
"↓", "\u2193";
"←", "\u2190";
"∣", "\u2223";
"∥", "\u2225";
"→", "\u2192";
"↑", "\u2191";
"­", "\u00AD";
"&shy", "\u00AD";
"Σ", "\u03A3";
"σ", "\u03C3";
"ς", "\u03C2";
"ς", "\u03C2";
"∼", "\u223C";
"⩪", "\u2A6A";
"≃", "\u2243";
"≃", "\u2243";
"⪞", "\u2A9E";
"⪠", "\u2AA0";
"⪝", "\u2A9D";
"⪟", "\u2A9F";
"≆", "\u2246";
"⨤", "\u2A24";
"⥲", "\u2972";
"←", "\u2190";
"∘", "\u2218";
"∖", "\u2216";
"⨳", "\u2A33";
"⧤", "\u29E4";
"∣", "\u2223";
"⌣", "\u2323";
"⪪", "\u2AAA";
"⪬", "\u2AAC";
"⪬︀", "\u2AAC\uFE00";
"Ь", "\u042C";
"ь", "\u044C";
"⌿", "\u233F";
"⧄", "\u29C4";
"/", "\u002F";
"𝕊", "\uD835\uDD4A";
"𝕤", "\uD835\uDD64";
"♠", "\u2660";
"♠", "\u2660";
"∥", "\u2225";
"⊓", "\u2293";
"⊓︀", "\u2293\uFE00";
"⊔", "\u2294";
"⊔︀", "\u2294\uFE00";
"√", "\u221A";
"⊏", "\u228F";
"⊑", "\u2291";
"⊏", "\u228F";
"⊑", "\u2291";
"⊐", "\u2290";
"⊒", "\u2292";
"⊐", "\u2290";
"⊒", "\u2292";
"□", "\u25A1";
"□", "\u25A1";
"⊓", "\u2293";
"⊏", "\u228F";
"⊑", "\u2291";
"⊐", "\u2290";
"⊒", "\u2292";
"⊔", "\u2294";
"▪", "\u25AA";
"□", "\u25A1";
"▪", "\u25AA";
"→", "\u2192";
"𝒮", "\uD835\uDCAE";
"𝓈", "\uD835\uDCC8";
"∖", "\u2216";
"⌣", "\u2323";
"⋆", "\u22C6";
"⋆", "\u22C6";
"☆", "\u2606";
"★", "\u2605";
"ϵ", "\u03F5";
"ϕ", "\u03D5";
"¯", "\u00AF";
"⊂", "\u2282";
"⋐", "\u22D0";
"⪽", "\u2ABD";
"⫅", "\u2AC5";
"⊆", "\u2286";
"⫃", "\u2AC3";
"⫁", "\u2AC1";
"⫋", "\u2ACB";
"⊊", "\u228A";
"⪿", "\u2ABF";
"⥹", "\u2979";
"⊂", "\u2282";
"⋐", "\u22D0";
"⊆", "\u2286";
"⫅", "\u2AC5";
"⊆", "\u2286";
"⊊", "\u228A";
"⫋", "\u2ACB";
"⫇", "\u2AC7";
"⫕", "\u2AD5";
"⫓", "\u2AD3";
"⪸", "\u2AB8";
"≻", "\u227B";
"≽", "\u227D";
"≻", "\u227B";
"⪰", "\u2AB0";
"≽", "\u227D";
"≿", "\u227F";
"⪰", "\u2AB0";
"⪺", "\u2ABA";
"⪶", "\u2AB6";
"⋩", "\u22E9";
"≿", "\u227F";
"∋", "\u220B";
"∑", "\u2211";
"∑", "\u2211";
"♪", "\u266A";
"¹", "\u00B9";
"&sup1", "\u00B9";
"²", "\u00B2";
"&sup2", "\u00B2";
"³", "\u00B3";
"&sup3", "\u00B3";
"⊃", "\u2283";
"⋑", "\u22D1";
"⪾", "\u2ABE";
"⫘", "\u2AD8";
"⫆", "\u2AC6";
"⊇", "\u2287";
"⫄", "\u2AC4";
"⊃", "\u2283";
"⊇", "\u2287";
"⟉", "\u27C9";
"⫗", "\u2AD7";
"⥻", "\u297B";
"⫂", "\u2AC2";
"⫌", "\u2ACC";
"⊋", "\u228B";
"⫀", "\u2AC0";
"⊃", "\u2283";
"⋑", "\u22D1";
"⊇", "\u2287";
"⫆", "\u2AC6";
"⊋", "\u228B";
"⫌", "\u2ACC";
"⫈", "\u2AC8";
"⫔", "\u2AD4";
"⫖", "\u2AD6";
"⤦", "\u2926";
"↙", "\u2199";
"⇙", "\u21D9";
"↙", "\u2199";
"⤪", "\u292A";
"ß", "\u00DF";
"&szlig", "\u00DF";
"	", "\u0009";
"⌖", "\u2316";
"Τ", "\u03A4";
"τ", "\u03C4";
"⎴", "\u23B4";
"Ť", "\u0164";
"ť", "\u0165";
"Ţ", "\u0162";
"ţ", "\u0163";
"Т", "\u0422";
"т", "\u0442";
"⃛", "\u20DB";
"⌕", "\u2315";
"𝔗", "\uD835\uDD17";
"𝔱", "\uD835\uDD31";
"∴", "\u2234";
"∴", "\u2234";
"∴", "\u2234";
"Θ", "\u0398";
"θ", "\u03B8";
"ϑ", "\u03D1";
"ϑ", "\u03D1";
"≈", "\u2248";
"∼", "\u223C";
"  ", "\u205F\u200A";
" ", "\u2009";
" ", "\u2009";
"≈", "\u2248";
"∼", "\u223C";
"Þ", "\u00DE";
"&THORN", "\u00DE";
"þ", "\u00FE";
"&thorn", "\u00FE";
"˜", "\u02DC";
"∼", "\u223C";
"≃", "\u2243";
"≅", "\u2245";
"≈", "\u2248";
"⨱", "\u2A31";
"⊠", "\u22A0";
"×", "\u00D7";
"&times", "\u00D7";
"⨰", "\u2A30";
"∭", "\u222D";
"⤨", "\u2928";
"⌶", "\u2336";
"⫱", "\u2AF1";
"⊤", "\u22A4";
"𝕋", "\uD835\uDD4B";
"𝕥", "\uD835\uDD65";
"⫚", "\u2ADA";
"⤩", "\u2929";
"‴", "\u2034";
"™", "\u2122";
"™", "\u2122";
"▵", "\u25B5";
"▿", "\u25BF";
"◃", "\u25C3";
"⊴", "\u22B4";
"≜", "\u225C";
"▹", "\u25B9";
"⊵", "\u22B5";
"◬", "\u25EC";
"≜", "\u225C";
"⨺", "\u2A3A";
"⃛", "\u20DB";
"⨹", "\u2A39";
"⧍", "\u29CD";
"⨻", "\u2A3B";
"⏢", "\u23E2";
"𝒯", "\uD835\uDCAF";
"𝓉", "\uD835\uDCC9";
"Ц", "\u0426";
"ц", "\u0446";
"Ћ", "\u040B";
"ћ", "\u045B";
"Ŧ", "\u0166";
"ŧ", "\u0167";
"≬", "\u226C";
"↞", "\u219E";
"↠", "\u21A0";
"Ú", "\u00DA";
"&Uacute", "\u00DA";
"ú", "\u00FA";
"&uacute", "\u00FA";
"↑", "\u2191";
"↟", "\u219F";
"⇑", "\u21D1";
"⥉", "\u2949";
"Ў", "\u040E";
"ў", "\u045E";
"Ŭ", "\u016C";
"ŭ", "\u016D";
"Û", "\u00DB";
"&Ucirc", "\u00DB";
"û", "\u00FB";
"&ucirc", "\u00FB";
"У", "\u0423";
"у", "\u0443";
"⇅", "\u21C5";
"Ű", "\u0170";
"ű", "\u0171";
"⥮", "\u296E";
"⥾", "\u297E";
"𝔘", "\uD835\uDD18";
"𝔲", "\uD835\uDD32";
"Ù", "\u00D9";
"&Ugrave", "\u00D9";
"ù", "\u00F9";
"&ugrave", "\u00F9";
"⥣", "\u2963";
"↿", "\u21BF";
"↾", "\u21BE";
"▀", "\u2580";
"⌜", "\u231C";
"⌜", "\u231C";
"⌏", "\u230F";
"◸", "\u25F8";
"Ū", "\u016A";
"ū", "\u016B";
"¨", "\u00A8";
"&uml", "\u00A8";
"_", "\u005F";
"⏟", "\u23DF";
"⎵", "\u23B5";
"⏝", "\u23DD";
"⋃", "\u22C3";
"⊎", "\u228E";
"Ų", "\u0172";
"ų", "\u0173";
"𝕌", "\uD835\uDD4C";
"𝕦", "\uD835\uDD66";
"⤒", "\u2912";
"↑", "\u2191";
"↑", "\u2191";
"⇑", "\u21D1";
"⇅", "\u21C5";
"↕", "\u2195";
"↕", "\u2195";
"⇕", "\u21D5";
"⥮", "\u296E";
"↿", "\u21BF";
"↾", "\u21BE";
"⊎", "\u228E";
"↖", "\u2196";
"↗", "\u2197";
"υ", "\u03C5";
"ϒ", "\u03D2";
"ϒ", "\u03D2";
"Υ", "\u03A5";
"υ", "\u03C5";
"↥", "\u21A5";
"⊥", "\u22A5";
"⇈", "\u21C8";
"⌝", "\u231D";
"⌝", "\u231D";
"⌎", "\u230E";
"Ů", "\u016E";
"ů", "\u016F";
"◹", "\u25F9";
"𝒰", "\uD835\uDCB0";
"𝓊", "\uD835\uDCCA";
"⋰", "\u22F0";
"Ũ", "\u0168";
"ũ", "\u0169";
"▵", "\u25B5";
"▴", "\u25B4";
"⇈", "\u21C8";
"Ü", "\u00DC";
"&Uuml", "\u00DC";
"ü", "\u00FC";
"&uuml", "\u00FC";
"⦧", "\u29A7";
"⦜", "\u299C";
"ϵ", "\u03F5";
"ϰ", "\u03F0";
"∅", "\u2205";
"ϕ", "\u03D5";
"ϖ", "\u03D6";
"∝", "\u221D";
"↕", "\u2195";
"⇕", "\u21D5";
"ϱ", "\u03F1";
"ς", "\u03C2";
"⊊︀", "\u228A\uFE00";
"⫋︀", "\u2ACB\uFE00";
"⊋︀", "\u228B\uFE00";
"⫌︀", "\u2ACC\uFE00";
"ϑ", "\u03D1";
"⊲", "\u22B2";
"⊳", "\u22B3";
"⫨", "\u2AE8";
"⫫", "\u2AEB";
"⫩", "\u2AE9";
"В", "\u0412";
"в", "\u0432";
"⊢", "\u22A2";
"⊨", "\u22A8";
"⊩", "\u22A9";
"⊫", "\u22AB";
"⫦", "\u2AE6";
"⊻", "\u22BB";
"∨", "\u2228";
"⋁", "\u22C1";
"≚", "\u225A";
"⋮", "\u22EE";
"|", "\u007C";
"‖", "\u2016";
"|", "\u007C";
"‖", "\u2016";
"∣", "\u2223";
"|", "\u007C";
"❘", "\u2758";
"≀", "\u2240";
" ", "\u200A";
"𝔙", "\uD835\uDD19";
"𝔳", "\uD835\uDD33";
"⊲", "\u22B2";
"⊂⃒", "\u2282\u20D2";
"⊃⃒", "\u2283\u20D2";
"𝕍", "\uD835\uDD4D";
"𝕧", "\uD835\uDD67";
"∝", "\u221D";
"⊳", "\u22B3";
"𝒱", "\uD835\uDCB1";
"𝓋", "\uD835\uDCCB";
"⫋︀", "\u2ACB\uFE00";
"⊊︀", "\u228A\uFE00";
"⫌︀", "\u2ACC\uFE00";
"⊋︀", "\u228B\uFE00";
"⊪", "\u22AA";
"⦚", "\u299A";
"Ŵ", "\u0174";
"ŵ", "\u0175";
"⩟", "\u2A5F";
"∧", "\u2227";
"⋀", "\u22C0";
"≙", "\u2259";
"℘", "\u2118";
"𝔚", "\uD835\uDD1A";
"𝔴", "\uD835\uDD34";
"𝕎", "\uD835\uDD4E";
"𝕨", "\uD835\uDD68";
"℘", "\u2118";
"≀", "\u2240";
"≀", "\u2240";
"𝒲", "\uD835\uDCB2";
"𝓌", "\uD835\uDCCC";
"⋂", "\u22C2";
"◯", "\u25EF";
"⋃", "\u22C3";
"▽", "\u25BD";
"𝔛", "\uD835\uDD1B";
"𝔵", "\uD835\uDD35";
"⟷", "\u27F7";
"⟺", "\u27FA";
"Ξ", "\u039E";
"ξ", "\u03BE";
"⟵", "\u27F5";
"⟸", "\u27F8";
"⟼", "\u27FC";
"⋻", "\u22FB";
"⨀", "\u2A00";
"𝕏", "\uD835\uDD4F";
"𝕩", "\uD835\uDD69";
"⨁", "\u2A01";
"⨂", "\u2A02";
"⟶", "\u27F6";
"⟹", "\u27F9";
"𝒳", "\uD835\uDCB3";
"𝓍", "\uD835\uDCCD";
"⨆", "\u2A06";
"⨄", "\u2A04";
"△", "\u25B3";
"⋁", "\u22C1";
"⋀", "\u22C0";
"Ý", "\u00DD";
"&Yacute", "\u00DD";
"ý", "\u00FD";
"&yacute", "\u00FD";
"Я", "\u042F";
"я", "\u044F";
"Ŷ", "\u0176";
"ŷ", "\u0177";
"Ы", "\u042B";
"ы", "\u044B";
"¥", "\u00A5";
"&yen", "\u00A5";
"𝔜", "\uD835\uDD1C";
"𝔶", "\uD835\uDD36";
"Ї", "\u0407";
"ї", "\u0457";
"𝕐", "\uD835\uDD50";
"𝕪", "\uD835\uDD6A";
"𝒴", "\uD835\uDCB4";
"𝓎", "\uD835\uDCCE";
"Ю", "\u042E";
"ю", "\u044E";
"ÿ", "\u00FF";
"&yuml", "\u00FF";
"Ÿ", "\u0178";
"Ź", "\u0179";
"ź", "\u017A";
"Ž", "\u017D";
"ž", "\u017E";
"З", "\u0417";
"з", "\u0437";
"Ż", "\u017B";
"ż", "\u017C";
"ℨ", "\u2128";
"​", "\u200B";
"Ζ", "\u0396";
"ζ", "\u03B6";
"𝔷", "\uD835\uDD37";
"ℨ", "\u2128";
"Ж", "\u0416";
"ж", "\u0436";
"⇝", "\u21DD";
"𝕫", "\uD835\uDD6B";
"ℤ", "\u2124";
"𝒵", "\uD835\uDCB5";
"𝓏", "\uD835\uDCCF";
"‍", "\u200D";
"‌", "\u200C";
|] |> Map.ofArray
/// Classifies a character reference.
///
/// `Number n` is returned for numeric references: decimal (`&#65;`),
/// hexadecimal (`&#x41;` / `&#X41;`) and the non-standard bare-hex form
/// (`&x41;`). Everything else, including numeric references whose digits
/// cannot be parsed, is returned as `Lookup orig` carrying the untouched input
/// so the caller can try the named-entity table.
let (|Number|Lookup|) (orig:string) =
    let s = orig.TrimEnd([|';'|])
    // Parses the digit part with the given number style; None when unparsable.
    let tryParse (digits:string) (style:NumberStyles) =
        match UInt32.TryParse(digits, style, CultureInfo.InvariantCulture) with
        | true, i -> Some i
        | false, _ -> None
    let parsed =
        if s.Length > 2 then
            // Prefix and hex marker are matched case-insensitively; the digits
            // themselves are taken from the original string.
            let lowered = s.ToLowerInvariant()
            match lowered.[0..1], lowered.[2] with
            // "&#x..." is hexadecimal and must be parsed as such
            // (previously it was parsed as a decimal number).
            | "&#", 'x' -> tryParse (s.Substring(3)) NumberStyles.AllowHexSpecifier
            | "&#", _ -> tryParse (s.Substring(2)) NumberStyles.Integer
            | "&x", _ -> tryParse (s.Substring(2)) NumberStyles.AllowHexSpecifier
            | _ -> None
        else None
    match parsed with
    | Some i -> Number(i)
    | None -> Lookup(orig)
/// Resolves one character reference to the text it stands for.
///
/// Numeric references become the corresponding character (a UTF-16 surrogate
/// pair for code points above U+FFFF). Numeric values that are not valid
/// characters - zero, the surrogate range U+D800..U+DFFF and anything above
/// U+10FFFF - become U+FFFD (replacement character) instead of producing a
/// NUL, a lone surrogate or a silently wrapped-around code point.
/// Named references are looked up in `refs`; unknown names are returned
/// unchanged.
let substitute (ref:string) =
    match ref with
    | Number(num) ->
        let isSurrogate = num >= 0xD800u && num <= 0xDFFFu
        if num = 0u || isSurrogate || num > 0x10FFFFu then
            "\uFFFD"
        elif num > 65535u then
            let lead, tail = UnicodeHelper.getUnicodeSurrogatePair num
            string lead + string tail
        else
            string (char num)
    | Lookup(name) -> defaultArg (refs.TryFind name) name
open System
open System.ComponentModel
open System.IO
open System.Text
open System.Text.RegularExpressions
open System.Runtime.InteropServices
// --------------------------------------------------------------------------------------
/// A single attribute of an HTML element. Names built through `NewAttribute` are lowercase
type XHtmlAttribute =
    | XHtmlAttribute of name:string * value:string

    /// <summary>
    /// Builds an attribute, lowercasing its name
    /// </summary>
    /// <param name="name">The attribute name, in any casing</param>
    /// <param name="value">The attribute value, stored as given</param>
    static member NewAttribute(name:string, value:string) =
        let normalizedName = name.ToLowerInvariant()
        XHtmlAttribute(normalizedName, value)
/// A node of an HTML tree. Element names built through `NewElement` are lowercase
[<StructuredFormatDisplay("{_Print}")>]
type XHtmlNode =
    | HtmlElement of name:string * attributes:XHtmlAttribute list * elements:XHtmlNode list
    | HtmlText of content:string
    | HtmlComment of content:string
    | HtmlCData of content:string

    /// <summary>
    /// Builds an element with no attributes and no children
    /// </summary>
    /// <param name="name">The element name, in any casing</param>
    static member NewElement(name:string) =
        HtmlElement(name.ToLowerInvariant(), [], [])

    /// <summary>
    /// Builds an element with attributes and no children
    /// </summary>
    /// <param name="name">The element name, in any casing</param>
    /// <param name="attrs">Name/value pairs turned into attributes via XHtmlAttribute.NewAttribute</param>
    static member NewElement(name:string, attrs:seq<_>) =
        let attributes = attrs |> Seq.map XHtmlAttribute.NewAttribute |> List.ofSeq
        HtmlElement(name.ToLowerInvariant(), attributes, [])

    /// <summary>
    /// Builds an element with children and no attributes
    /// </summary>
    /// <param name="name">The element name, in any casing</param>
    /// <param name="children">The child nodes of the element</param>
    static member NewElement(name:string, children:seq<_>) =
        HtmlElement(name.ToLowerInvariant(), [], Seq.toList children)

    /// <summary>
    /// Builds an element with both attributes and children
    /// </summary>
    /// <param name="name">The element name, in any casing</param>
    /// <param name="attrs">Name/value pairs turned into attributes via XHtmlAttribute.NewAttribute</param>
    /// <param name="children">The child nodes of the element</param>
    static member NewElement(name:string, attrs:seq<_>, children:seq<_>) =
        let attributes = attrs |> Seq.map XHtmlAttribute.NewAttribute |> List.ofSeq
        HtmlElement(name.ToLowerInvariant(), attributes, Seq.toList children)

    /// <summary>
    /// Builds a text node
    /// </summary>
    /// <param name="content">The text</param>
    static member NewText content = HtmlText(content)

    /// <summary>
    /// Builds a comment node
    /// </summary>
    /// <param name="content">The comment text, without the delimiters</param>
    static member NewComment content = HtmlComment(content)

    /// <summary>
    /// Builds a CDATA node
    /// </summary>
    /// <param name="content">The CDATA payload, without the delimiters</param>
    static member NewCData content = HtmlCData(content)

    /// Serialises the node as indented markup. Elements that contain only
    /// text are kept on one line. Childless elements are written self-closed,
    /// except `textarea`, which always gets an explicit end tag.
    /// Attribute values and text are written verbatim (no escaping).
    override x.ToString() =
        let buffer = StringBuilder()
        let write (text:string) = buffer.Append text |> ignore
        let writeEndTag tagName =
            write "</"
            write tagName
            write ">"
        // Starts a new line and indents it by `width` spaces.
        let breakLine width =
            buffer.AppendLine() |> ignore
            write (String(' ', width))
        let isText = function
            | HtmlText _ -> true
            | _ -> false
        // `hasPreviousSibling` is true for every child except the first one of
        // its parent; only those put themselves on a fresh line.
        let rec render depth hasPreviousSibling node =
            match node with
            | HtmlText text -> write text
            | HtmlComment text ->
                write "<!--"
                write text
                write "-->"
            | HtmlCData text ->
                write "<![CDATA["
                write text
                write "]]>"
            | HtmlElement(tag, attrs, children) ->
                let textOnly = List.forall isText children
                if hasPreviousSibling && not textOnly then
                    breakLine depth
                write "<"
                write tag
                for XHtmlAttribute(key, value) in attrs do
                    write " "
                    write key
                    write "=\""
                    write value
                    write "\""
                if List.isEmpty children then
                    if tag = "textarea" then
                        write ">"
                        writeEndTag tag
                    else
                        write " />"
                else
                    write ">"
                    if not textOnly then
                        breakLine (depth + 2)
                    children |> List.iteri (fun index child -> render (depth + 2) (index > 0) child)
                    if not textOnly then
                        breakLine depth
                    writeEndTag tag
        render 0 false x
        buffer.ToString()

    /// [omit]
    [<EditorBrowsableAttribute(EditorBrowsableState.Never)>]
    [<CompilerMessageAttribute("This method is intended for use in generated code only.", 10001, IsHidden=true, IsError=false)>]
    member x._Print = x.ToString()
/// An HTML document: a doctype string (possibly empty) plus its top-level nodes
[<StructuredFormatDisplay("{_Print}")>]
type XHtmlDocument =
    | HtmlDocument of docType:string * elements:XHtmlNode list

    /// <summary>
    /// Builds a document with an explicit doctype
    /// </summary>
    /// <param name="docType">The document type specifier string</param>
    /// <param name="children">The top-level nodes of the document</param>
    static member New(docType, children:seq<_>) =
        HtmlDocument(docType, Seq.toList children)

    /// <summary>
    /// Builds a document with an empty doctype
    /// </summary>
    /// <param name="children">The top-level nodes of the document</param>
    static member New(children:seq<_>) =
        HtmlDocument("", Seq.toList children)

    /// Serialises the document: a `<!DOCTYPE ...>` line when the doctype is
    /// non-empty, followed by every top-level node's own serialisation.
    override x.ToString() =
        let (HtmlDocument(docType, elements)) = x
        let header =
            if String.IsNullOrEmpty docType then ""
            else "<!DOCTYPE " + docType + ">" + Environment.NewLine
        let body =
            elements
            |> List.map (fun node -> node.ToString())
            |> String.concat ""
        header + body

    /// [omit]
    [<EditorBrowsableAttribute(EditorBrowsableState.Never)>]
    [<CompilerMessageAttribute("This method is intended for use in generated code only.", 10001, IsHidden=true, IsError=false)>]
    member x._Print = x.ToString()
// --------------------------------------------------------------------------------------
/// Character-classifying active patterns used by the tokeniser
module private TextParser =
    /// Turns a predicate into a partial pattern that yields the tested value
    let toPattern f c = if f c then Some c else None

    /// Matches the end-of-input marker: the code unit 65535, which is what
    /// converting the reader's -1 result to `char` produces
    let (|EndOfFile|_|) (c : char) =
        match int c with
        | -1 | 65535 -> Some c
        | _ -> None

    let (|Whitespace|_|) = toPattern (fun (c : char) -> Char.IsWhiteSpace c)
    let (|LetterDigit|_|) = toPattern (fun (c : char) -> Char.IsLetterOrDigit c)
    let (|Letter|_|) = toPattern (fun (c : char) -> Char.IsLetter c)
// --------------------------------------------------------------------------------------
module HtmlParser =
/// Matches a run of one or more whitespace characters (compiled on first use)
let wsRegex = lazy (new Regex("\\s+", RegexOptions.Compiled))
/// Matches a run of characters other than ASCII letters, digits and underscore
let invalidTypeNameRegex = lazy (new Regex("[^0-9a-zA-Z_]+", RegexOptions.Compiled))
/// Matches `h` followed by a digit anywhere in the input (unanchored)
let headingRegex = lazy (new Regex("""h\d""", RegexOptions.Compiled))
/// A lexical token produced by the tokeniser
type XHtmlToken =
    | DocType of string
    | Tag of isSelfClosing:bool * name:string * attrs:XHtmlAttribute list
    | TagEnd of string
    | Text of string
    | Comment of string
    | CData of string
    | EOF

    /// Short diagnostic description: the token kind plus, for doctype and
    /// tag tokens, the doctype text or tag name
    override x.ToString() =
        match x with
        | EOF -> "eof"
        | Text _ -> "text"
        | Comment _ -> "comment"
        | CData _ -> "cdata"
        | DocType dt -> "doctype " + dt
        | TagEnd name -> "tagEnd " + name
        | Tag(selfClose, name, _) -> sprintf "tag %b %s" selfClose name

    /// True when the token is an end tag with exactly the given name
    member x.IsEndTag name =
        match x with
        | TagEnd endName -> name = endName
        | _ -> false
/// Character-oriented helpers on TextReader
type TextReader with
    /// Next character without consuming it. At end of input the reader's -1
    /// converts to the code unit 65535.
    member x.PeekChar() = x.Peek() |> char

    /// Consumes and returns the next character. At end of input the reader's
    /// -1 converts to the code unit 65535.
    member x.ReadChar() = x.Read() |> char

    /// Consumes up to `n` characters and returns them as a string.
    /// When fewer than `n` characters remain, only the characters actually
    /// read are returned (previously the result was padded with NUL
    /// characters up to length `n`).
    member x.ReadNChar(n) =
        let buffer : char[] = Array.zeroCreate n
        let read = x.ReadBlock(buffer, 0, n)
        String(buffer, 0, read)
/// A mutable character buffer kept as a list in reverse order: the most
/// recently added character is the head
type CharList =
    { mutable Contents : char list }

    /// A fresh, empty buffer (a new record on every access)
    static member Empty = { Contents = [] }

    /// The buffered characters in insertion order
    override x.ToString() =
        let ordered = List.rev x.Contents
        String(Array.ofList ordered)

    /// Appends a character
    member x.Cons(c) = x.Contents <- c :: x.Contents

    /// Number of buffered characters
    member x.Length = List.length x.Contents

    /// Discards everything buffered so far
    member x.Clear() = x.Contents <- []
/// The kind of content the tokeniser is currently accumulating; it decides how
/// buffered content is turned into a token when emitted
type InsertionMode =
    | DefaultMode
    | FormattedMode
    | ScriptMode
    | CharRefMode
    | CommentMode
    | DocTypeMode
    | CDATAMode

    /// Lowercase diagnostic name of the mode
    override x.ToString() =
        match x with
        | CDATAMode -> "cdata"
        | DocTypeMode -> "doctype"
        | CommentMode -> "comment"
        | CharRefMode -> "charref"
        | ScriptMode -> "script"
        | FormattedMode -> "formatted"
        | DefaultMode -> "default"
/// Mutable working state of the tokeniser: the reader, the tag and attribute
/// being assembled, the pending content buffer, the current insertion mode and
/// the tokens emitted so far (most recent first)
type XHtmlState =
    { Attributes : (CharList * CharList) list ref
      CurrentTag : CharList ref
      Content : CharList ref
      InsertionMode : InsertionMode ref
      Tokens : XHtmlToken list ref
      Reader : TextReader }

    /// Fresh state over the given reader, starting in DefaultMode
    static member Create (reader:TextReader) =
        { Attributes = ref []
          CurrentTag = ref CharList.Empty
          Content = ref CharList.Empty
          InsertionMode = ref DefaultMode
          Tokens = ref []
          Reader = reader }

    /// Consumes one character and discards it
    member x.Pop() = x.Reader.Read() |> ignore

    /// Next character without consuming it
    member x.Peek() = x.Reader.PeekChar()

    /// Consumes `count` characters and returns them as an array
    member x.Pop(count) =
        [| for _ in 1 .. count -> x.Reader.ReadChar() |]

    /// Pending content as a string
    member x.Contents = x.Content.Value.ToString()

    /// Number of characters of pending content
    member x.ContentLength = x.Content.Value.Length

    /// Starts a new, empty attribute; it becomes the current attribute
    member x.NewAttribute() =
        x.Attributes.Value <- (CharList.Empty, CharList.Empty) :: x.Attributes.Value

    /// Consumes one character and appends it, lowercased, to the current
    /// attribute's name (starting an attribute first if there is none)
    member x.ConsAttrName() =
        match x.Attributes.Value with
        | [] -> x.NewAttribute(); x.ConsAttrName()
        | (h,_) :: _ -> h.Cons(Char.ToLowerInvariant(x.Reader.ReadChar()))

    /// Name of the tag being assembled, trimmed
    member x.CurrentTagName() =
        x.CurrentTag.Value.ToString().Trim()

    /// Name of the current attribute, or the empty string when there is none
    member x.CurrentAttrName() =
        match x.Attributes.Value with
        | [] -> String.Empty
        | (h,_) :: _ -> h.ToString()

    /// Appends a character to the current attribute's value (starting an
    /// attribute first if there is none)
    member x.ConsAttrValue(c) =
        match x.Attributes.Value with
        | [] -> x.NewAttribute(); x.ConsAttrValue(c)
        | (_,h) :: _ -> h.Cons(c)

    /// Consumes one character and appends it to the current attribute's value
    member x.ConsAttrValue() =
        x.ConsAttrValue(x.Reader.ReadChar())

    /// Collected attributes in source order; entries with an empty name are dropped
    member x.GetAttributes() =
        x.Attributes.Value
        |> List.filter (fun (key, _) -> key.Length > 0)
        |> List.map (fun (key, value) -> XHtmlAttribute(key.ToString(), value.ToString()))
        |> List.rev

    /// Emits the tag being assembled as a self-closing tag and resets the
    /// tag, attributes and insertion mode
    member x.EmitSelfClosingTag() =
        let name = x.CurrentTag.Value.ToString().Trim()
        let result = Tag(true, name, x.GetAttributes())
        x.CurrentTag.Value <- CharList.Empty
        x.InsertionMode.Value <- DefaultMode
        x.Attributes.Value <- []
        x.Tokens.Value <- result :: x.Tokens.Value

    /// True while the tag being assembled is `pre` or `code`
    member x.IsFormattedTag =
        match x.CurrentTagName() with
        | "pre" | "code" -> true
        | _ -> false

    /// True while the tag being assembled is `script` or `style`
    member x.IsScriptTag =
        match x.CurrentTagName() with
        | "script" | "style" -> true
        | _ -> false

    /// Emits the tag being assembled as a start tag or, when `isEnd` is true,
    /// an end tag. A start tag switches the insertion mode to FormattedMode
    /// for `pre`/`code` and ScriptMode for `script`/`style`.
    member x.EmitTag(isEnd) =
        let name = x.CurrentTag.Value.ToString().Trim()
        // Pending content is flushed first so that it precedes the end tag.
        if isEnd && x.ContentLength > 0 then x.Emit()
        let result =
            if isEnd then TagEnd(name)
            else Tag(false, name, x.GetAttributes())
        // Must be decided before CurrentTag is reset: both checks read it.
        x.InsertionMode.Value <-
            if isEnd then DefaultMode
            elif x.IsFormattedTag then FormattedMode
            elif x.IsScriptTag then ScriptMode
            else DefaultMode
        x.CurrentTag.Value <- CharList.Empty
        x.Attributes.Value <- []
        x.Tokens.Value <- result :: x.Tokens.Value

    /// Resolves the pending content as a character reference, appends the
    /// result to the current attribute's value and returns to DefaultMode
    member x.EmitToAttributeValue() =
        assert (x.InsertionMode.Value = InsertionMode.CharRefMode)
        let content = x.Content.Value.ToString() |> HtmlCharRefs.substitute
        for c in content do
            x.ConsAttrValue c
        x.Content.Value <- CharList.Empty
        x.InsertionMode.Value <- DefaultMode

    /// Turns the pending content into a token according to the insertion mode,
    /// then clears the content and returns to DefaultMode. Empty text tokens
    /// are not recorded.
    member x.Emit() : unit =
        let content = x.Content.Value.ToString()
        let result =
            match x.InsertionMode.Value with
            | DefaultMode ->
                // Whitespace runs collapse to one space; content that was
                // nothing but whitespace is dropped altogether.
                let normalized = wsRegex.Value.Replace(content, " ")
                Text (if normalized = " " then "" else normalized)
            | FormattedMode -> Text content
            | ScriptMode -> Text content
            | CharRefMode -> Text (HtmlCharRefs.substitute (content.Trim()))
            | CommentMode -> Comment content
            | DocTypeMode -> DocType content
            | CDATAMode -> CData (content.Replace("<![CDATA[", "").Replace("]]>", ""))
        x.Content.Value <- CharList.Empty
        x.InsertionMode.Value <- DefaultMode
        match result with
        | Text t when String.IsNullOrEmpty(t) -> ()
        | _ -> x.Tokens.Value <- result :: x.Tokens.Value

    /// Consumes one character and appends it to the pending content
    member x.Cons() = x.Content.Value.Cons(x.Reader.ReadChar())
    /// Appends a character to the pending content
    member x.Cons(char) = x.Content.Value.Cons(char)
    /// Appends every character of an array to the pending content
    member x.Cons(char) = Array.iter (x.Content.Value.Cons) char
    /// Appends every character of a string to the pending content
    member x.Cons(char : string) = x.Cons(char.ToCharArray())

    /// Consumes one character and appends it, lowercased, to the tag being
    /// assembled; whitespace is consumed but not stored
    member x.ConsTag() =
        match x.Reader.ReadChar() with
        | TextParser.Whitespace _ -> ()
        | a -> x.CurrentTag.Value.Cons(Char.ToLowerInvariant a)

    /// Discards the pending content
    member x.ClearContent() =
        x.Content.Value.Clear()
// Tokenises a stream into a sequence of HTML tokens.
//
// The tokeniser is a set of mutually recursive state functions. Each one looks
// at the next character, consumes or buffers it, and hands over to the next
// state. A state returns once a token has been emitted (or at end of input),
// and the driver loop at the bottom restarts from `data` until the reader is
// exhausted.
//
// Every state that loops on "any other character" must handle end of input
// itself: at end of input the reader keeps returning the end-of-file marker
// without advancing, so a state lacking that case never terminates.
let private tokenise reader =
    let state = XHtmlState.Create reader
    let rec data (state:XHtmlState) =
        match state.Peek() with
        | '<' ->
            // Pending text is flushed first; the '<' is handled on the next round.
            if state.ContentLength > 0
            then state.Emit()
            else state.Pop(); tagOpen state
        | TextParser.EndOfFile _ -> state.Tokens := EOF :: !state.Tokens
        | '&' ->
            if state.ContentLength > 0
            then state.Emit()
            else
                state.InsertionMode := CharRefMode
                charRef state
        | _ ->
            match !state.InsertionMode with
            | DefaultMode -> state.Cons(); data state
            | ScriptMode -> script state
            | FormattedMode -> state.Cons(); data state
            | CharRefMode -> charRef state
            | DocTypeMode -> docType state
            | CommentMode -> comment state
            | CDATAMode -> data state
    and script state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | ''' -> state.Cons(); scriptSingleQuoteString state
        | '"' -> state.Cons(); scriptDoubleQuoteString state
        | '/' -> state.Cons(); scriptSlash state
        | '<' -> state.Pop(); scriptLessThanSign state
        | _ -> state.Cons(); script state
    and scriptSingleQuoteString state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | ''' -> state.Cons(); script state
        | _ -> state.Cons(); scriptSingleQuoteString state
    and scriptDoubleQuoteString state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '"' -> state.Cons(); script state
        | _ -> state.Cons(); scriptDoubleQuoteString state
    and scriptSlash state =
        match state.Peek() with
        | '/' -> state.Cons(); scriptSingleLineComment state
        | '*' -> state.Cons(); scriptMultiLineComment state
        | _ -> scriptRegex state
    and scriptMultiLineComment state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '*' -> state.Cons(); scriptMultiLineCommentStar state
        | _ -> state.Cons(); scriptMultiLineComment state
    and scriptMultiLineCommentStar state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '/' -> state.Cons(); script state
        | _ -> scriptMultiLineComment state
    and scriptSingleLineComment state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '\n' -> state.Cons(); script state
        | _ -> state.Cons(); scriptSingleLineComment state
    and scriptRegex state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '/' -> state.Cons(); script state
        | '\\' -> state.Cons(); scriptRegexBackslash state
        | _ -> state.Cons(); scriptRegex state
    and scriptRegexBackslash state =
        match state.Peek() with
        | _ -> state.Cons(); scriptRegex state
    and scriptLessThanSign state =
        match state.Peek() with
        | '/' -> state.Pop(); scriptEndTagOpen state
        | '!' -> state.Cons('<'); state.Cons(); scriptDataEscapeStart state
        | _ -> state.Cons('<'); state.Cons(); script state
    and scriptDataEscapeStart state =
        match state.Peek() with
        | '-' -> state.Cons(); scriptDataEscapeStartDash state
        | _ -> script state
    and scriptDataEscapeStartDash state =
        match state.Peek() with
        | '-' -> state.Cons(); scriptDataEscapedDashDash state
        | _ -> script state
    and scriptDataEscapedDashDash state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataEscapedDashDash state
        | '<' -> state.Pop(); scriptDataEscapedLessThanSign state
        | '>' -> state.Cons(); script state
        | _ -> state.Cons(); scriptDataEscaped state
    and scriptDataEscapedLessThanSign state =
        match state.Peek() with
        | '/' -> state.Pop(); scriptDataEscapedEndTagOpen state
        | TextParser.Letter _ -> state.Cons('<'); state.Cons(); scriptDataDoubleEscapeStart state
        | _ -> state.Cons('<'); state.Cons(); scriptDataEscaped state
    and scriptDataDoubleEscapeStart state =
        match state.Peek() with
        | TextParser.Whitespace _ | '/' | '>' when state.IsScriptTag -> state.Cons(); scriptDataDoubleEscaped state
        | TextParser.Letter _ -> state.Cons(); scriptDataDoubleEscapeStart state
        | _ -> state.Cons(); scriptDataEscaped state
    and scriptDataDoubleEscaped state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataDoubleEscapedDash state
        | '<' -> state.Cons(); scriptDataDoubleEscapedLessThanSign state
        | _ -> state.Cons(); scriptDataDoubleEscaped state
    and scriptDataDoubleEscapedDash state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataDoubleEscapedDashDash state
        | '<' -> state.Cons(); scriptDataDoubleEscapedLessThanSign state
        | _ -> state.Cons(); scriptDataDoubleEscaped state
    and scriptDataDoubleEscapedLessThanSign state =
        match state.Peek() with
        | '/' -> state.Cons(); scriptDataDoubleEscapeEnd state
        | _ -> state.Cons(); scriptDataDoubleEscaped state
    and scriptDataDoubleEscapeEnd state =
        match state.Peek() with
        | TextParser.Whitespace _ | '/' | '>' when state.IsScriptTag -> state.Cons(); scriptDataDoubleEscaped state
        | TextParser.Letter _ -> state.Cons(); scriptDataDoubleEscapeEnd state
        | _ -> state.Cons(); scriptDataDoubleEscaped state
    and scriptDataDoubleEscapedDashDash state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataDoubleEscapedDashDash state
        | '<' -> state.Cons(); scriptDataDoubleEscapedLessThanSign state
        | '>' -> state.Cons(); script state
        | _ -> state.Cons(); scriptDataDoubleEscaped state
    and scriptDataEscapedEndTagOpen state =
        match state.Peek() with
        | TextParser.Letter _ -> scriptDataEscapedEndTagName state
        | _ -> state.Cons([|'<';'/'|]); state.Cons(); scriptDataEscaped state
    and scriptDataEscapedEndTagName state =
        match state.Peek() with
        | TextParser.Whitespace _ when state.IsScriptTag -> state.Pop(); beforeAttributeName state
        | '/' when state.IsScriptTag -> state.Pop(); selfClosingStartTag state
        | '>' when state.IsScriptTag -> state.Pop(); state.EmitTag(true)
        | '>' ->
            // Not a script/style end tag: put it back into the script text.
            state.Cons([|'<'; '/'|])
            state.Cons(state.CurrentTagName())
            (!state.CurrentTag).Clear()
            script state
        | TextParser.Letter _ -> state.ConsTag(); scriptDataEscapedEndTagName state
        | _ -> state.Cons([|'<';'/'|]); state.Cons(); scriptDataEscaped state
    and scriptDataEscaped state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataEscapedDash state
        | '<' -> scriptDataEscapedLessThanSign state
        | _ -> state.Cons(); scriptDataEscaped state
    and scriptDataEscapedDash state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | '-' -> state.Cons(); scriptDataEscapedDashDash state
        | '<' -> scriptDataEscapedLessThanSign state
        | _ -> state.Cons(); scriptDataEscaped state
    and scriptEndTagOpen state =
        match state.Peek() with
        | TextParser.Letter _ -> scriptEndTagName state
        | _ -> state.Cons('<'); state.Cons('/'); script state
    and scriptEndTagName state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); beforeAttributeName state
        | '/' when state.IsScriptTag -> state.Pop(); selfClosingStartTag state
        | '>' when state.IsScriptTag -> state.Pop(); state.EmitTag(true)
        | '>' ->
            // Not a script/style end tag: put it back into the script text.
            state.Cons([|'<'; '/'|])
            state.Cons(state.CurrentTagName())
            (!state.CurrentTag).Clear()
            script state
        | TextParser.Letter _ -> state.ConsTag(); scriptEndTagName state
        | _ -> state.Cons('<'); state.Cons('/'); script state
    and charRef state =
        match state.Peek() with
        | ';' -> state.Cons(); state.Emit()
        | '<' -> state.Emit()
        // Unterminated reference at end of input: emit what was collected
        // instead of looping forever.
        | TextParser.EndOfFile _ -> state.Emit()
        | _ -> state.Cons(); charRef state
    and tagOpen state =
        match state.Peek() with
        | '!' -> state.Pop(); markupDeclaration state
        | '/' -> state.Pop(); endTagOpen state
        | '?' -> state.Pop(); bogusComment state
        | TextParser.Letter _ -> state.ConsTag(); tagName false state
        | _ -> state.Cons('<'); data state
    and bogusComment state =
        let rec bogusComment' (state:XHtmlState) =
            let exitBogusComment state =
                state.InsertionMode := CommentMode
                state.Emit()
            match state.Peek() with
            | '>' -> state.Cons(); exitBogusComment state
            | TextParser.EndOfFile _ -> exitBogusComment state
            | _ -> state.Cons(); bogusComment' state
        bogusComment' state
    and markupDeclaration state =
        match state.Pop(2) with
        | [|'-';'-'|] -> comment state
        | current ->
            // Two characters are already consumed; five more complete the
            // seven-character keywords tested below.
            match new String(Array.append current (state.Pop(5))) with
            | "DOCTYPE" -> docType state
            | "[CDATA[" -> state.Cons("<![CDATA[".ToCharArray()); cData 0 state
            | _ -> bogusComment state
    and cData i (state:XHtmlState) =
        // `i` counts how many characters of the closing "]]>" have been seen.
        match state.Peek() with
        | ']' when i = 0 || i = 1 ->
            state.Cons()
            cData (i + 1) state
        | '>' when i = 2 ->
            state.Cons()
            state.InsertionMode := CDATAMode
            state.Emit()
        | TextParser.EndOfFile _ ->
            state.InsertionMode := CDATAMode
            state.Emit()
        | _ ->
            state.Cons()
            cData 0 state
    and docType state =
        match state.Peek() with
        | '>' ->
            state.Pop()
            state.InsertionMode := DocTypeMode
            state.Emit()
        // Unterminated doctype at end of input: emit what was collected
        // instead of looping forever.
        | TextParser.EndOfFile _ ->
            state.InsertionMode := DocTypeMode
            state.Emit()
        | _ -> state.Cons(); docType state
    and comment state =
        match state.Peek() with
        | '-' -> state.Pop(); commentEndDash state
        | TextParser.EndOfFile _ ->
            state.InsertionMode := CommentMode
            state.Emit()
        | _ -> state.Cons(); comment state
    and commentEndDash state =
        match state.Peek() with
        | '-' -> state.Pop(); commentEndState state
        | TextParser.EndOfFile _ ->
            state.InsertionMode := CommentMode
            state.Emit()
        | _ ->
            state.Cons(); comment state
    and commentEndState state =
        match state.Peek() with
        | '>' ->
            state.Pop()
            state.InsertionMode := CommentMode
            state.Emit()
        | TextParser.EndOfFile _ ->
            state.InsertionMode := CommentMode
            state.Emit()
        | _ -> state.Cons(); comment state
    and tagName isEndTag state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); beforeAttributeName state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(isEndTag)
        // Input ends inside a tag name: the unfinished tag is dropped and
        // `data` records the end of input.
        | TextParser.EndOfFile _ -> data state
        | _ -> state.ConsTag(); tagName isEndTag state
    and selfClosingStartTag state =
        match state.Peek() with
        | '>' -> state.Pop(); state.EmitSelfClosingTag()
        | TextParser.EndOfFile _ -> data state
        | _ -> beforeAttributeName state
    and endTagOpen state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | TextParser.Letter _ -> state.ConsTag(); tagName true state
        | '>' -> state.Pop(); data state
        | _ -> comment state
    and beforeAttributeName state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); beforeAttributeName state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(false)
        | _ -> attributeName state
    and attributeName state =
        match state.Peek() with
        | '=' -> state.Pop(); beforeAttributeValue state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(false)
        // Input ends inside an attribute name: the unfinished tag is dropped
        // and `data` records the end of input.
        | TextParser.EndOfFile _ -> data state
        | TextParser.LetterDigit _ -> state.ConsAttrName(); attributeName state
        | TextParser.Whitespace _ -> afterAttributeName state
        | _ -> state.ConsAttrName(); attributeName state
    and afterAttributeName state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); afterAttributeName state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(false)
        | '=' -> state.Pop(); beforeAttributeValue state
        | _ -> state.NewAttribute(); attributeName state
    and beforeAttributeValue state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); beforeAttributeValue state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(false)
        | '"' -> state.Pop(); attributeValueQuoted '"' state
        | '\'' -> state.Pop(); attributeValueQuoted '\'' state
        | _ -> attributeValueUnquoted state
    and attributeValueUnquoted state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); state.NewAttribute(); beforeAttributeName state
        | '/' -> state.Pop(); attributeValueUnquotedSlash state
        | '>' -> state.Pop(); state.EmitTag(false)
        | '&' ->
            assert (state.ContentLength = 0)
            state.InsertionMode := InsertionMode.CharRefMode
            attributeValueCharRef ['/'; '>'] attributeValueUnquoted state
        // Input ends inside an unquoted value: the unfinished tag is dropped
        // and `data` records the end of input.
        | TextParser.EndOfFile _ -> data state
        | _ -> state.ConsAttrValue(); attributeValueUnquoted state
    and attributeValueUnquotedSlash state =
        match state.Peek() with
        | '>' -> selfClosingStartTag state
        | _ -> state.ConsAttrValue('/'); state.ConsAttrValue(); attributeValueUnquoted state
    and attributeValueQuoted quote state =
        match state.Peek() with
        | TextParser.EndOfFile _ -> data state
        | c when c = quote -> state.Pop(); afterAttributeValueQuoted state
        | '&' ->
            assert (state.ContentLength = 0)
            state.InsertionMode := InsertionMode.CharRefMode
            attributeValueCharRef [quote] (attributeValueQuoted quote) state
        | _ -> state.ConsAttrValue(); attributeValueQuoted quote state
    and attributeValueCharRef stop continuation (state:XHtmlState) =
        // Collects a character reference inside an attribute value. It ends at
        // ';', at end of input or at any character in `stop`; the resolved text
        // goes into the attribute value and `continuation` resumes the value.
        match state.Peek() with
        | ';' ->
            state.Cons()
            state.EmitToAttributeValue()
            continuation state
        | TextParser.EndOfFile _ ->
            state.EmitToAttributeValue()
            continuation state
        | c when List.exists ((=) c) stop ->
            state.EmitToAttributeValue()
            continuation state
        | _ ->
            state.Cons()
            attributeValueCharRef stop continuation state
    and afterAttributeValueQuoted state =
        match state.Peek() with
        | TextParser.Whitespace _ -> state.Pop(); state.NewAttribute(); afterAttributeValueQuoted state
        | '/' -> state.Pop(); selfClosingStartTag state
        | '>' -> state.Pop(); state.EmitTag(false)
        | _ -> attributeName state
    // Driver: keep restarting from `data` until the reader is exhausted.
    let next = ref (state.Reader.Peek())
    while !next <> -1 do
        data state
        next := state.Reader.Peek()
    // Tokens were accumulated most-recent-first.
    !state.Tokens |> List.rev
let private parse reader =
let canNotHaveChildren (name:string) =
match name with
| "area" | "base" | "br" | "col" | "embed"| "hr" | "img" | "input" | "keygen" | "link" | "menuitem" | "meta" | "param"
| "source" | "track" | "wbr" -> true
| _ -> false
let isImplicitlyClosedByStartTag expectedTagEnd startTag =
match expectedTagEnd, startTag with
| ("td"|"th") , ("tr"|"td"|"th") -> true
| "tr", "tr" -> true
| "li", "li" -> true
| _ -> false
let implicitlyCloseByStartTag expectedTagEnd startTag tokens =
match expectedTagEnd, startTag with
| ("td"|"th"), "tr" ->
// the new tr is closing the cell and previous row
TagEnd expectedTagEnd :: TagEnd "tr" :: tokens
| ("td"|"th") , ("td"|"th")
| "tr", "tr"
| "li", "li" ->
// tags are on same level, just close
TagEnd expectedTagEnd :: tokens
| _ -> tokens
let isImplicitlyClosedByEndTag expectedTagEnd startTag =
match expectedTagEnd, startTag with
| ("td"|"th"|"tr") , ("thead"|"tbody"|"tfoot"|"table") -> true
| "li" , "ul" -> true
| _ -> false
let implicitlyCloseByEndTag expectedTagEnd tokens =
match expectedTagEnd with
| "td" | "th" ->
// the end tag closes the cell and the row
TagEnd expectedTagEnd :: TagEnd "tr" :: tokens
| "tr"
| "li" ->
// Only on level need to be closed
TagEnd expectedTagEnd :: tokens
| _ -> tokens
/// Builds the node list of one nesting level from the token stream.
///
/// - `docType`: the doctype text seen so far (replaced when a `DocType` token is read)
/// - `elements`: nodes collected for the current level, most recent first
/// - `expectedTagEnd`: name of the end tag that terminates the current level
/// - `parentTagName`: name of the end tag the enclosing level is waiting for
/// - `tokens`: tokens still to be consumed
///
/// Returns the doctype, the tokens left after this level was closed, and the
/// collected nodes in document order.
let rec parse' docType elements expectedTagEnd parentTagName (tokens:XHtmlToken list) =
    match tokens with
    | []
    | EOF :: _ ->
        // input exhausted: whatever was collected is the result of this level
        docType, [], List.rev elements
    | DocType declaration :: remaining ->
        parse' (declaration.Trim()) elements expectedTagEnd parentTagName remaining
    | Tag(_, "br", []) :: remaining ->
        // an attribute-less <br> is represented as a newline text node
        parse' docType (HtmlText Environment.NewLine :: elements) expectedTagEnd parentTagName remaining
    | Tag(selfClosing, name, attributes) :: remaining when selfClosing || canNotHaveChildren name ->
        // self-closing tags and tags that cannot have children become empty elements
        parse' docType (HtmlElement(name, attributes, []) :: elements) expectedTagEnd parentTagName remaining
    | Tag(_, name, _) :: _ when isImplicitlyClosedByStartTag expectedTagEnd name ->
        // insert the missing </tr>, </td>, </th> or </li> in front of the new start tag
        let patched = implicitlyCloseByStartTag expectedTagEnd name tokens
        parse' docType elements expectedTagEnd parentTagName patched
    | TagEnd name :: _ when isImplicitlyClosedByEndTag expectedTagEnd name ->
        // insert the missing end tag(s) in front of the enclosing container's end tag
        let patched = implicitlyCloseByEndTag expectedTagEnd tokens
        parse' docType elements expectedTagEnd parentTagName patched
    | Tag(_, name, attributes) :: remaining ->
        // parse the children one level down; the end tag expected here
        // becomes the parent tag name of that nested level
        let docType', afterElement, children = parse' docType [] name expectedTagEnd remaining
        parse' docType' (HtmlElement(name, attributes, children) :: elements) expectedTagEnd parentTagName afterElement
    | TagEnd name :: _ when name <> expectedTagEnd && name = parentTagName ->
        // the parent is being closed while this level is still open:
        // insert the missing closing tag and look at the same token again
        parse' docType elements expectedTagEnd parentTagName (TagEnd expectedTagEnd :: tokens)
    | TagEnd name :: remaining
        when not (name = expectedTagEnd
                  || name = new String(Array.rev (expectedTagEnd.ToCharArray()))) ->
        // neither the expected end tag nor its reverse (eg: <li></il>): skip it
        parse' docType elements expectedTagEnd parentTagName remaining
    | TagEnd _ :: remaining ->
        // this level is closed
        docType, remaining, List.rev elements
    | Text "" :: remaining ->
        // empty text carries no information
        parse' docType elements expectedTagEnd parentTagName remaining
    | Text content :: remaining ->
        parse' docType (HtmlText content :: elements) expectedTagEnd parentTagName remaining
    | Comment content :: remaining ->
        parse' docType (HtmlComment content :: elements) expectedTagEnd parentTagName remaining
    | CData content :: remaining ->
        parse' docType (HtmlCData content :: elements) expectedTagEnd parentTagName remaining
// Tokenise the input and build the tree from the top level, where no doctype
// has been seen and no end tag is expected yet. The leftover tokens are not needed.
let docType, _, elements = parse' "" [] "" "" (tokenise reader)
match elements with
| [] ->
    // nothing could be parsed at all
    failwith "Invalid HTML"
| _ -> docType, elements
/// Parses the HTML supplied by `reader` into a document (doctype plus nodes).
/// - attribute names and tag names are normalized to lowercase
/// - html entities are replaced by the corresponding characters
/// - consecutive whitespace (except for `&nbsp;`) is collapsed to a single space
/// - br tags are replaced by newlines
let parseDocument reader =
    let docType, elements = parse reader
    HtmlDocument(docType, elements)
/// Parses the HTML supplied by `reader` and returns only its nodes; the
/// doctype is discarded.
/// - attribute names and tag names are normalized to lowercase
/// - html entities are replaced by the corresponding characters
/// - consecutive whitespace (except for `&nbsp;`) is collapsed to a single space
/// - br tags are replaced by newlines
let parseFragment reader =
    let _, elements = parse reader
    elements
// --------------------------------------------------------------------------------------
type XHtmlDocument with

    /// Parses the specified HTML string into a document
    static member Parse(text) =
        using (new StringReader(text)) (fun reader ->
            HtmlParser.parseDocument reader)

    /// Loads an HTML document from the specified stream.
    /// The stream is wrapped in a `StreamReader` that is disposed once parsing
    /// has finished, which by default also closes the stream.
    static member Load(stream:Stream) =
        using (new StreamReader(stream)) (fun reader ->
            HtmlParser.parseDocument reader)

    /// Loads an HTML document from the specified reader.
    /// The reader is not disposed here; it stays owned by the caller.
    static member Load(reader:TextReader) =
        HtmlParser.parseDocument reader
type XHtmlNode with

    /// Parses the specified HTML string to a list of HTML nodes
    static member Parse(text) =
        using (new StringReader(text)) (fun reader ->
            HtmlParser.parseFragment reader)

    /// Parses the specified HTML string and wraps the resulting nodes in a
    /// single attribute-less element named `rootName`
    static member ParseRooted(rootName, text) =
        let children =
            using (new StringReader(text)) (fun reader ->
                HtmlParser.parseFragment reader)
        HtmlElement(rootName, [], children)
// Smoke test: parse a small sample page and show the resulting document.
let r =
    XHtmlDocument.Parse(
        "<!doctype html>\n<html>\n <head>\n <meta http-equiv=\"Content-Type\" content=\"text/html\" charset=\"utf-8\">\n <title>DigitWise</title>\n <link rel=\"icon\" type=\"image/svg+xml\" href=\"favicon.svg\">\n <link rel=\"shortcut icon\" type=\"image/svg+xml\" href=\"favicon.svg\">\n <meta\n name=\"viewport\"\n content=\"width=device-width, initial-scale=1, shrink-to-fit=no\"\n />\n <meta name=\"theme-color\" content=\"#000000\" />\n <link rel=\"stylesheet\" href=\"css/site.css\">\n </head>\n <body class=\"website-body\">\n </body>\n</html>\n")
Dump r |> ignore
/// Rewrites a node tree: every `body` element keeps its attributes but has
/// its children replaced by the single text node "done"; all other elements
/// are rebuilt with their children rewritten recursively. Nodes that are not
/// elements are returned untouched.
let rec parseNode node =
    match node with
    | HtmlElement("body", attr, _) ->
        HtmlElement("body", attr, [ HtmlText "done" ])
    | HtmlElement(tag, attr, children) ->
        HtmlElement(tag, attr, List.map parseNode children)
    | other -> other
/// Applies `parseNode` to every top-level node of a document and returns a
/// new document with the same doctype.
/// The nodes are mapped directly; no synthetic root element is needed, and
/// there is no fallback branch that could silently drop the nodes.
let parseDoc doc =
    match doc with
    | HtmlDocument(doctype, children) ->
        HtmlDocument(doctype, List.map parseNode children)
// Show the sample document after its body has been rewritten.
r |> parseDoc |> Dump |> ignore
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment