Last active
April 10, 2024 05:07
-
-
Save Porges/407f81957eadcd15cac575a8d3b54255 to your computer and use it in GitHub Desktop.
XML regex (from: https://web.archive.org/web/20130612103200/http://porg.es/blog/so-it-turns-out-that-dot-nets-regex-are-more-powerful-than-i-originally-thought)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Building blocks for an XML 1.0 well-formedness regex. Each variable holds the pattern text for
// one grammar production, and later variables are built by concatenating earlier ones.
// '#' is written as \# throughout because the assembled pattern is run with
// RegexOptions.IgnorePatternWhitespace, where an unescaped '#' starts a comment.

// Any supplementary-plane character, expressed as a UTF-16 surrogate pair
// (.NET can't handle a \U00010000-\U0010FFFF range directly).
var surrogate = @"([\ud800-\udbff][\udc00-\udfff])";
// Supplementary characters allowed at the start of a name: U+10000-U+EFFFF only.
// The high surrogate therefore stops at U+DB7F (U+DB7F U+DFFF encodes U+EFFFF).
var nameSurrogate = @"([\ud800-\udb7f][\udc00-\udfff])";
// Char: any legal XML character.
var c = @"([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|" + surrogate + ")";
// S: one or more whitespace characters.
var s = @"([\u0020\u0009\u000d\u000a]+)";
var nameStartChar = @"([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|" + nameSurrogate + ")";
var nameChar = "(" + nameStartChar + @"|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])";
var name = "(?'name'" + nameStartChar + nameChar + "*)";
var names = "(?'names'" + name + @"(\u0020" + name + ")*)";
var nmtoken = "(?'nmtoken'" + nameChar + "+)";
var nmtokens = "(?'nmtokens'" + nmtoken + @"(\u0020" + nmtoken + ")*)";

// References: %name; (parameter entity), &name; (general entity), &#N; / &#xH; (character).
var pereference = "%" + name + ";";
var entityReference = "(?'entityRef'&" + name + ";)";
var charref = @"&\#([0-9]+|x[0-9a-fA-F]+);";
var reference = "(?'reference'" + entityReference + "|" + charref + ")";
// Quoted entity value: anything except %, & and the active quote, or one of the references above.
var entityValue = "(?'entityValue'\"([^%&\"]|" + pereference + "|" + reference + ")*\"|'([^%&']|" + pereference + "|" + reference + ")*')";

var eq = "(?'eq'" + s + "?=" + s + "?)";
var versionNum = @"1\.[0-9]+";
// Comment body: any characters, as long as no position starts a "--" pair.
var comment = "(?'comment'<!--((?!--)" + c + ")*-->)";
// PITarget is any Name except the three letters "xml" in any case. Only that exact name is
// excluded: a longer target such as "xml-stylesheet" is fine, so the lookahead rejects
// x-m-l only when no further name character follows.
var PITarget = "(?'pitarget'(?![xX][mM][lL](?!" + nameChar + "))" + name + ")";
// Processing instruction: target, then optionally whitespace and data that stops at the first "?>".
var PI = @"(?'PI'<\?" + PITarget + "(" + s + @"((?!\?>)" + c + @")*)?\?>)";
var misc = "(?'misc'" + comment + "|" + PI + "|" + s + ")";

// XML declaration: version is mandatory; encoding and standalone are optional, in that order.
var versionInfo = "(?'versionInfo'" + s + "version" + eq + "('" + versionNum + "'|\"" + versionNum + "\"))";
var encName = "(?'encName'[A-Za-z][A-Za-z0-9._-]*)";
var encodingDecl = "(?'encodingDecl'" + s + "encoding" + eq + "(\"" + encName + "\"|'" + encName + "'))";
var sddecl = "(?'sddecl'" + s + "standalone" + eq + "(\"(yes|no)\"|'(yes|no)'))";
var xmlDecl = @"(?'xmlDecl'<\?xml" + versionInfo + encodingDecl + "?" + sddecl + "?" + s + @"?\?>)";
// DTD productions for <!ELEMENT ...> and <!ATTLIST ...> declarations.

// Fragments shared by the parenthesised lists below: optional whitespace, and a '|' separator
// that tolerates whitespace on either side.
var dtdOptSpace = s + "?";
var dtdBar = dtdOptSpace + @"\|" + dtdOptSpace;

// Mixed content: (#PCDATA | a | b)* or the bare (#PCDATA) form.
var mixed = @"(?'mixed'\(" + dtdOptSpace + @"\#PCDATA(" + dtdBar + name + ")*" + dtdOptSpace + @"\)\*|\(" + dtdOptSpace + @"\#PCDATA" + dtdOptSpace + @"\))";
// Placeholder: the element-content (children) model is not implemented, so this group only
// ever matches the literal text below.
var children = @"(?'children'unsureifpossible)";
var contentSpec = "(?'contentspec'" + string.Join("|", "EMPTY", "ANY", mixed, children) + ")";
var elementDecl = "(?'elementdecl'<!ELEMENT" + s + name + s + contentSpec + dtdOptSpace + ">)";

// Attribute types: CDATA, the tokenized keywords, or an enumerated list.
var stringType = "CDATA";
var tokenizedType = "(ID(REF(S)?)?|ENTIT(Y|IES)|NMTOKENS?)";
var notationType = "(?'notation'NOTATION" + s + @"\(" + dtdOptSpace + name + "(" + dtdBar + name + ")*" + dtdOptSpace + @"\))";
var enumeration = @"(?'enumeration'\(" + dtdOptSpace + nmtoken + "(" + dtdBar + nmtoken + ")*" + dtdOptSpace + @"\))";
var enumeratedType = "(?'enumType'" + string.Join("|", notationType, enumeration) + ")";
var attType = "(?'attType'" + string.Join("|", stringType, tokenizedType, enumeratedType) + ")";

// Quoted attribute value: no '<', no bare '&', and not the active quote character.
var attValue = "(?'attValue'\"([^<&\"]|" + reference + ")*\"|'([^<&']|" + reference + ")*')";
// Default: #REQUIRED, #IMPLIED, or a value optionally preceded by #FIXED.
var defaultDecl = @"(?'defaultDecl'\#REQUIRED|\#IMPLIED|(\#FIXED" + s + ")?" + attValue + ")";
var attDef = "(?'attDef'" + s + name + s + attType + s + defaultDecl + ")";
var attListDecl = "(?'attlist'<!ATTLIST" + s + name + attDef + "*" + dtdOptSpace + ">)";
// DTD productions for external identifiers, entity and notation declarations, and the
// DOCTYPE declaration with its internal subset.

var systemLiteral = "(?'systemLiteral'\"[^\"]*\"|'[^']*')";
// Characters permitted in a public identifier. The apostrophe is part of the class, so the
// single-quoted literal below has to exclude it with a lookahead.
var pubIdChar = @"[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\u000a-]";
var pubidLiteral = "(?'pubIdLiteral'\"" + pubIdChar + "*\"|'((?!')" + pubIdChar + ")*')";
var externalID = "(?'externalID'SYSTEM" + s + systemLiteral + "|PUBLIC" + s + pubidLiteral + s + systemLiteral + ")";
var nDataDecl = "(?'ndatadecl'" + s + "NDATA" + s + name + ")";

// General entities may carry an NDATA annotation on an external ID; parameter entities may not.
var entityDef = "(?'entityDef'" + entityValue + "|(" + externalID + nDataDecl + "?))";
var peDef = "(?'pedef'" + entityValue + "|" + externalID + ")";
var GEDecl = "(?'gedecl'<!ENTITY" + s + name + s + entityDef + s + "?>)";
// Parameter-entity declaration (<!ENTITY % name ...>). It gets its own group name so captures
// can tell it apart from a general-entity declaration.
var PEDecl = "(?'pedecl'<!ENTITY" + s + "%" + s + name + s + peDef + s + "?>)";
var entityDecl = "(?'entityDecl'" + GEDecl + "|" + PEDecl + ")";

var publicID = "(?'publicID'PUBLIC" + s + pubidLiteral + ")";
// A notation takes either a full external ID or a PUBLIC identifier without a system literal.
var notationDecl = "(?'notationDecl'<!NOTATION" + s + name + s + "(" + externalID + "|" + publicID + ")" + s + "?>)";

var markupDecl = "(?'markupdecl'" + elementDecl + "|" + attListDecl + "|" + entityDecl + "|" + notationDecl + "|" + PI + "|" + comment + ")";
var DeclSep = "(?'declSep'" + pereference + "|" + s + ")";
var intSubSet = @"(?'intSubSet'(" + markupDecl + "|" + DeclSep + ")*)";
// <!DOCTYPE name [external ID] [ '[' internal subset ']' ] >
var docTypeDecl = "(?'doctypedecl'<!DOCTYPE" + s + name + "(" + s + externalID + ")?" + s + @"?(\[" + intSubSet + @"\]" + s + "?)?>)";
// Document-level productions: prolog, element content, root element, and the anchored pattern.

// Prolog: optional XML declaration, misc items, then optionally a DOCTYPE followed by more misc.
var prolog = string.Concat(xmlDecl, "?", misc, "*(", docTypeDecl, misc, "*)?");
var attribute = "(?'attribute'" + name + eq + attValue + ")";
// CDATA section: any characters up to the first "]]>".
var CDSect = @"(?'CDSect'<!\[CDATA\[((?!\]\]>)" + c + @")*\]\]>)";
// Character data: no '<', no '&', and no "]]>" sequence.
var charData = @"(((?!\]\]>)[^<&])*)";

// Fragments shared by every tag form.
var tagOptSpace = s + "?";
var tagAttributes = "(" + s + attribute + ")*";

// Element content. Start tags push their name on the 'open' stack; an end tag is accepted only
// when it repeats the name on top of the stack, and the 'close-open' balancing group pops it.
var contentAlternatives = string.Join("|",
    "<(?'openclose'" + name + ")" + tagAttributes + tagOptSpace + "/>",
    "<(?'open'" + name + ")" + tagAttributes + tagOptSpace + ">",
    @"</(?=\k'open'" + tagOptSpace + ">)(?'close-open'" + name + ")" + tagOptSpace + ">",
    reference,
    PI,
    comment,
    CDSect,
    charData);
// Each item is wrapped in an atomic group as a minor optimization: no backtracking into it,
// which makes failing faster. The trailing conditional fails if any start tag is still unclosed.
var content = "(?>" + contentAlternatives + ")*" + "(?(open)(?!))";

// Root element: a start tag, content and matching end tag, or a single empty-element tag.
var rootOpenTag = "<(?'rootName'" + name + ")" + tagAttributes;
var rootElement = "(?'root'(" + rootOpenTag + tagOptSpace + ">" + content + @"</\k'rootName'" + tagOptSpace + ">)|(" + rootOpenTag + tagOptSpace + "/>))";
var document = string.Concat("^", prolog, rootElement, misc, "*", "$");
// Sample document used to exercise the pattern. The inline /* or ... here */ notes name
// substitutions the author lists as invalid alternatives for that spot.
// A bare '&' is not allowed in character data or attribute values by the patterns above, so
// ampersands in the sample are written as the entity reference &amp;.
var testDoc = @"<?xml version='1.0' encoding=""utf-8""?><!DOCTYPE nothtml []><items>
<item available=""yes"" >
<name> laptop </name>
<![CDATA[something14!$]] 1412]]>
<"+"\U00010000"+@"quantity> 2 &amp; y ⍏ </"+"\U00010000"+@"quantity>
</item><?notxml?>" /* or <?xml?> here */ +@"
<item available=""yes"" x='' y=""&amp;"">
<name> mouse </name >
<quantity> 1 " + /* or ]]> invalid here */ @" </quantity>
</item>
<item available=""no"" >
<!----> <!-- --> <!-- - -->" + /* or <!-- -- --> here */ @"
<name> keyboad </name>
<quantity> 0</quantity>
</item>
</items><!-- stuff can go here --> <!-- yup --> <?pi aasd as!@*&$^!*@&$!@ ?>";
// Uncomment to dump the assembled pattern text.
//Console.WriteLine(document);
// Prints the matched text (the whole document on success, an empty line otherwise).
// IgnorePatternWhitespace is why '#' is escaped in the patterns; ExplicitCapture keeps the
// unnamed parentheses from capturing.
Console.WriteLine(Regex.Match(testDoc, document, RegexOptions.IgnorePatternWhitespace|RegexOptions.Singleline|RegexOptions.ExplicitCapture));
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
^(?'xmlDecl'<\?xml(?'versionInfo'([\u0020\u0009\u000d\u000a]+)version(?'eq'([\u0 | |
020\u0009\u000d\u000a]+)?=([\u0020\u0009\u000d\u000a]+)?)('1\.[0-9]+'|"1\.[0-9]+ | |
"))(?'encodingDecl'([\u0020\u0009\u000d\u000a]+)encoding(?'eq'([\u0020\u0009\u00 | |
0d\u000a]+)?=([\u0020\u0009\u000d\u000a]+)?)("(?'encName'[A-Za-z][A-Za-z0-9._-]* | |
)"|'(?'encName'[A-Za-z][A-Za-z0-9._-]*)'))?(?'sddecl'([\u0020\u0009\u000d\u000a] | |
+)standalone(?'eq'([\u0020\u0009\u000d\u000a]+)?=([\u0020\u0009\u000d\u000a]+)?) | |
("(yes|no)"|'(yes|no)'))?([\u0020\u0009\u000d\u000a]+)?\?>)?(?'misc'(?'comment'< | |
!--((?!--)([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc0 | |
0-\udfff])))*-->)|(?'PI'<\?(?'pitarget'(?![xX][mM][lL])(?'name'([:A-Z_a-z\u00C0- | |
\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u | |
218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc0 | |
0-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F | |
-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\ | |
uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040] | |
)*))(([\u0020\u0009\u000d\u000a]+)((?!\?>)([\u0009\u000a\u000d\u0020-\ud7ff\ue00 | |
0-\ufffd]|([\ud800-\udbff][\udc00-\udfff])))*)?\?>)|([\u0020\u0009\u000d\u000a]+ | |
))*((?'doctypedecl'<!DOCTYPE([\u0020\u0009\u000d\u000a]+)(?'name'([:A-Z_a-z\u00C | |
0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070- | |
\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\ud | |
c00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u03 | |
7F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0 | |
-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u204 | |
0])*)(([\u0020\u0009\u000d\u000a]+)(?'externalID'SYSTEM([\u0020\u0009\u000d\u000 | |
a]+)(?'systemLiteral'"[^"]*"|'[^']*')|PUBLIC([\u0020\u0009\u000d\u000a]+)(?'pubI | |
dLiteral'"[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\u000a-]*"|'((?!')[a-zA-Z0-9'( | |
)+,./:=?;!*#@$_%\u0020\u000d\u000a-])*')([\u0020\u0009\u000d\u000a]+)(?'systemLi | |
teral'"[^"]*"|'[^']*')))?([\u0020\u0009\u000d\u000a]+)?(\[(?'intSubSet'((?'marku | |
pdecl'(?'elementdecl'<!ELEMENT([\u0020\u0009\u000d\u000a]+)(?'name'([:A-Z_a-z\u0 | |
0C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u207 | |
0-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\ | |
udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u | |
037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFD | |
F0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2 | |
040])*)([\u0020\u0009\u000d\u000a]+)(?'contentspec'EMPTY|ANY|(?'mixed'\(([\u0020 | |
\u0009\u000d\u000a]+)?\#PCDATA(([\u0020\u0009\u000d\u000a]+)?\|([\u0020\u0009\u0 | |
00d\u000a]+)?(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u0 | |
37D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDC | |
F\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8- | |
\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u | |
2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[ | |
-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))*([\u0020\u0009\u000d\u000a]+)?\)\*|\( | |
([\u0020\u0009\u000d\u000a]+)?\#PCDATA([\u0020\u0009\u000d\u000a]+)?\))|(?'child | |
ren'unsureifpossible))([\u0020\u0009\u000d\u000a]+)?>)|(?'attlist'<!ATTLIST([\u0 | |
020\u0009\u000d\u000a]+)(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02F | |
F\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\ | |
uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u | |
00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u21 | |
8F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00- | |
\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*)(?'attDef'([\u0020\u0009\u00 | |
0d\u000a]+)(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037 | |
D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\ | |
uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u | |
00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2F | |
EF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-. | |
0-9\u00B7\u0300-\u036F\u203F-\u2040])*)([\u0020\u0009\u000d\u000a]+)(?'attType'C | |
DATA|(ID(REF(S)?)?|ENTIT(Y|IES)|NMTOKENS?)|(?'enumType'(?'notation'NOTATION([\u0 | |
020\u0009\u000d\u000a]+)\(([\u0020\u0009\u000d\u000a]+)?(?'name'([:A-Z_a-z\u00C0 | |
-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\ | |
u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc | |
00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037 | |
F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0- | |
\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040 | |
])*)(([\u0020\u0009\u000d\u000a]+)?\|([\u0020\u0009\u000d\u000a]+)?(?'name'([:A- | |
Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u2 | |
00D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\ | |
udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370- | |
\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\u | |
FDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u | |
203F-\u2040])*))*([\u0020\u0009\u000d\u000a]+)?\))|(?'enumeration'\(([\u0020\u00 | |
09\u000d\u000a]+)?(?'nmtoken'(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\ | |
u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF | |
900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u | |
036F\u203F-\u2040])+)(([\u0020\u0009\u000d\u000a]+)?\|([\u0020\u0009\u000d\u000a | |
]+)?(?'nmtoken'(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u | |
037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFD | |
F0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2 | |
040])+))*([\u0020\u0009\u000d\u000a]+)?\))))([\u0020\u0009\u000d\u000a]+)(?'defa | |
ultDecl'\#REQUIRED|\#IMPLIED|(\#FIXED([\u0020\u0009\u000d\u000a]+))?(?'attValue' | |
"([^<&"]|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6 | |
\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u | |
3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_ | |
a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200 | |
D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\ud | |
bff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+| | |
x[0-9a-fA-F]+);()))*"|'([^<&']|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00 | |
C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070 | |
-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\u | |
dc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u0 | |
37F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF | |
0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u20 | |
40])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*')))*([\u0020\u0009\u000d\u000a]+)?>)| | |
(?'entityDecl'(?'gedecl'<!ENTITY([\u0020\u0009\u000d\u000a]+)(?'name'([:A-Z_a-z\ | |
u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2 | |
070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff] | |
[\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D | |
\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\u | |
FDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\ | |
u2040])*)([\u0020\u0009\u000d\u000a]+)(?'entityDef'(?'entityValue'"([^%&"]|%(?'n | |
ame'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\ | |
u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|( | |
[\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02 | |
FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF | |
\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300 | |
-\u036F\u203F-\u2040])*);|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u | |
00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u21 | |
8F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00- | |
\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\ | |
u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uF | |
FFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])* | |
);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*"|'([^%&']|%(?'name'([:A-Z_a-z\u00C0-\u00D6 | |
\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u | |
2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udf | |
ff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FF | |
F\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD] | |
|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);|( | |
?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u0 | |
2FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7F | |
F\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0- | |
\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u | |
218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc0 | |
0-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA- | |
F]+);()))*')|((?'externalID'SYSTEM([\u0020\u0009\u000d\u000a]+)(?'systemLiteral' | |
"[^"]*"|'[^']*')|PUBLIC([\u0020\u0009\u000d\u000a]+)(?'pubIdLiteral'"[a-zA-Z0-9' | |
()+,./:=?;!*#@$_%\u0020\u000d\u000a-]*"|'((?!')[a-zA-Z0-9'()+,./:=?;!*#@$_%\u002 | |
0\u000d\u000a-])*')([\u0020\u0009\u000d\u000a]+)(?'systemLiteral'"[^"]*"|'[^']*' | |
))(?'ndatadecl'([\u0020\u0009\u000d\u000a]+)NDATA([\u0020\u0009\u000d\u000a]+)(? | |
'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FF | |
F\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD] | |
|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u | |
02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7 | |
FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u03 | |
00-\u036F\u203F-\u2040])*))?))([\u0020\u0009\u000d\u000a]+)?>)|(?'gedecl'<!ENTIT | |
Y([\u0020\u0009\u000d\u000a]+)%([\u0020\u0009\u000d\u000a]+)(?'name'([:A-Z_a-z\u | |
00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u20 | |
70-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][ | |
\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\ | |
u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uF | |
DF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u | |
2040])*)([\u0020\u0009\u000d\u000a]+)(?'pedef'(?'entityValue'"([^%&"]|%(?'name'( | |
[:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C | |
-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud8 | |
00-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0 | |
370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF90 | |
0-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u03 | |
6F\u203F-\u2040])*);|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\ | |
u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2 | |
C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udff | |
f]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF | |
\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]| | |
([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);()) | |
|&\#([0-9]+|x[0-9a-fA-F]+);()))*"|'([^%&']|%(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D | |
8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00- | |
\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff])) | |
(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u20 | |
0C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\u | |
d800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);|(?'ref | |
erence'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u | |
0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF9 | |
00-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D | |
6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\ | |
u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\ud | |
fff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+); | |
()))*')|(?'externalID'SYSTEM([\u0020\u0009\u000d\u000a]+)(?'systemLiteral'"[^"]* | |
"|'[^']*')|PUBLIC([\u0020\u0009\u000d\u000a]+)(?'pubIdLiteral'"[a-zA-Z0-9'()+,./ | |
:=?;!*#@$_%\u0020\u000d\u000a-]*"|'((?!')[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000 | |
d\u000a-])*')([\u0020\u0009\u000d\u000a]+)(?'systemLiteral'"[^"]*"|'[^']*')))([\ | |
u0020\u0009\u000d\u000a]+)?>))|(?'notationDecl'<!NOTATION([\u0020\u0009\u000d\u0 | |
00a]+)(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u03 | |
7F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0 | |
-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\ | |
u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3 | |
001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u | |
00B7\u0300-\u036F\u203F-\u2040])*)([\u0020\u0009\u000d\u000a]+)((?'externalID'SY | |
STEM([\u0020\u0009\u000d\u000a]+)(?'systemLiteral'"[^"]*"|'[^']*')|PUBLIC([\u002 | |
0\u0009\u000d\u000a]+)(?'pubIdLiteral'"[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\ | |
u000a-]*"|'((?!')[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\u000a-])*')([\u0020\u0 | |
009\u000d\u000a]+)(?'systemLiteral'"[^"]*"|'[^']*'))|(?'publicID'PUBLIC([\u0020\ | |
u0009\u000d\u000a]+)(?'pubIdLiteral'"[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\u0 | |
00a-]*"|'((?!')[a-zA-Z0-9'()+,./:=?;!*#@$_%\u0020\u000d\u000a-])*')))([\u0020\u0 | |
009\u000d\u000a]+)?>)|(?'PI'<\?(?'pitarget'(?![xX][mM][lL])(?'name'([:A-Z_a-z\u0 | |
0C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u207 | |
0-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\ | |
udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u | |
037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFD | |
F0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2 | |
040])*))(([\u0020\u0009\u000d\u000a]+)((?!\?>)([\u0009\u000a\u000d\u0020-\ud7ff\ | |
ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff])))*)?\?>)|(?'comment'<!--((?!--)([ | |
\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff]))) | |
*-->))|(?'declSep'%(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u03 | |
70-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900 | |
-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\ | |
u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2 | |
C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udff | |
f]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);|([\u0020\u0009\u000d\u000a]+))) | |
*)\]([\u0020\u0009\u000d\u000a]+)?)?>)(?'misc'(?'comment'<!--((?!--)([\u0009\u00 | |
0a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff])))*-->)|(?'P | |
I'<\?(?'pitarget'(?![xX][mM][lL])(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u0 | |
0F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u300 | |
1-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z | |
\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u | |
2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff | |
][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u0009\u0 | |
00d\u000a]+)((?!\?>)([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\ud | |
bff][\udc00-\udfff])))*)?\?>)|([\u0020\u0009\u000d\u000a]+))*)?(?'root'(<(?'root | |
Name'(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037 | |
F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0- | |
\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u | |
00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u30 | |
01-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u0 | |
0B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u0009\u000d\u000a]+)(?'attribute'(?' | |
name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF | |
\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]| | |
([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u0 | |
2FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7F | |
F\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u030 | |
0-\u036F\u203F-\u2040])*)(?'eq'([\u0020\u0009\u000d\u000a]+)?=([\u0020\u0009\u00 | |
0d\u000a]+)?)(?'attValue'"([^<&"]|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\ | |
u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2 | |
070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff] | |
[\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D | |
\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\u | |
FDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\ | |
u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*"|'([^<&']|(?'reference'(?'entityRe | |
f'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F- | |
\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\u | |
FFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00 | |
F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001 | |
-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B | |
7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*')))*([\u0020 | |
\u0009\u000d\u000a]+)?>(?><(?'openclose'(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u | |
00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2F | |
EF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([: | |
A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\ | |
u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800 | |
-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u | |
0009\u000d\u000a]+)(?'attribute'(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00 | |
F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001 | |
-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\ | |
u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2 | |
070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff] | |
[\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*)(?'eq'([\u0020\u0009 | |
\u000d\u000a]+)?=([\u0020\u0009\u000d\u000a]+)?)(?'attValue'"([^<&"]|(?'referenc | |
e'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370- | |
\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\u | |
FDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00 | |
D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00 | |
-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]) | |
)|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))* | |
"|'([^<&']|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00 | |
F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF | |
\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A- | |
Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u2 | |
00D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\ | |
udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9] | |
+|x[0-9a-fA-F]+);()))*')))*([\u0020\u0009\u000d\u000a]+)?/>|<(?'open'(?'name'([: | |
A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\ | |
u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800 | |
-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u037 | |
0-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900- | |
\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F | |
\u203F-\u2040])*))(([\u0020\u0009\u000d\u000a]+)(?'attribute'(?'name'([:A-Z_a-z\ | |
u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2 | |
070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff] | |
[\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D | |
\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\u | |
FDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\ | |
u2040])*)(?'eq'([\u0020\u0009\u000d\u000a]+)?=([\u0020\u0009\u000d\u000a]+)?)(?' | |
attValue'"([^<&"]|(?'reference'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00 | |
D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00 | |
-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]) | |
)(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u2 | |
00C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ | |
ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\ | |
#([0-9]+|x[0-9a-fA-F]+);()))*"|'([^<&']|(?'reference'(?'entityRef'&(?'name'([:A- | |
Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u2 | |
00D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\ | |
udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370- | |
\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\u | |
FDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u | |
203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*')))*([\u0020\u0009\u000d\u00 | |
0a]+)?>|</(?=\k'open'([\u0020\u0009\u000d\u000a]+)?>)(?'close-open'(?'name'([:A- | |
Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u2 | |
00D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\ | |
udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370- | |
\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\u | |
FDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u | |
203F-\u2040])*))([\u0020\u0009\u000d\u000a]+)?>|(?'reference'(?'entityRef'&(?'na | |
me'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u | |
200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([ | |
\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02F | |
F\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\ | |
uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300- | |
\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);())|(?'PI'<\?(?'pitarget'( | |
?![xX][mM][lL])(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\ | |
u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uF | |
DCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D | |
8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00- | |
\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff])) | |
|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u0009\u000d\u000a]+)((?!\? | |
>)([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff | |
])))*)?\?>)|(?'comment'<!--((?!--)([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd | |
]|([\ud800-\udbff][\udc00-\udfff])))*-->)|(?'CDSect'<!\[CDATA\[((?!\]\]>)([\u000 | |
9\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff])))*\]\] | |
>)|(((?!\]\]>)[^<&])*))*(?(open)(?!))</\k'rootName'([\u0020\u0009\u000d\u000a]+) | |
?>)|(<(?'rootName'(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u037 | |
0-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900- | |
\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u | |
00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C | |
00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff | |
]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u0009\u000d\u000a]+)(?' | |
attribute'(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D | |
\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\u | |
FDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u0 | |
0F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FE | |
F\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0 | |
-9\u00B7\u0300-\u036F\u203F-\u2040])*)(?'eq'([\u0020\u0009\u000d\u000a]+)?=([\u0 | |
020\u0009\u000d\u000a]+)?)(?'attValue'"([^<&"]|(?'reference'(?'entityRef'&(?'nam | |
e'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u2 | |
00C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ | |
ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF | |
\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\u | |
F900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\ | |
u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))*"|'([^<&']|(?'referenc | |
e'(?'entityRef'&(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370- | |
\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\u | |
FDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a-z\u00C0-\u00D6\u00 | |
D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00 | |
-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]) | |
)|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*);())|&\#([0-9]+|x[0-9a-fA-F]+);()))* | |
')))*([\u0020\u0009\u000d\u000a]+)?/>))(?'misc'(?'comment'<!--((?!--)([\u0009\u0 | |
00a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\udbff][\udc00-\udfff])))*-->)|(?' | |
PI'<\?(?'pitarget'(?![xX][mM][lL])(?'name'([:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u | |
00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u30 | |
01-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbff][\udc00-\udfff]))(([:A-Z_a- | |
z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\ | |
u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|([\ud800-\udbf | |
f][\udc00-\udfff]))|[-.0-9\u00B7\u0300-\u036F\u203F-\u2040])*))(([\u0020\u0009\u | |
000d\u000a]+)((?!\?>)([\u0009\u000a\u000d\u0020-\ud7ff\ue000-\ufffd]|([\ud800-\u | |
dbff][\udc00-\udfff])))*)?\?>)|([\u0020\u0009\u000d\u000a]+))*$ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment