Created
December 1, 2013 06:32
Source listing for the blog post http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Reflection;
using System.Text.RegularExpressions;
namespace RegexLibraryBuilder
{
    /// <summary>
    /// Console entry point that pre-compiles a small set of regular expressions
    /// (an HTML tag matcher and a repeated-word matcher) into a stand-alone
    /// assembly via <see cref="Regex.CompileToAssembly(RegexCompilationInfo[], AssemblyName)"/>.
    /// </summary>
    /// <remarks>
    /// NOTE: per the .NET documentation, <c>Regex.CompileToAssembly</c> is only
    /// supported on .NET Framework; on .NET Core / .NET 5+ it throws
    /// <see cref="PlatformNotSupportedException"/>.
    /// </remarks>
    internal class RegexBuilderMain
    {
        /// <summary>
        /// Name shared by the generated assembly and by the namespace that the
        /// generated regex classes are placed in. Kept in one place so the two
        /// cannot drift apart.
        /// </summary>
        private const string LibraryName = "Haack.RegularExpressions";

        /// <summary>
        /// Pattern for a single HTML start, end, or self-closing tag, with named
        /// groups for the tag name and for each attribute name/value.
        /// </summary>
        private const string HtmlTagPattern =
            @"<"
            + @"(?<endTag>/)?"              // Captures the "/" if this is an end tag.
            + @"(?<tagname>\w+)"            // Captures the tag name.
            + @"("                          // Groups the tag contents.
            + @"(\s+"                       // Groups one attribute (leading whitespace required).
            + @"(?<attName>\w+)"            // Attribute name.
            + @"("                          // Groups the optional "=value" portion.
            + @"\s*=\s*"                    // "=" with optional surrounding whitespace.
            + @"(?:"                        // Groups the alternatives for the value.
            + @"""(?<attVal>[^""]*)"""      // attVal="double quoted"
            + @"|'(?<attVal>[^']*)'"        // attVal='single quoted'
            + @"|(?<attVal>[^'"">\s]+)"     // attVal=unquoted (no quotes, ">" or whitespace)
            + @")"
            + @")?"                         // End of the optional attribute value portion.
            + @")+\s*"                      // One or more attributes, then optional whitespace.
            + @"|\s*"                       // ...or no attributes at all, just optional whitespace.
            + @")"
            + @"(?<completeTag>/)?>";       // Captures the "/" if the tag is self-closing.

        /// <summary>
        /// Pattern for a word immediately repeated after whitespace
        /// (the second occurrence is matched through the <c>\1</c> backreference).
        /// </summary>
        private const string DoubleWordPattern = @"\b(\w+)\s+\1\b";

        /// <summary>
        /// The main entry point for the application. Describes each regex to
        /// compile and emits them into the <c>Haack.RegularExpressions</c>
        /// assembly, version 1.0.0.0.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        [STAThread]
        private static void Main(string[] args)
        {
            RegexCompilationInfo[] compInfo =
            {
                // HtmlTagRegex: case-insensitive, public class in LibraryName.
                new RegexCompilationInfo(
                    HtmlTagPattern,
                    RegexOptions.IgnoreCase,
                    "HtmlTagRegex",
                    LibraryName,
                    true),

                // DoubleWordRegex: matches doubled words, public class in LibraryName.
                new RegexCompilationInfo(
                    DoubleWordPattern,
                    RegexOptions.None,
                    "DoubleWordRegex",
                    LibraryName,
                    true)
            };

            AssemblyName assemblyName = new AssemblyName();
            assemblyName.Name = LibraryName;
            assemblyName.Version = new Version("1.0.0.0");

            // Generates the assembly containing one class per RegexCompilationInfo.
            Regex.CompileToAssembly(compInfo, assemblyName);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment