Created
May 27, 2013 16:49
-
-
Save hoehrmann/5658048 to your computer and use it in GitHub Desktop.
The attached file contains a preliminary outline of an HTML Tidy wrapper for .NET applications, written in Managed C++ in February 2003. Originally posted at http://lists.w3.org/Archives/Public/www-archive/2004Jan/0115.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is the main DLL file. | |
#include "stdafx.h" | |
#include "Tidydotnet.h" | |
#include "t:/tidylib2/include/tidy.h" | |
#include "t:/tidylib2/include/buffio.h" | |
using namespace System; | |
using namespace System::Xml; | |
using namespace System::Text; | |
using namespace System::Diagnostics; | |
/// <?xml version = '1.0' encoding = 'utf-8'?> | |
/// <doc> | |
/// <assembly> | |
/// <name>TidyDotNet</name> | |
/// </assembly> | |
/// <members> | |
namespace TidyDotNet | |
{ | |
// Managed mirror of Tidy's duplicate-attribute policy; each enumerator
// carries the numeric value of the Tidy constant of the same name.
public __value enum RepeatedAttributes
{
    KeepFirst = ::TidyKeepFirst,
    KeepLast  = ::TidyKeepLast
};
// Managed mirror of Tidy's doctype modes; each enumerator carries the
// numeric value of the corresponding ::TidyDoctype* constant.
public __value enum DocTypeMode
{
    Omit   = ::TidyDoctypeOmit,
    Auto   = ::TidyDoctypeAuto,
    Strict = ::TidyDoctypeStrict,
    Loose  = ::TidyDoctypeLoose,
    User   = ::TidyDoctypeUser
};
// Managed owner of a native Tidy document handle. The handle is released
// in the destructor and can be converted into a System.Xml document.
public __gc class TidyDocument
{
public:
    // Wraps an existing Tidy document handle.
    TidyDocument(TidyDoc tDoc);
    ~TidyDocument();

    // Builds an XmlDocument from the wrapped Tidy parse tree.
    XmlDocument * ToXmlDocument();

private:
    // Appends `node` and its following siblings (with their children) to `current`.
    void ProcessTree(TidyNode node, XmlDocument * doc, XmlNode * current);

protected:
    // Native Tidy document handle held by this instance.
    TidyDoc m_tDoc;
};
// Holds the parser-related subset of Tidy's configuration as managed
// properties and knows how to copy it onto a Tidy document.
public __gc class SimpleHtmlParser
{
public:
    // ---- properties ----------------------------------------------------
    __property int  get_TabSize();
    __property void set_TabSize(int value);

    __property bool get_AssumeXmlProcIns();
    __property void set_AssumeXmlProcIns(bool value);

    __property bool get_FixBadComments();
    __property void set_FixBadComments(bool value);

    __property bool get_InputXml();
    __property void set_InputXml(bool value);

    __property bool get_JoinClasses();
    __property void set_JoinClasses(bool value);

    __property bool get_JoinStyles();
    __property void set_JoinStyles(bool value);

    __property bool get_LiteralAttributes();
    __property void set_LiteralAttributes(bool value);

    __property bool get_Ncr();
    __property void set_Ncr(bool value);

    __property bool get_QuoteAmpersand();
    __property void set_QuoteAmpersand(bool value);

    __property TidyDotNet::RepeatedAttributes get_RepeatedAttributes();
    __property void set_RepeatedAttributes(TidyDotNet::RepeatedAttributes value);

    // ---- methods -------------------------------------------------------
    // The tag lists are exposed as Get/Set method pairs rather than properties.
    String * GetNewBlocklevelTags() __gc[];
    void     SetNewBlocklevelTags([ParamArray] String* value __gc[]);

    String * GetNewEmptyTags() __gc[];
    void     SetNewEmptyTags([ParamArray] String* value __gc[]);

    String * GetNewInlineTags() __gc[];
    void     SetNewInlineTags([ParamArray] String* value __gc[]);

    String * GetNewPreTags() __gc[];
    void     SetNewPreTags([ParamArray] String* value __gc[]);

    SimpleHtmlParser();
    ~SimpleHtmlParser();

private:
    // Backing fields for the properties above.
    int  m_TabSize;
    bool m_AssumeXmlProcIns;
    bool m_FixBadComments;
    bool m_InputXml;
    bool m_JoinClasses;
    bool m_JoinStyles;
    bool m_LiteralAttributes;
    bool m_QuoteAmpersand;
    bool m_Ncr;
    // TODO: change type
    TidyDotNet::RepeatedAttributes m_RepeatedAttributes;

    // Backing fields for the Get/SetNew*Tags methods.
    String * m_NewBlocklevelTags __gc[];
    String * m_NewInlineTags __gc[];
    String * m_NewEmptyTags __gc[];
    String * m_NewPreTags __gc[];

protected:
    // Copies the settings held by this class onto `tdoc`.
    void SetupSimpleConfig(TidyDoc tdoc);
};
// Extends SimpleHtmlParser with the clean-up and output related Tidy
// options and with the entry point that actually parses a string.
public __gc class HtmlParser : public SimpleHtmlParser
{
public:
    // ---- boolean properties ---------------------------------------------
    __property bool get_AccessibilityCheck();
    __property void set_AccessibilityCheck(bool value);

    __property bool get_AddXmlDecl();
    __property void set_AddXmlDecl(bool value);

    __property bool get_AddXmlSpace();
    __property void set_AddXmlSpace(bool value);

    __property bool get_AsciiChars();
    __property void set_AsciiChars(bool value);

    __property bool get_Bare();
    __property void set_Bare(bool value);

    __property bool get_Clean();
    __property void set_Clean(bool value);

    __property bool get_DropEmptyParas();
    __property void set_DropEmptyParas(bool value);

    __property bool get_DropFontTags();
    __property void set_DropFontTags(bool value);

    __property bool get_DropProprietaryAttributes();
    __property void set_DropProprietaryAttributes(bool value);

    __property bool get_EncloseBlockText();
    __property void set_EncloseBlockText(bool value);

    __property bool get_EncloseText();
    __property void set_EncloseText(bool value);

    __property bool get_EscapeCdata();
    __property void set_EscapeCdata(bool value);

    __property bool get_FixBackslash();
    __property void set_FixBackslash(bool value);

    __property bool get_FixUri();
    __property void set_FixUri(bool value);

    __property bool get_HideComments();
    __property void set_HideComments(bool value);

    __property bool get_LogicalEmphasis();
    __property void set_LogicalEmphasis(bool value);

    __property bool get_LowerLiterals();
    __property void set_LowerLiterals(bool value);

    __property bool get_OutputHtml();
    __property void set_OutputHtml(bool value);

    __property bool get_OutputXhtml();
    __property void set_OutputXhtml(bool value);

    __property bool get_ReplaceColor();
    __property void set_ReplaceColor(bool value);

    __property bool get_TidyMark();
    __property void set_TidyMark(bool value);

    __property bool get_Word2000();
    __property void set_Word2000(bool value);

    // ---- string and enum properties ---------------------------------------
    __property String * get_AltText();
    __property void     set_AltText(String * value);

    __property String * get_CssPrefix();
    __property void     set_CssPrefix(String * value);

    __property String * get_Doctype();
    __property void     set_Doctype(String * value);

    __property DocTypeMode get_DoctypeMode();
    __property void        set_DoctypeMode(DocTypeMode value);

#if 0
    __property String * get_Language();
    __property void     set_Language(String * value);
#endif

    HtmlParser();
    ~HtmlParser();

    // Parses `s` with the current settings and returns the resulting document.
    TidyDocument * ParseString(String * s);

private:
    // Backing fields for the properties above.
    bool m_AccessibilityCheck;
    bool m_AddXmlDecl;
    bool m_AddXmlSpace;
    bool m_AsciiChars;
    bool m_Bare;
    bool m_Clean;
    bool m_DropEmptyParas;
    bool m_DropFontTags;
    bool m_DropProprietaryAttributes;
    bool m_EncloseBlockText;
    bool m_EncloseText;
    bool m_EscapeCdata;
    bool m_FixBackslash;
    bool m_FixUri;
    bool m_HideComments;
    bool m_LogicalEmphasis;
    bool m_LowerLiterals;
    bool m_OutputHtml;
    bool m_OutputXhtml;
    bool m_ReplaceColor;
    bool m_TidyMark;
    bool m_Word2000;
    String * m_AltText;
    String * m_CssPrefix;
    String * m_Doctype;
    DocTypeMode m_DoctypeMode;
#if 0
    String * m_Language;
#endif

protected:
    // Copies the settings declared by this class onto `tdoc`.
    void SetupConfig(TidyDoc tdoc);
};
} | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.TabSize'> | |
/// <summary>Align tab stops at this number of columns.</summary> | |
/// <remarks>The default is <c>8</c>.</remarks> | |
int TidyDotNet::SimpleHtmlParser::get_TabSize() { return this->m_TabSize; } | |
void TidyDotNet::SimpleHtmlParser::set_TabSize(int value) { this->m_TabSize = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.AssumeXmlProcIns'> | |
/// <summary>Processing instructions must be closed by <c>?></c>.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_AssumeXmlProcIns() { return this->m_AssumeXmlProcIns; } | |
void TidyDotNet::SimpleHtmlParser::set_AssumeXmlProcIns(bool value) { this->m_AssumeXmlProcIns = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.FixBadComments'> | |
/// <summary>Replace unexpected hyphens in comments by <c>=</c>.</summary> | |
/// <remarks>The default is <c>true</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_FixBadComments() { return this->m_FixBadComments; } | |
void TidyDotNet::SimpleHtmlParser::set_FixBadComments(bool value) { this->m_FixBadComments = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.InputXml'> | |
/// <summary>Treat input as generic XML.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_InputXml() { return this->m_InputXml; } | |
void TidyDotNet::SimpleHtmlParser::set_InputXml(bool value) { this->m_InputXml = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.JoinClasses'> | |
/// <summary>Multiple <c>class</c> attribute specifications should be run together.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_JoinClasses() { return this->m_JoinClasses; } | |
void TidyDotNet::SimpleHtmlParser::set_JoinClasses(bool value) { this->m_JoinClasses = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.JoinStyles'> | |
/// <summary>Multiple <c>style</c> attribute specifications should be run together.</summary> | |
/// <remarks>The default is <c>true</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_JoinStyles() { return this->m_JoinStyles; } | |
void TidyDotNet::SimpleHtmlParser::set_JoinStyles(bool value) { this->m_JoinStyles = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.LiteralAttributes'> | |
/// <summary>Keep white-space in attribute values unnormalized.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_LiteralAttributes() { return this->m_LiteralAttributes; } | |
void TidyDotNet::SimpleHtmlParser::set_LiteralAttributes(bool value) { this->m_LiteralAttributes = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.Ncr'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_Ncr() { return this->m_Ncr; } | |
void TidyDotNet::SimpleHtmlParser::set_Ncr(bool value) { this->m_Ncr = value; } | |
/// </member> | |
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewBlocklevelTags'>
/// <summary>Get previously setup block level elements.</summary>
/// <returns>The array last stored by <c>SetNewBlocklevelTags</c>, or <c>null</c> if none was stored.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewBlocklevelTags() __gc[]
{
    return m_NewBlocklevelTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewBlocklevelTags(System.String[])'>
/// <summary>Setup new block level elements.</summary>
/// <param name = 'value'>Element names; <c>null</c> clears the list.</param>
/// <exception cref = 'T:System.ArgumentException'>An element name is <c>null</c> or empty.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewBlocklevelTags(String * value __gc[])
{
    // A null array is accepted: it restores the state the constructor sets up,
    // and SetupSimpleConfig already skips a null list.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Reject null entries here instead of failing with a NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewBlocklevelTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewEmptyTags'>
/// <summary>Get previously setup additional empty elements.</summary>
/// <returns>The array last stored by <c>SetNewEmptyTags</c>, or <c>null</c> if none was stored.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewEmptyTags() __gc[]
{
    return m_NewEmptyTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewEmptyTags(System.String[])'>
/// <summary>Setup new empty elements.</summary>
/// <param name = 'value'>Element names; <c>null</c> clears the list.</param>
/// <exception cref = 'T:System.ArgumentException'>An element name is <c>null</c> or empty.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewEmptyTags(String * value __gc[])
{
    // A null array is accepted: it restores the state the constructor sets up,
    // and SetupSimpleConfig already skips a null list.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Reject null entries here instead of failing with a NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewEmptyTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewInlineTags'>
/// <summary>Get previously setup additional inline elements.</summary>
/// <returns>The array last stored by <c>SetNewInlineTags</c>, or <c>null</c> if none was stored.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewInlineTags() __gc[]
{
    return m_NewInlineTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewInlineTags(System.String[])'>
/// <summary>Setup new inline elements.</summary>
/// <param name = 'value'>Element names; <c>null</c> clears the list.</param>
/// <exception cref = 'T:System.ArgumentException'>An element name is <c>null</c> or empty.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewInlineTags(String * value __gc[])
{
    // A null array is accepted: it restores the state the constructor sets up,
    // and SetupSimpleConfig already skips a null list.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Reject null entries here instead of failing with a NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewInlineTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewPreTags'>
/// <summary>Get previously setup <c>&lt;pre&gt;</c>-like elements.</summary>
/// <returns>The array last stored by <c>SetNewPreTags</c>, or <c>null</c> if none was stored.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewPreTags() __gc[]
{
    return m_NewPreTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewPreTags(System.String[])'>
/// <summary>Setup new <c>&lt;pre&gt;</c>-like elements.</summary>
/// <param name = 'value'>Element names; <c>null</c> clears the list.</param>
/// <exception cref = 'T:System.ArgumentException'>An element name is <c>null</c> or empty.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewPreTags(String * value __gc[])
{
    // A null array is accepted: it restores the state the constructor sets up,
    // and SetupSimpleConfig already skips a null list.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Reject null entries here instead of failing with a NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewPreTags = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.QuoteAmpersand'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::SimpleHtmlParser::get_QuoteAmpersand() { return this->m_QuoteAmpersand; } | |
void TidyDotNet::SimpleHtmlParser::set_QuoteAmpersand(bool value) { this->m_QuoteAmpersand = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.RepeatedAttributes'> | |
/// <summary>Which specification to keep when encountering multiple specifications of the same attribute</summary> | |
/// <remarks>The default is <c>KeepFirst</c>.</remarks> | |
TidyDotNet::RepeatedAttributes TidyDotNet::SimpleHtmlParser::get_RepeatedAttributes() { return this->m_RepeatedAttributes; } | |
void TidyDotNet::SimpleHtmlParser::set_RepeatedAttributes(TidyDotNet::RepeatedAttributes value) { this->m_RepeatedAttributes = value; } | |
/// </member> | |
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.#ctor()'>
/// <summary>Creates a parser configuration with every setting at its initial value.</summary>
TidyDotNet::SimpleHtmlParser::SimpleHtmlParser()
{
    // numeric and enum settings
    m_TabSize            = 4;
    m_RepeatedAttributes = TidyDotNet::RepeatedAttributes::KeepFirst;

    // settings that start switched on
    m_FixBadComments = true;
    m_JoinStyles     = true;
    m_QuoteAmpersand = true;

    // settings that start switched off
    m_AssumeXmlProcIns  = false;
    m_InputXml          = false;
    m_JoinClasses       = false;
    m_LiteralAttributes = false;
    m_Ncr               = false;

    // no additional tags are declared until a SetNew*Tags method is called
    m_NewBlocklevelTags = null;
    m_NewEmptyTags      = null;
    m_NewInlineTags     = null;
    m_NewPreTags        = null;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.Finalize'>
/// <summary>Does nothing; the destructor body is empty.</summary>
TidyDotNet::SimpleHtmlParser::~SimpleHtmlParser() {}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetupSimpleConfig()'> | |
void TidyDotNet::SimpleHtmlParser::SetupSimpleConfig(TidyDoc tdoc) | |
{ | |
tidyOptSetBool(tdoc, TidyXmlPIs, this->m_AssumeXmlProcIns ? yes : no); | |
tidyOptSetBool(tdoc, TidyFixComments, this->m_FixBadComments ? yes : no); | |
tidyOptSetBool(tdoc, TidyXmlTags, this->m_InputXml ? yes : no); | |
tidyOptSetBool(tdoc, TidyJoinClasses, this->m_JoinClasses ? yes : no); | |
tidyOptSetBool(tdoc, TidyJoinStyles, this->m_JoinStyles ? yes : no); | |
tidyOptSetBool(tdoc, TidyLiteralAttribs, this->m_LiteralAttributes ? yes : no); | |
tidyOptSetBool(tdoc, TidyNCR, this->m_Ncr ? yes : no); | |
tidyOptSetBool(tdoc, TidyQuoteAmpersand, this->m_QuoteAmpersand ? yes : no); | |
tidyOptSetInt(tdoc, TidyTabSize, this->m_TabSize); | |
tidyOptSetInt(tdoc, TidyDuplicateAttrs, this->m_RepeatedAttributes); | |
if (this->m_NewBlocklevelTags && this->m_NewBlocklevelTags->Count) | |
{ | |
String * s = String::Join(",", this->m_NewBlocklevelTags); | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(s); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyBlockTags, (char*)value); | |
} | |
if (this->m_NewInlineTags && this->m_NewInlineTags->Count) | |
{ | |
String * s = String::Join(",", this->m_NewInlineTags); | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(s); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyInlineTags, (char*)value); | |
} | |
if (this->m_NewEmptyTags && this->m_NewEmptyTags->Count) | |
{ | |
String * s = String::Join(",", this->m_NewEmptyTags); | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(s); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyEmptyTags, (char*)value); | |
} | |
if (this->m_NewPreTags && this->m_NewPreTags->Count) | |
{ | |
String * s = String::Join(",", this->m_NewPreTags); | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(s); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyPreTags, (char*)value); | |
} | |
} | |
/// </member> | |
// ----------------------------------------------------- | |
/// <member name = 'P:TidyDotNet.HtmlParser.AccessibilityCheck'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_AccessibilityCheck() { return this->m_AccessibilityCheck; } | |
void TidyDotNet::HtmlParser::set_AccessibilityCheck(bool value) { this->m_AccessibilityCheck = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.AddXmlDecl'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_AddXmlDecl() { return this->m_AddXmlDecl; } | |
void TidyDotNet::HtmlParser::set_AddXmlDecl(bool value) { this->m_AddXmlDecl = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.AddXmlSpace'> | |
/// <summary>Add <c>xml:space</c> attribute to white-space preserving elements.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_AddXmlSpace() { return this->m_AddXmlSpace; } | |
void TidyDotNet::HtmlParser::set_AddXmlSpace(bool value) { this->m_AddXmlSpace = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.AltText'> | |
/// <summary>Automatically add this <c>alt</c> attribute when required.</summary> | |
/// <remarks>The default is <c>null</c>.</remarks> | |
String * TidyDotNet::HtmlParser::get_AltText() { return this->m_AltText; } | |
void TidyDotNet::HtmlParser::set_AltText(String * value) { this->m_AltText = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.AsciiChars'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_AsciiChars() { return this->m_AsciiChars; } | |
void TidyDotNet::HtmlParser::set_AsciiChars(bool value) { this->m_AsciiChars = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.Bare'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_Bare() { return this->m_Bare; } | |
void TidyDotNet::HtmlParser::set_Bare(bool value) { this->m_Bare = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.Clean'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_Clean() { return this->m_Clean; } | |
void TidyDotNet::HtmlParser::set_Clean(bool value) { this->m_Clean = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.CssPrefix'> | |
/// <summary>Prefix for Tidy generated class names.</summary> | |
/// <remarks>The default is <c>"c"</c>.</remarks> | |
// TODO: update default value | |
String * TidyDotNet::HtmlParser::get_CssPrefix() { return this->m_CssPrefix; } | |
void TidyDotNet::HtmlParser::set_CssPrefix(String * value) { this->m_CssPrefix = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.Doctype'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
String * TidyDotNet::HtmlParser::get_Doctype() { return this->m_Doctype; } | |
void TidyDotNet::HtmlParser::set_Doctype(String * value) { this->m_Doctype = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.DoctypeMode'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
TidyDotNet::DocTypeMode TidyDotNet::HtmlParser::get_DoctypeMode() { return this->m_DoctypeMode; } | |
void TidyDotNet::HtmlParser::set_DoctypeMode(TidyDotNet::DocTypeMode value) { this->m_DoctypeMode = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.DropEmptyParas'> | |
/// <summary>Drop empty paragraphs.</summary> | |
/// <remarks>The default is <c>true</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_DropEmptyParas() { return this->m_DropEmptyParas; } | |
void TidyDotNet::HtmlParser::set_DropEmptyParas(bool value) { this->m_DropEmptyParas = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.DropFontTags'> | |
/// <summary>Drop font elements.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_DropFontTags() { return this->m_DropFontTags; } | |
void TidyDotNet::HtmlParser::set_DropFontTags(bool value) { this->m_DropFontTags = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.DropProprietaryAttributes'> | |
/// <summary>Drop proprietary attributes.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_DropProprietaryAttributes() { return this->m_DropProprietaryAttributes; } | |
void TidyDotNet::HtmlParser::set_DropProprietaryAttributes(bool value) { this->m_DropProprietaryAttributes = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.EncloseBlockText'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_EncloseBlockText() { return this->m_EncloseBlockText; } | |
void TidyDotNet::HtmlParser::set_EncloseBlockText(bool value) { this->m_EncloseBlockText = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.EncloseText'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_EncloseText() { return this->m_EncloseText; } | |
void TidyDotNet::HtmlParser::set_EncloseText(bool value) { this->m_EncloseText = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.EscapeCdata'> | |
/// <summary>Convert <c>CDATA</c> nodes to regular text nodes.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_EscapeCdata() { return this->m_EscapeCdata; } | |
void TidyDotNet::HtmlParser::set_EscapeCdata(bool value) { this->m_EscapeCdata = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.FixBackslash'> | |
/// <summary>Convert backslash characters (<c>"\"</c>) in URIs to slashes (<c>"/"</c>).</summary> | |
/// <remarks>The default is <c>true</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_FixBackslash() { return this->m_FixBackslash; } | |
void TidyDotNet::HtmlParser::set_FixBackslash(bool value) { this->m_FixBackslash = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.FixUri'> | |
/// <summary>Escape invalid characters in URIs.</summary> | |
/// <remarks>The default is <c>true</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_FixUri() { return this->m_FixUri; } | |
void TidyDotNet::HtmlParser::set_FixUri(bool value) { this->m_FixUri = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.HideComments'> | |
/// <summary>Remove comment nodes from document.</summary> | |
/// <remarks>The default is <c>false</c>.</remarks> | |
bool TidyDotNet::HtmlParser::get_HideComments() { return this->m_HideComments; } | |
void TidyDotNet::HtmlParser::set_HideComments(bool value) { this->m_HideComments = value; } | |
/// </member> | |
#if 0
// Disabled: the Language property is compiled out everywhere in this file.
/// <member name = 'P:TidyDotNet.HtmlParser.Language'>
String * TidyDotNet::HtmlParser::get_Language()
{
    return m_Language;
}

void TidyDotNet::HtmlParser::set_Language(String * value)
{
    m_Language = value;
}
/// </member>
#endif
/// <member name = 'P:TidyDotNet.HtmlParser.LogicalEmphasis'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_LogicalEmphasis() { return this->m_LogicalEmphasis; } | |
void TidyDotNet::HtmlParser::set_LogicalEmphasis(bool value) { this->m_LogicalEmphasis = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.LowerLiterals'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_LowerLiterals() { return this->m_LowerLiterals; } | |
void TidyDotNet::HtmlParser::set_LowerLiterals(bool value) { this->m_LowerLiterals = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.OutputHtml'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_OutputHtml() { return this->m_OutputHtml; } | |
void TidyDotNet::HtmlParser::set_OutputHtml(bool value) { this->m_OutputHtml = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.OutputXhtml'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_OutputXhtml() { return this->m_OutputXhtml; } | |
void TidyDotNet::HtmlParser::set_OutputXhtml(bool value) { this->m_OutputXhtml = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.ReplaceColor'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_ReplaceColor() { return this->m_ReplaceColor; } | |
void TidyDotNet::HtmlParser::set_ReplaceColor(bool value) { this->m_ReplaceColor = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.TidyMark'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_TidyMark() { return this->m_TidyMark; } | |
void TidyDotNet::HtmlParser::set_TidyMark(bool value) { this->m_TidyMark = value; } | |
/// </member> | |
/// <member name = 'P:TidyDotNet.HtmlParser.Word2000'> | |
/// <summary></summary> | |
/// <remarks></remarks> | |
bool TidyDotNet::HtmlParser::get_Word2000() { return this->m_Word2000; } | |
void TidyDotNet::HtmlParser::set_Word2000(bool value) { this->m_Word2000 = value; } | |
/// </member> | |
/// <member name = 'M:TidyDotNet.HtmlParser.#ctor()'>
/// <summary>Creates a parser with every option declared by this class at its initial value.</summary>
TidyDotNet::HtmlParser::HtmlParser()
{
    // boolean options that start switched on
    m_DropEmptyParas = true;
    m_FixBackslash   = true;
    m_FixUri         = true;
    m_LowerLiterals  = true;

    // boolean options that start switched off
    m_AccessibilityCheck        = false;
    m_AddXmlDecl                = false;
    m_AddXmlSpace               = false;
    m_AsciiChars                = false;
    m_Bare                      = false;
    m_Clean                     = false;
    m_DropFontTags              = false;
    m_DropProprietaryAttributes = false;
    m_EncloseBlockText          = false;
    m_EncloseText               = false;
    m_EscapeCdata               = false;
    m_HideComments              = false;
    m_LogicalEmphasis           = false;
    m_OutputHtml                = false;
    m_OutputXhtml               = false;
    m_ReplaceColor              = false;
    m_TidyMark                  = false;
    m_Word2000                  = false;

    // string options start unset
    m_AltText   = null;
    m_CssPrefix = null;
    m_Doctype   = null;

    m_DoctypeMode = TidyDotNet::DocTypeMode::Auto;
#if 0
    m_Language = false;
#endif
}
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.Finalize'>
/// <summary>Does nothing; the destructor body is empty.</summary>
TidyDotNet::HtmlParser::~HtmlParser() {}
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.SetupConfig()'> | |
void TidyDotNet::HtmlParser::SetupConfig(TidyDoc tdoc) | |
{ | |
// Integer options | |
tidyOptSetInt(tdoc, TidyAccessibilityCheckLevel, this->m_AccessibilityCheck); | |
tidyOptSetInt(tdoc, TidyDoctypeMode, this->m_DoctypeMode); | |
// Boolean options | |
tidyOptSetBool(tdoc, TidyXmlDecl, this->m_AddXmlDecl ? yes : no); | |
tidyOptSetBool(tdoc, TidyXmlSpace, this->m_AddXmlSpace ? yes : no); | |
tidyOptSetBool(tdoc, TidyAsciiChars, this->m_AsciiChars ? yes : no); | |
tidyOptSetBool(tdoc, TidyMakeBare, this->m_Bare ? yes : no); | |
tidyOptSetBool(tdoc, TidyMakeClean, this->m_Clean ? yes : no); | |
tidyOptSetBool(tdoc, TidyDropEmptyParas, this->m_DropEmptyParas ? yes : no); | |
tidyOptSetBool(tdoc, TidyDropFontTags, this->m_DropFontTags ? yes : no); | |
tidyOptSetBool(tdoc, TidyDropPropAttrs, this->m_DropProprietaryAttributes ? yes : no); | |
tidyOptSetBool(tdoc, TidyEncloseBlockText, this->m_EncloseBlockText ? yes : no); | |
tidyOptSetBool(tdoc, TidyEncloseBodyText, this->m_EncloseText ? yes : no); | |
tidyOptSetBool(tdoc, TidyEscapeCdata, this->m_EscapeCdata ? yes : no); | |
tidyOptSetBool(tdoc, TidyFixBackslash, this->m_FixBackslash ? yes : no); | |
tidyOptSetBool(tdoc, TidyFixUri, this->m_FixUri ? yes : no); | |
tidyOptSetBool(tdoc, TidyHideComments, this->m_HideComments ? yes : no); | |
tidyOptSetBool(tdoc, TidyLogicalEmphasis, this->m_LogicalEmphasis ? yes : no); | |
tidyOptSetBool(tdoc, TidyLowerLiterals, this->m_LowerLiterals ? yes : no); | |
tidyOptSetBool(tdoc, TidyHtmlOut, this->m_OutputHtml ? yes : no); | |
tidyOptSetBool(tdoc, TidyXhtmlOut, this->m_OutputXhtml ? yes : no); | |
tidyOptSetBool(tdoc, TidyReplaceColor, this->m_ReplaceColor ? yes : no); | |
tidyOptSetBool(tdoc, ::TidyMark, this->m_TidyMark ? yes : no); | |
tidyOptSetBool(tdoc, TidyWord2000, this->m_Word2000 ? yes : no); | |
if (this->m_AltText && this->m_AltText->Length) | |
{ | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_AltText); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyAltText, (char*)value); | |
} | |
if (this->m_CssPrefix && this->m_CssPrefix->Length) | |
{ | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_CssPrefix); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyCSSPrefix, (char*)value); | |
} | |
if (this->m_Doctype && this->m_Doctype->Length) | |
{ | |
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_Doctype); | |
char __pin * value = &utf8[0]; | |
tidyOptSetValue(tdoc, TidyDoctype, (char*)value); | |
} | |
#if 0 | |
this->m_DoctypeMode | |
#endif | |
#if 0 | |
this->m_Language | |
#endif | |
} | |
/// </member> | |
/// <member name = 'M:TidyDotNet.HtmlParser.ParseString()'>
/// <summary>Parses, cleans and diagnoses a markup string with the current settings.</summary>
/// <param name = 's'>Markup to parse; may be empty but not <c>null</c>.</param>
/// <returns>A <c>TidyDocument</c> that takes over the native Tidy document.</returns>
/// <exception cref = 'T:System.ArgumentNullException'><c>s</c> is <c>null</c>.</exception>
TidyDotNet::TidyDocument * TidyDotNet::HtmlParser::ParseString(String * s)
{
    if (!s)
        throw new ArgumentNullException("s");

    // Convert the System::String argument to UTF-8. The array is allocated one
    // byte larger than the encoded text: managed arrays are zero-initialized,
    // so the spare byte is the NUL terminator tidyParseString relies on, and
    // the array is never empty, so taking the address of element 0 is valid
    // even for an empty input string.
    unsigned char utf8 __gc[] = new unsigned char __gc[Encoding::UTF8->GetByteCount(s) + 1];
    Encoding::UTF8->GetBytes(s, 0, s->Length, utf8, 0);
    unsigned char __pin * input = &utf8[0];

    TidyBuffer errbuf = {0};
    TidyDoc tDoc = tidyCreate();

    // apply config options from instance to Tidy
    this->SetupSimpleConfig(tDoc);
    this->SetupConfig(tDoc);

    // set character encoding to UTF-8
    tidySetCharEncoding(tDoc, "utf8");

    // actually Tidy should discard error output, but...
    tidySetErrorBuffer(tDoc, &errbuf);

    tidyParseString(tDoc, (ctmbstr)input);
    tidyCleanAndRepair(tDoc);
    tidyRunDiagnostics(tDoc);

    // The collected messages are not used; release them instead of leaking.
    // NOTE(review): tDoc still refers to `errbuf`, which goes out of scope on
    // return - confirm that nothing done with the document later reports
    // diagnostics, or move the buffer into TidyDocument.
    tidyBufFree(&errbuf);

    TidyDocument * doc = new TidyDocument(tDoc);
    return doc;
}
/// </member>
// ------------------------------ | |
/// <member name = 'M:TidyDotNet.TidyDocument.#ctor()'>
/// <summary>Wraps a native Tidy document handle.</summary>
/// <param name = 'tDoc'>Handle to wrap; the destructor releases it.</param>
/// <exception cref = 'T:System.ArgumentNullException'><c>tDoc</c> is null.</exception>
TidyDotNet::TidyDocument::TidyDocument(TidyDoc tDoc)
{
    if (tDoc)
    {
        m_tDoc = tDoc;
        return;
    }
    throw new ArgumentNullException("tDoc");
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.Finalize'>
/// <summary>Releases the wrapped Tidy document.</summary>
TidyDotNet::TidyDocument::~TidyDocument()
{
    if (this->m_tDoc)
    {
        tidyRelease(this->m_tDoc);
        // Forget the handle so a repeated clean-up cannot release it twice
        // and later calls do not operate on freed native memory.
        this->m_tDoc = null;
    }
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.ToXmlDocument()'>
/// <summary>Builds a new <c>XmlDocument</c> from the wrapped Tidy parse tree.</summary>
/// <remarks>The tree is walked by <c>ProcessTree</c>, starting at Tidy's root node.</remarks>
/// <returns>The newly created document.</returns>
XmlDocument * TidyDotNet::TidyDocument::ToXmlDocument()
{
    XmlDocument * result = new XmlDocument();
    // A null `current` makes ProcessTree append to the document node itself.
    ProcessTree(tidyGetRoot(m_tDoc), result, null);
    return result;
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.ProcessTree()'> | |
void TidyDotNet::TidyDocument::ProcessTree(TidyNode node, XmlDocument * doc, XmlNode * current) | |
{ | |
TidyNode n = node; | |
if (!doc) | |
throw new ArgumentNullException(); | |
if (!node) | |
return; | |
if (!current) | |
current = doc; | |
while (n) | |
{ | |
int nodetype = tidyNodeGetType(n); | |
TidyNode firstchild = tidyGetChild(n); | |
if (TidyNode_Root == nodetype) | |
{ | |
this->ProcessTree(firstchild, doc, current); | |
} | |
else if (TidyNode_Start == nodetype || TidyNode_StartEnd == nodetype) | |
{ | |
// Get System::String element name from TidyNode | |
ctmbstr tname = tidyNodeGetName(n); | |
String * name = new String(tname, 0, (int)strlen(tname), Encoding::UTF8); | |
XmlNode * e = doc->CreateElement(name); | |
// Copy attributes to element | |
// TODO: deal with xmlns attributes (namespaces) | |
for (TidyAttr att = tidyAttrFirst(n); att; att = tidyAttrNext(att)) | |
{ | |
ctmbstr tattname = tidyAttrName(att); | |
ctmbstr tattval = tidyAttrValue(att); | |
String * attname = new String(tattname, 0, (int)strlen(tattname), Encoding::UTF8); | |
String * attval = new String(tattval, 0, (int)strlen(tattval), Encoding::UTF8); | |
XmlAttribute * a = doc->CreateAttribute(attname); | |
a->Value = attval; | |
e->Attributes->Append(a); | |
} | |
current->AppendChild(e); | |
this->ProcessTree(firstchild, doc, e); | |
} | |
else if (TidyNode_Comment == nodetype) | |
{ | |
int len = tidyNodeGetLength(n) + 1; | |
char* buf = new char[len]; | |
int res = tidyNodeGetValue(m_tDoc, n, buf, &len); | |
if (res < 1) | |
throw new InvalidOperationException(); | |
// TODO: change len-3 when comment node storage in Tidy changes | |
String * value = new String(buf, 0, len-3, Encoding::UTF8); | |
XmlNode * comment = doc->CreateComment(value); | |
current->AppendChild(comment); | |
} | |
else if (TidyNode_ProcIns == nodetype) | |
{ | |
int len = tidyNodeGetLength(n) + 1; | |
char* buf = new char[len]; | |
int res = tidyNodeGetValue(m_tDoc, n, buf, &len); | |
if (res < 1) | |
throw new InvalidOperationException(); | |
String * value = new String(buf, 0, len, Encoding::UTF8); | |
Debug::WriteLine(len.ToString()); | |
Debug::WriteLine(value); | |
XmlNode * procins = doc->CreateProcessingInstruction("pi", value); | |
current->AppendChild(procins); | |
} | |
else if (TidyNode_Text == nodetype) | |
{ | |
XmlNode * text = doc->CreateTextNode("Text nodes unsupported"); | |
current->AppendChild(text); | |
} | |
else if (TidyNode_CDATA == nodetype) | |
{ | |
XmlNode * cdata = doc->CreateCDataSection("CDATA sections unsupported"); | |
current->AppendChild(cdata); | |
} | |
else if (TidyNode_DocType == nodetype) | |
{ | |
// XmlNode * doctype = doc->CreateDocumentType("foo", "bar", "baz", String::Empty); | |
// current->AppendChild(doctype); | |
} | |
n = tidyGetNext(n); | |
} | |
} | |
/// </member> | |
/// </members> | |
/// </doc> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment