Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Created May 27, 2013 16:49
Show Gist options
  • Save hoehrmann/5658048 to your computer and use it in GitHub Desktop.
Save hoehrmann/5658048 to your computer and use it in GitHub Desktop.
The attached file contains a preliminary outline of an HTML Tidy wrapper for .NET applications, written in Managed C++ in February 2003. Originally posted at http://lists.w3.org/Archives/Public/www-archive/2004Jan/0115.html
// This is the main DLL file.
#include "stdafx.h"
#include "Tidydotnet.h"
#include "t:/tidylib2/include/tidy.h"
#include "t:/tidylib2/include/buffio.h"
using namespace System;
using namespace System::Xml;
using namespace System::Text;
using namespace System::Diagnostics;
/// <?xml version = '1.0' encoding = 'utf-8'?>
/// <doc>
/// <assembly>
/// <name>TidyDotNet</name>
/// </assembly>
/// <members>
namespace TidyDotNet
{
// Managed mirror of the Tidy constants used for the duplicate attribute
// option; the numeric values are taken directly from the Tidy headers.
public __value enum RepeatedAttributes
{
    KeepFirst = ::TidyKeepFirst,
    KeepLast  = ::TidyKeepLast,
};
// Managed mirror of the Tidy doctype mode constants; the numeric values
// are taken directly from the Tidy headers.
public __value enum DocTypeMode
{
    Omit   = ::TidyDoctypeOmit,
    Auto   = ::TidyDoctypeAuto,
    Strict = ::TidyDoctypeStrict,
    Loose  = ::TidyDoctypeLoose,
    User   = ::TidyDoctypeUser,
};
// Managed wrapper around a native TidyDoc handle. The handle is stored by
// the constructor and released with tidyRelease() by the destructor.
public __gc class TidyDocument
{
public:
    // Takes over the given handle; throws ArgumentNullException when it is null.
    TidyDocument(TidyDoc tDoc);
    ~TidyDocument();
    // Builds a System::Xml::XmlDocument from the parsed Tidy tree.
    XmlDocument * ToXmlDocument();

private:
    // Appends `node` and its following siblings (recursively) below `current`.
    void ProcessTree(TidyNode node, XmlDocument * doc, XmlNode * current);

protected:
    // Native document handle owned by this instance.
    TidyDoc m_tDoc;
};
// Holds the basic parser options. Values are kept in managed fields and
// are only copied to a TidyDoc when SetupSimpleConfig() is called.
public __gc class SimpleHtmlParser
{
public:
    // --- properties -------------------------------------------------
    __property int get_TabSize();
    __property void set_TabSize(int value);

    __property bool get_AssumeXmlProcIns();
    __property void set_AssumeXmlProcIns(bool value);

    __property bool get_FixBadComments();
    __property void set_FixBadComments(bool value);

    __property bool get_InputXml();
    __property void set_InputXml(bool value);

    __property bool get_JoinClasses();
    __property void set_JoinClasses(bool value);

    __property bool get_JoinStyles();
    __property void set_JoinStyles(bool value);

    __property bool get_LiteralAttributes();
    __property void set_LiteralAttributes(bool value);

    __property bool get_Ncr();
    __property void set_Ncr(bool value);

    __property bool get_QuoteAmpersand();
    __property void set_QuoteAmpersand(bool value);

    __property TidyDotNet::RepeatedAttributes get_RepeatedAttributes();
    __property void set_RepeatedAttributes(TidyDotNet::RepeatedAttributes value);

    // --- methods ----------------------------------------------------
    // Additional element names, exposed as Get/Set method pairs that
    // take and return managed string arrays.
    String * GetNewBlocklevelTags() __gc[];
    void SetNewBlocklevelTags([ParamArray] String* value __gc[]);

    String * GetNewEmptyTags() __gc[];
    void SetNewEmptyTags([ParamArray] String* value __gc[]);

    String * GetNewInlineTags() __gc[];
    void SetNewInlineTags([ParamArray] String* value __gc[]);

    String * GetNewPreTags() __gc[];
    void SetNewPreTags([ParamArray] String* value __gc[]);

    SimpleHtmlParser();
    ~SimpleHtmlParser();

private:
    // Backing fields for the properties above.
    int m_TabSize;
    bool m_AssumeXmlProcIns;
    bool m_FixBadComments;
    bool m_InputXml;
    bool m_JoinClasses;
    bool m_JoinStyles;
    bool m_LiteralAttributes;
    bool m_QuoteAmpersand;
    bool m_Ncr;
    // TODO: change type
    TidyDotNet::RepeatedAttributes m_RepeatedAttributes;

    // Backing fields for the Get/SetNew...Tags methods.
    String * m_NewBlocklevelTags __gc[];
    String * m_NewInlineTags __gc[];
    String * m_NewEmptyTags __gc[];
    String * m_NewPreTags __gc[];

protected:
    // Copies the options held by this class to the given Tidy document.
    void SetupSimpleConfig(TidyDoc tdoc);
};
// Extends SimpleHtmlParser with further options and the ParseString()
// entry point. As in the base class, option values live in managed fields
// and are copied to a TidyDoc by SetupConfig().
public __gc class HtmlParser : public SimpleHtmlParser
{
public:
    // --- boolean properties -----------------------------------------
    __property bool get_AccessibilityCheck();
    __property void set_AccessibilityCheck(bool value);

    __property bool get_AddXmlDecl();
    __property void set_AddXmlDecl(bool value);

    __property bool get_AddXmlSpace();
    __property void set_AddXmlSpace(bool value);

    __property bool get_AsciiChars();
    __property void set_AsciiChars(bool value);

    __property bool get_Bare();
    __property void set_Bare(bool value);

    __property bool get_Clean();
    __property void set_Clean(bool value);

    __property bool get_DropEmptyParas();
    __property void set_DropEmptyParas(bool value);

    __property bool get_DropFontTags();
    __property void set_DropFontTags(bool value);

    __property bool get_DropProprietaryAttributes();
    __property void set_DropProprietaryAttributes(bool value);

    __property bool get_EncloseBlockText();
    __property void set_EncloseBlockText(bool value);

    __property bool get_EncloseText();
    __property void set_EncloseText(bool value);

    __property bool get_EscapeCdata();
    __property void set_EscapeCdata(bool value);

    __property bool get_FixBackslash();
    __property void set_FixBackslash(bool value);

    __property bool get_FixUri();
    __property void set_FixUri(bool value);

    __property bool get_HideComments();
    __property void set_HideComments(bool value);

    __property bool get_LogicalEmphasis();
    __property void set_LogicalEmphasis(bool value);

    __property bool get_LowerLiterals();
    __property void set_LowerLiterals(bool value);

    __property bool get_OutputHtml();
    __property void set_OutputHtml(bool value);

    __property bool get_OutputXhtml();
    __property void set_OutputXhtml(bool value);

    __property bool get_ReplaceColor();
    __property void set_ReplaceColor(bool value);

    __property bool get_TidyMark();
    __property void set_TidyMark(bool value);

    __property bool get_Word2000();
    __property void set_Word2000(bool value);

    // --- string / enum properties -----------------------------------
    __property String * get_AltText();
    __property void set_AltText(String * value);

    __property String * get_CssPrefix();
    __property void set_CssPrefix(String * value);

    __property String * get_Doctype();
    __property void set_Doctype(String * value);

    __property DocTypeMode get_DoctypeMode();
    __property void set_DoctypeMode(DocTypeMode value);

    // The Language property is compiled out.
#if 0
    __property String * get_Language();
    __property void set_Language(String * value);
#endif

    HtmlParser();
    ~HtmlParser();

    // Parses the given markup and returns the resulting document wrapper.
    TidyDocument * ParseString(String * s);

private:
    // Backing fields for the properties above.
    bool m_AccessibilityCheck;
    bool m_AddXmlDecl;
    bool m_AddXmlSpace;
    bool m_AsciiChars;
    bool m_Bare;
    bool m_Clean;
    bool m_DropEmptyParas;
    bool m_DropFontTags;
    bool m_DropProprietaryAttributes;
    bool m_EncloseBlockText;
    bool m_EncloseText;
    bool m_EscapeCdata;
    bool m_FixBackslash;
    bool m_FixUri;
    bool m_HideComments;
    bool m_LogicalEmphasis;
    bool m_LowerLiterals;
    bool m_OutputHtml;
    bool m_OutputXhtml;
    bool m_ReplaceColor;
    bool m_TidyMark;
    bool m_Word2000;
    String * m_AltText;
    String * m_CssPrefix;
    String * m_Doctype;
    DocTypeMode m_DoctypeMode;
#if 0
    String * m_Language;
#endif

protected:
    // Copies the options held by this class to the given Tidy document.
    void SetupConfig(TidyDoc tdoc);
};
}
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.TabSize'>
/// <summary>Align tab stops at this number of columns.</summary>
/// <remarks>The default is <c>8</c>.</remarks>
int TidyDotNet::SimpleHtmlParser::get_TabSize() { return this->m_TabSize; }
void TidyDotNet::SimpleHtmlParser::set_TabSize(int value) { this->m_TabSize = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.AssumeXmlProcIns'>
/// <summary>Processing instructions must be closed by <c>?></c>.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_AssumeXmlProcIns() { return this->m_AssumeXmlProcIns; }
void TidyDotNet::SimpleHtmlParser::set_AssumeXmlProcIns(bool value) { this->m_AssumeXmlProcIns = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.FixBadComments'>
/// <summary>Replace unexpected hyphens in comments by <c>=</c>.</summary>
/// <remarks>The default is <c>true</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_FixBadComments() { return this->m_FixBadComments; }
void TidyDotNet::SimpleHtmlParser::set_FixBadComments(bool value) { this->m_FixBadComments = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.InputXml'>
/// <summary>Treat input as generic XML.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_InputXml() { return this->m_InputXml; }
void TidyDotNet::SimpleHtmlParser::set_InputXml(bool value) { this->m_InputXml = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.JoinClasses'>
/// <summary>Multiple <c>class</c> attribute specifications should be run together.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_JoinClasses() { return this->m_JoinClasses; }
void TidyDotNet::SimpleHtmlParser::set_JoinClasses(bool value) { this->m_JoinClasses = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.JoinStyles'>
/// <summary>Multiple <c>style</c> attribute specifications should be run together.</summary>
/// <remarks>The default is <c>true</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_JoinStyles() { return this->m_JoinStyles; }
void TidyDotNet::SimpleHtmlParser::set_JoinStyles(bool value) { this->m_JoinStyles = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.LiteralAttributes'>
/// <summary>Keep white-space in attribute values unnormalized.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::SimpleHtmlParser::get_LiteralAttributes() { return this->m_LiteralAttributes; }
void TidyDotNet::SimpleHtmlParser::set_LiteralAttributes(bool value) { this->m_LiteralAttributes = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.Ncr'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::SimpleHtmlParser::get_Ncr() { return this->m_Ncr; }
void TidyDotNet::SimpleHtmlParser::set_Ncr(bool value) { this->m_Ncr = value; }
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewBlocklevelTags'>
/// <summary>Get the previously set up block level elements.</summary>
/// <returns>The stored array itself (not a copy); <c>null</c> until a list has been set.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewBlocklevelTags() __gc[]
{
    return m_NewBlocklevelTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewBlocklevelTags'>
/// <summary>Setup new block level elements.</summary>
/// <param name = 'value'>Element names to store; <c>null</c> or an empty array means no additional elements.</param>
/// <exception cref = 'T:System.ArgumentException'>An entry is <c>null</c> or the empty string.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewBlocklevelTags(String * value __gc[])
{
    // A null array is stored as-is: it is the state the constructor
    // establishes and SetupSimpleConfig() skips null lists.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Check for a null entry before touching its Length so that bad
            // input raises ArgumentException, not NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewBlocklevelTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewEmptyTags'>
/// <summary>Get the previously set up additional empty elements.</summary>
/// <returns>The stored array itself (not a copy); <c>null</c> until a list has been set.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewEmptyTags() __gc[]
{
    return m_NewEmptyTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewEmptyTags'>
/// <summary>Setup new empty elements.</summary>
/// <param name = 'value'>Element names to store; <c>null</c> or an empty array means no additional elements.</param>
/// <exception cref = 'T:System.ArgumentException'>An entry is <c>null</c> or the empty string.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewEmptyTags(String * value __gc[])
{
    // A null array is stored as-is: it is the state the constructor
    // establishes and SetupSimpleConfig() skips null lists.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Check for a null entry before touching its Length so that bad
            // input raises ArgumentException, not NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewEmptyTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewInlineTags'>
/// <summary>Get the previously set up additional inline elements.</summary>
/// <returns>The stored array itself (not a copy); <c>null</c> until a list has been set.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewInlineTags() __gc[]
{
    return m_NewInlineTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewInlineTags(System.String[])'>
/// <summary>Setup new inline elements.</summary>
/// <param name = 'value'>Element names to store; <c>null</c> or an empty array means no additional elements.</param>
/// <exception cref = 'T:System.ArgumentException'>An entry is <c>null</c> or the empty string.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewInlineTags(String * value __gc[])
{
    // A null array is stored as-is: it is the state the constructor
    // establishes and SetupSimpleConfig() skips null lists.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Check for a null entry before touching its Length so that bad
            // input raises ArgumentException, not NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewInlineTags = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.GetNewPreTags'>
/// <summary>Get the previously set up <c>&lt;pre></c>-like elements.</summary>
/// <returns>The stored array itself (not a copy); <c>null</c> until a list has been set.</returns>
String * TidyDotNet::SimpleHtmlParser::GetNewPreTags() __gc[]
{
    return m_NewPreTags;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetNewPreTags(System.String[])'>
/// <summary>Setup new <c>&lt;pre></c>-like elements.</summary>
/// <param name = 'value'>Element names to store; <c>null</c> or an empty array means no additional elements.</param>
/// <exception cref = 'T:System.ArgumentException'>An entry is <c>null</c> or the empty string.</exception>
void TidyDotNet::SimpleHtmlParser::SetNewPreTags(String * value __gc[])
{
    // A null array is stored as-is: it is the state the constructor
    // establishes and SetupSimpleConfig() skips null lists.
    if (value)
    {
        for (int i = 0; i < value->Length; ++i)
        {
            // Check for a null entry before touching its Length so that bad
            // input raises ArgumentException, not NullReferenceException.
            if (!value[i] || !value[i]->Length)
                throw new ArgumentException("Element name must be non-empty string");
        }
    }
    this->m_NewPreTags = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.QuoteAmpersand'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::SimpleHtmlParser::get_QuoteAmpersand() { return this->m_QuoteAmpersand; }
void TidyDotNet::SimpleHtmlParser::set_QuoteAmpersand(bool value) { this->m_QuoteAmpersand = value; }
/// </member>
/// <member name = 'P:TidyDotNet.SimpleHtmlParser.RepeatedAttributes'>
/// <summary>Which specification to keep when the same attribute is specified more than once.</summary>
/// <remarks>The default is <c>KeepFirst</c>. The value is passed to Tidy's <c>TidyDuplicateAttrs</c> option.</remarks>
TidyDotNet::RepeatedAttributes TidyDotNet::SimpleHtmlParser::get_RepeatedAttributes()
{
    return m_RepeatedAttributes;
}
void TidyDotNet::SimpleHtmlParser::set_RepeatedAttributes(TidyDotNet::RepeatedAttributes value)
{
    m_RepeatedAttributes = value;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.#ctor()'>
/// <summary>Creates a parser configuration with every option set to its initial value.</summary>
TidyDotNet::SimpleHtmlParser::SimpleHtmlParser()
{
    // Numeric and enumerated options.
    m_TabSize = 4;
    m_RepeatedAttributes = TidyDotNet::RepeatedAttributes::KeepFirst;

    // Options that start out enabled.
    m_FixBadComments = true;
    m_JoinStyles = true;
    m_QuoteAmpersand = true;

    // Options that start out disabled.
    m_AssumeXmlProcIns = false;
    m_InputXml = false;
    m_JoinClasses = false;
    m_LiteralAttributes = false;
    m_Ncr = false;

    // No additional element names are registered initially.
    m_NewBlocklevelTags = null;
    m_NewEmptyTags = null;
    m_NewInlineTags = null;
    m_NewPreTags = null;
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.Finalize'>
/// <summary>Does nothing; the class only holds managed fields.</summary>
TidyDotNet::SimpleHtmlParser::~SimpleHtmlParser()
{
    // Intentionally empty.
}
/// </member>
/// <member name = 'M:TidyDotNet.SimpleHtmlParser.SetupSimpleConfig()'>
void TidyDotNet::SimpleHtmlParser::SetupSimpleConfig(TidyDoc tdoc)
{
tidyOptSetBool(tdoc, TidyXmlPIs, this->m_AssumeXmlProcIns ? yes : no);
tidyOptSetBool(tdoc, TidyFixComments, this->m_FixBadComments ? yes : no);
tidyOptSetBool(tdoc, TidyXmlTags, this->m_InputXml ? yes : no);
tidyOptSetBool(tdoc, TidyJoinClasses, this->m_JoinClasses ? yes : no);
tidyOptSetBool(tdoc, TidyJoinStyles, this->m_JoinStyles ? yes : no);
tidyOptSetBool(tdoc, TidyLiteralAttribs, this->m_LiteralAttributes ? yes : no);
tidyOptSetBool(tdoc, TidyNCR, this->m_Ncr ? yes : no);
tidyOptSetBool(tdoc, TidyQuoteAmpersand, this->m_QuoteAmpersand ? yes : no);
tidyOptSetInt(tdoc, TidyTabSize, this->m_TabSize);
tidyOptSetInt(tdoc, TidyDuplicateAttrs, this->m_RepeatedAttributes);
if (this->m_NewBlocklevelTags && this->m_NewBlocklevelTags->Count)
{
String * s = String::Join(",", this->m_NewBlocklevelTags);
char utf8 __gc[] = Encoding::UTF8->GetBytes(s);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyBlockTags, (char*)value);
}
if (this->m_NewInlineTags && this->m_NewInlineTags->Count)
{
String * s = String::Join(",", this->m_NewInlineTags);
char utf8 __gc[] = Encoding::UTF8->GetBytes(s);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyInlineTags, (char*)value);
}
if (this->m_NewEmptyTags && this->m_NewEmptyTags->Count)
{
String * s = String::Join(",", this->m_NewEmptyTags);
char utf8 __gc[] = Encoding::UTF8->GetBytes(s);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyEmptyTags, (char*)value);
}
if (this->m_NewPreTags && this->m_NewPreTags->Count)
{
String * s = String::Join(",", this->m_NewPreTags);
char utf8 __gc[] = Encoding::UTF8->GetBytes(s);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyPreTags, (char*)value);
}
}
/// </member>
// -----------------------------------------------------
/// <member name = 'P:TidyDotNet.HtmlParser.AccessibilityCheck'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_AccessibilityCheck() { return this->m_AccessibilityCheck; }
void TidyDotNet::HtmlParser::set_AccessibilityCheck(bool value) { this->m_AccessibilityCheck = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.AddXmlDecl'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_AddXmlDecl() { return this->m_AddXmlDecl; }
void TidyDotNet::HtmlParser::set_AddXmlDecl(bool value) { this->m_AddXmlDecl = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.AddXmlSpace'>
/// <summary>Add <c>xml:space</c> attribute to white-space preserving elements.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::HtmlParser::get_AddXmlSpace() { return this->m_AddXmlSpace; }
void TidyDotNet::HtmlParser::set_AddXmlSpace(bool value) { this->m_AddXmlSpace = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.AltText'>
/// <summary>Automatically add this <c>alt</c> attribute when required.</summary>
/// <remarks>The default is <c>null</c>. The value is passed to Tidy's <c>TidyAltText</c>
/// option only when it is neither <c>null</c> nor empty.</remarks>
String * TidyDotNet::HtmlParser::get_AltText()
{
    return m_AltText;
}
void TidyDotNet::HtmlParser::set_AltText(String * value)
{
    m_AltText = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.AsciiChars'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_AsciiChars() { return this->m_AsciiChars; }
void TidyDotNet::HtmlParser::set_AsciiChars(bool value) { this->m_AsciiChars = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.Bare'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_Bare() { return this->m_Bare; }
void TidyDotNet::HtmlParser::set_Bare(bool value) { this->m_Bare = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.Clean'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_Clean() { return this->m_Clean; }
void TidyDotNet::HtmlParser::set_Clean(bool value) { this->m_Clean = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.CssPrefix'>
/// <summary>Prefix for Tidy generated class names.</summary>
/// <remarks>The constructor sets this property to <c>null</c>. The value is passed to
/// Tidy's <c>TidyCSSPrefix</c> option only when it is neither <c>null</c> nor empty;
/// otherwise Tidy's own setting is left untouched (presumably <c>"c"</c>, to be confirmed
/// against the Tidy documentation).</remarks>
// TODO: update default value
String * TidyDotNet::HtmlParser::get_CssPrefix()
{
    return m_CssPrefix;
}
void TidyDotNet::HtmlParser::set_CssPrefix(String * value)
{
    m_CssPrefix = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.Doctype'>
/// <summary>String value passed to Tidy's <c>TidyDoctype</c> option.</summary>
/// <remarks>The constructor sets this property to <c>null</c>. The value is passed to Tidy
/// only when it is neither <c>null</c> nor empty.</remarks>
String * TidyDotNet::HtmlParser::get_Doctype()
{
    return m_Doctype;
}
void TidyDotNet::HtmlParser::set_Doctype(String * value)
{
    m_Doctype = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.DoctypeMode'>
/// <summary>Enumerated value passed to Tidy's <c>TidyDoctypeMode</c> option.</summary>
/// <remarks>The constructor sets this property to <c>Auto</c>.</remarks>
TidyDotNet::DocTypeMode TidyDotNet::HtmlParser::get_DoctypeMode()
{
    return m_DoctypeMode;
}
void TidyDotNet::HtmlParser::set_DoctypeMode(TidyDotNet::DocTypeMode value)
{
    m_DoctypeMode = value;
}
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.DropEmptyParas'>
/// <summary>Drop empty paragraphs.</summary>
/// <remarks>The default is <c>true</c>.</remarks>
bool TidyDotNet::HtmlParser::get_DropEmptyParas() { return this->m_DropEmptyParas; }
void TidyDotNet::HtmlParser::set_DropEmptyParas(bool value) { this->m_DropEmptyParas = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.DropFontTags'>
/// <summary>Drop font elements.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::HtmlParser::get_DropFontTags() { return this->m_DropFontTags; }
void TidyDotNet::HtmlParser::set_DropFontTags(bool value) { this->m_DropFontTags = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.DropProprietaryAttributes'>
/// <summary>Drop proprietary attributes.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::HtmlParser::get_DropProprietaryAttributes() { return this->m_DropProprietaryAttributes; }
void TidyDotNet::HtmlParser::set_DropProprietaryAttributes(bool value) { this->m_DropProprietaryAttributes = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.EncloseBlockText'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_EncloseBlockText() { return this->m_EncloseBlockText; }
void TidyDotNet::HtmlParser::set_EncloseBlockText(bool value) { this->m_EncloseBlockText = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.EncloseText'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_EncloseText() { return this->m_EncloseText; }
void TidyDotNet::HtmlParser::set_EncloseText(bool value) { this->m_EncloseText = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.EscapeCdata'>
/// <summary>Convert <c>CDATA</c> nodes to regular text nodes.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::HtmlParser::get_EscapeCdata() { return this->m_EscapeCdata; }
void TidyDotNet::HtmlParser::set_EscapeCdata(bool value) { this->m_EscapeCdata = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.FixBackslash'>
/// <summary>Convert backslash characters (<c>"\"</c>) in URIs to slashes (<c>"/"</c>).</summary>
/// <remarks>The default is <c>true</c>.</remarks>
bool TidyDotNet::HtmlParser::get_FixBackslash() { return this->m_FixBackslash; }
void TidyDotNet::HtmlParser::set_FixBackslash(bool value) { this->m_FixBackslash = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.FixUri'>
/// <summary>Escape invalid characters in URIs.</summary>
/// <remarks>The default is <c>true</c>.</remarks>
bool TidyDotNet::HtmlParser::get_FixUri() { return this->m_FixUri; }
void TidyDotNet::HtmlParser::set_FixUri(bool value) { this->m_FixUri = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.HideComments'>
/// <summary>Remove comment nodes from document.</summary>
/// <remarks>The default is <c>false</c>.</remarks>
bool TidyDotNet::HtmlParser::get_HideComments() { return this->m_HideComments; }
void TidyDotNet::HtmlParser::set_HideComments(bool value) { this->m_HideComments = value; }
/// </member>
#if 0
// Compiled out: the Language property, its declaration and its backing
// field are all excluded with #if 0.
/// <member name = 'P:TidyDotNet.HtmlParser.Language'>
String * TidyDotNet::HtmlParser::get_Language()
{
    return m_Language;
}
void TidyDotNet::HtmlParser::set_Language(String * value)
{
    m_Language = value;
}
/// </member>
#endif
/// <member name = 'P:TidyDotNet.HtmlParser.LogicalEmphasis'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_LogicalEmphasis() { return this->m_LogicalEmphasis; }
void TidyDotNet::HtmlParser::set_LogicalEmphasis(bool value) { this->m_LogicalEmphasis = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.LowerLiterals'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_LowerLiterals() { return this->m_LowerLiterals; }
void TidyDotNet::HtmlParser::set_LowerLiterals(bool value) { this->m_LowerLiterals = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.OutputHtml'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_OutputHtml() { return this->m_OutputHtml; }
void TidyDotNet::HtmlParser::set_OutputHtml(bool value) { this->m_OutputHtml = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.OutputXhtml'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_OutputXhtml() { return this->m_OutputXhtml; }
void TidyDotNet::HtmlParser::set_OutputXhtml(bool value) { this->m_OutputXhtml = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.ReplaceColor'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_ReplaceColor() { return this->m_ReplaceColor; }
void TidyDotNet::HtmlParser::set_ReplaceColor(bool value) { this->m_ReplaceColor = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.TidyMark'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_TidyMark() { return this->m_TidyMark; }
void TidyDotNet::HtmlParser::set_TidyMark(bool value) { this->m_TidyMark = value; }
/// </member>
/// <member name = 'P:TidyDotNet.HtmlParser.Word2000'>
/// <summary></summary>
/// <remarks></remarks>
bool TidyDotNet::HtmlParser::get_Word2000() { return this->m_Word2000; }
void TidyDotNet::HtmlParser::set_Word2000(bool value) { this->m_Word2000 = value; }
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.#ctor()'>
/// <summary>Creates a parser with every option of this class set to its initial value.</summary>
TidyDotNet::HtmlParser::HtmlParser()
{
    // Boolean options that start out enabled.
    m_DropEmptyParas = true;
    m_FixBackslash = true;
    m_FixUri = true;
    m_LowerLiterals = true;

    // Boolean options that start out disabled.
    m_AccessibilityCheck = false;
    m_AddXmlDecl = false;
    m_AddXmlSpace = false;
    m_AsciiChars = false;
    m_Bare = false;
    m_Clean = false;
    m_DropFontTags = false;
    m_DropProprietaryAttributes = false;
    m_EncloseBlockText = false;
    m_EncloseText = false;
    m_EscapeCdata = false;
    m_HideComments = false;
    m_LogicalEmphasis = false;
    m_OutputHtml = false;
    m_OutputXhtml = false;
    m_ReplaceColor = false;
    m_TidyMark = false;
    m_Word2000 = false;

    // String options are unset until assigned.
    m_AltText = null;
    m_CssPrefix = null;
    m_Doctype = null;

    m_DoctypeMode = TidyDotNet::DocTypeMode::Auto;
#if 0
    m_Language = false;
#endif
}
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.Finalize'>
/// <summary>Does nothing; the class only holds managed fields.</summary>
TidyDotNet::HtmlParser::~HtmlParser()
{
    // Intentionally empty.
}
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.SetupConfig()'>
void TidyDotNet::HtmlParser::SetupConfig(TidyDoc tdoc)
{
// Integer options
tidyOptSetInt(tdoc, TidyAccessibilityCheckLevel, this->m_AccessibilityCheck);
tidyOptSetInt(tdoc, TidyDoctypeMode, this->m_DoctypeMode);
// Boolean options
tidyOptSetBool(tdoc, TidyXmlDecl, this->m_AddXmlDecl ? yes : no);
tidyOptSetBool(tdoc, TidyXmlSpace, this->m_AddXmlSpace ? yes : no);
tidyOptSetBool(tdoc, TidyAsciiChars, this->m_AsciiChars ? yes : no);
tidyOptSetBool(tdoc, TidyMakeBare, this->m_Bare ? yes : no);
tidyOptSetBool(tdoc, TidyMakeClean, this->m_Clean ? yes : no);
tidyOptSetBool(tdoc, TidyDropEmptyParas, this->m_DropEmptyParas ? yes : no);
tidyOptSetBool(tdoc, TidyDropFontTags, this->m_DropFontTags ? yes : no);
tidyOptSetBool(tdoc, TidyDropPropAttrs, this->m_DropProprietaryAttributes ? yes : no);
tidyOptSetBool(tdoc, TidyEncloseBlockText, this->m_EncloseBlockText ? yes : no);
tidyOptSetBool(tdoc, TidyEncloseBodyText, this->m_EncloseText ? yes : no);
tidyOptSetBool(tdoc, TidyEscapeCdata, this->m_EscapeCdata ? yes : no);
tidyOptSetBool(tdoc, TidyFixBackslash, this->m_FixBackslash ? yes : no);
tidyOptSetBool(tdoc, TidyFixUri, this->m_FixUri ? yes : no);
tidyOptSetBool(tdoc, TidyHideComments, this->m_HideComments ? yes : no);
tidyOptSetBool(tdoc, TidyLogicalEmphasis, this->m_LogicalEmphasis ? yes : no);
tidyOptSetBool(tdoc, TidyLowerLiterals, this->m_LowerLiterals ? yes : no);
tidyOptSetBool(tdoc, TidyHtmlOut, this->m_OutputHtml ? yes : no);
tidyOptSetBool(tdoc, TidyXhtmlOut, this->m_OutputXhtml ? yes : no);
tidyOptSetBool(tdoc, TidyReplaceColor, this->m_ReplaceColor ? yes : no);
tidyOptSetBool(tdoc, ::TidyMark, this->m_TidyMark ? yes : no);
tidyOptSetBool(tdoc, TidyWord2000, this->m_Word2000 ? yes : no);
if (this->m_AltText && this->m_AltText->Length)
{
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_AltText);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyAltText, (char*)value);
}
if (this->m_CssPrefix && this->m_CssPrefix->Length)
{
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_CssPrefix);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyCSSPrefix, (char*)value);
}
if (this->m_Doctype && this->m_Doctype->Length)
{
char utf8 __gc[] = Encoding::UTF8->GetBytes(this->m_Doctype);
char __pin * value = &utf8[0];
tidyOptSetValue(tdoc, TidyDoctype, (char*)value);
}
#if 0
this->m_DoctypeMode
#endif
#if 0
this->m_Language
#endif
}
/// </member>
/// <member name = 'M:TidyDotNet.HtmlParser.ParseString()'>
/// <summary>Parses markup with Tidy, using the options configured on this instance.</summary>
/// <param name = 's'>Markup to parse. It is handed to Tidy as a NUL-terminated UTF-8
/// string, so input stops at the first embedded NUL character.</param>
/// <returns>A <c>TidyDocument</c> that owns the parsed Tidy document.</returns>
/// <exception cref = 'T:System.ArgumentNullException'><c>s</c> is <c>null</c>.</exception>
/// <exception cref = 'T:System.InvalidOperationException'>Tidy reported a negative status code.</exception>
/// <remarks>The document is parsed, cleaned and repaired, and diagnostics are run.
/// Tidy's error output is collected and discarded.</remarks>
TidyDotNet::TidyDocument * TidyDotNet::HtmlParser::ParseString(String * s)
{
    if (!s)
        throw new ArgumentNullException("s");
    // Convert the System::String argument to UTF-8 with a trailing zero byte.
    // Encoding::GetBytes adds no terminator, and managed arrays are
    // zero-initialised, so allocating one extra element supplies it. The
    // array is therefore never empty and &utf8[0] below is always valid.
    Encoding * enc = Encoding::UTF8;
    Byte utf8 __gc[] = new Byte[enc->GetByteCount(s) + 1];
    enc->GetBytes(s, 0, s->Length, utf8, 0);
    // Pin the array so it cannot move while Tidy reads from it.
    Byte __pin * input = &utf8[0];
    TidyBuffer errbuf = {0};
    TidyDoc tDoc = tidyCreate();
    if (!tDoc)
        throw new OutOfMemoryException();
    // Stays negative unless every step below succeeds, so that the
    // __finally block also releases the document when an exception escapes.
    int rc = -1;
    try
    {
        // apply config options from instance to Tidy
        this->SetupSimpleConfig(tDoc);
        this->SetupConfig(tDoc);
        // set input character encoding to UTF-8
        tidySetCharEncoding(tDoc, "utf8");
        // actually Tidy should discard error output, but...
        rc = tidySetErrorBuffer(tDoc, &errbuf);
        if (rc >= 0)
            rc = tidyParseString(tDoc, (char*)input);
        if (rc >= 0)
            rc = tidyCleanAndRepair(tDoc);
        if (rc >= 0)
            rc = tidyRunDiagnostics(tDoc);
    }
    __finally
    {
        if (rc < 0)
            tidyRelease(tDoc);
        // The collected messages are not used; free them to avoid a leak.
        // NOTE(review): on success the returned document still refers to
        // this stack-allocated buffer as its error sink -- confirm that no
        // later Tidy call on the document writes error output.
        tidyBufFree(&errbuf);
    }
    if (rc < 0)
        throw new InvalidOperationException("Tidy failed to parse the input");
    return new TidyDocument(tDoc);
}
/// </member>
// ------------------------------
/// <member name = 'M:TidyDotNet.TidyDocument.#ctor()'>
/// <summary>Wraps an existing Tidy document handle.</summary>
/// <param name = 'tDoc'>Handle to store; it is released by the destructor.</param>
/// <exception cref = 'T:System.ArgumentNullException'><c>tDoc</c> is null.</exception>
TidyDotNet::TidyDocument::TidyDocument(TidyDoc tDoc)
{
    if (tDoc)
    {
        m_tDoc = tDoc;
        return;
    }
    throw new ArgumentNullException("tDoc");
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.Finalize'>
/// <summary>Releases the wrapped Tidy document, if there is one.</summary>
TidyDotNet::TidyDocument::~TidyDocument()
{
    if (!m_tDoc)
        return;
    tidyRelease(m_tDoc);
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.ToXmlDocument()'>
/// <summary>Builds a new <c>XmlDocument</c> from the Tidy document tree.</summary>
/// <remarks>The tree is walked from the Tidy root node by <c>ProcessTree</c>.</remarks>
/// <returns>The newly created document.</returns>
XmlDocument * TidyDotNet::TidyDocument::ToXmlDocument()
{
    XmlDocument * result = new XmlDocument();
    // Passing null as the current node makes ProcessTree append directly
    // to the document.
    ProcessTree(tidyGetRoot(m_tDoc), result, null);
    return result;
}
/// </member>
/// <member name = 'M:TidyDotNet.TidyDocument.ProcessTree()'>
void TidyDotNet::TidyDocument::ProcessTree(TidyNode node, XmlDocument * doc, XmlNode * current)
{
TidyNode n = node;
if (!doc)
throw new ArgumentNullException();
if (!node)
return;
if (!current)
current = doc;
while (n)
{
int nodetype = tidyNodeGetType(n);
TidyNode firstchild = tidyGetChild(n);
if (TidyNode_Root == nodetype)
{
this->ProcessTree(firstchild, doc, current);
}
else if (TidyNode_Start == nodetype || TidyNode_StartEnd == nodetype)
{
// Get System::String element name from TidyNode
ctmbstr tname = tidyNodeGetName(n);
String * name = new String(tname, 0, (int)strlen(tname), Encoding::UTF8);
XmlNode * e = doc->CreateElement(name);
// Copy attributes to element
// TODO: deal with xmlns attributes (namespaces)
for (TidyAttr att = tidyAttrFirst(n); att; att = tidyAttrNext(att))
{
ctmbstr tattname = tidyAttrName(att);
ctmbstr tattval = tidyAttrValue(att);
String * attname = new String(tattname, 0, (int)strlen(tattname), Encoding::UTF8);
String * attval = new String(tattval, 0, (int)strlen(tattval), Encoding::UTF8);
XmlAttribute * a = doc->CreateAttribute(attname);
a->Value = attval;
e->Attributes->Append(a);
}
current->AppendChild(e);
this->ProcessTree(firstchild, doc, e);
}
else if (TidyNode_Comment == nodetype)
{
int len = tidyNodeGetLength(n) + 1;
char* buf = new char[len];
int res = tidyNodeGetValue(m_tDoc, n, buf, &len);
if (res < 1)
throw new InvalidOperationException();
// TODO: change len-3 when comment node storage in Tidy changes
String * value = new String(buf, 0, len-3, Encoding::UTF8);
XmlNode * comment = doc->CreateComment(value);
current->AppendChild(comment);
}
else if (TidyNode_ProcIns == nodetype)
{
int len = tidyNodeGetLength(n) + 1;
char* buf = new char[len];
int res = tidyNodeGetValue(m_tDoc, n, buf, &len);
if (res < 1)
throw new InvalidOperationException();
String * value = new String(buf, 0, len, Encoding::UTF8);
Debug::WriteLine(len.ToString());
Debug::WriteLine(value);
XmlNode * procins = doc->CreateProcessingInstruction("pi", value);
current->AppendChild(procins);
}
else if (TidyNode_Text == nodetype)
{
XmlNode * text = doc->CreateTextNode("Text nodes unsupported");
current->AppendChild(text);
}
else if (TidyNode_CDATA == nodetype)
{
XmlNode * cdata = doc->CreateCDataSection("CDATA sections unsupported");
current->AppendChild(cdata);
}
else if (TidyNode_DocType == nodetype)
{
// XmlNode * doctype = doc->CreateDocumentType("foo", "bar", "baz", String::Empty);
// current->AppendChild(doctype);
}
n = tidyGetNext(n);
}
}
/// </member>
/// </members>
/// </doc>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment