Skip to content

Instantly share code, notes, and snippets.

@UweKeim
Created September 25, 2017 13:35
Show Gist options
  • Save UweKeim/2187791205263ea86fb472b216548cf0 to your computer and use it in GitHub Desktop.
Save UweKeim/2187791205263ea86fb472b216548cf0 to your computer and use it in GitHub Desktop.
Small, incomplete "port" of WordPress "wptexturize" function to .NET/C#
namespace ZetaProducer.RuntimeBusinessLogic.Rendering.Helper
{
using AngleSharp.Dom;
using AngleSharp.Parser.Html;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
/// <summary>
/// Typographic post-processor for HTML markup, loosely modelled on the WordPress
/// function "wptexturize". Only a small subset of the WordPress rules is implemented.
/// </summary>
public static class Texturizer
{
    // Replacement characters are stored as real Unicode characters (not as HTML entities)
    // so that they can be written straight into a DOM text node. The serializer takes
    // care of any escaping that is required when the markup is written back out.

    /// <summary>Double low-9 quotation mark (U+201E, "&amp;bdquo;"), used as opening double quote.</summary>
    private const string OpeningDoubleQuote = "\u201E";

    /// <summary>Left double quotation mark (U+201C, "&amp;ldquo;"), used as closing double quote.</summary>
    private const string ClosingDoubleQuote = "\u201C";

    /// <summary>Plain apostrophe (U+0027, "&amp;apos;"). Not referenced by any rule yet.</summary>
    private const string Apostrophe = "'";

    /// <summary>Single low-9 quotation mark (U+201A, "&amp;sbquo;"), used as opening single quote.</summary>
    private const string OpeningSingleQuote = "\u201A";

    /// <summary>Left single quotation mark (U+2018, "&amp;lsquo;"), used as closing single quote.</summary>
    private const string ClosingSingleQuote = "\u2018";

    /// <summary>Em dash (U+2014, "&amp;mdash;").</summary>
    private const string EmDash = "\u2014";

    /// <summary>En dash (U+2013, "&amp;ndash;"). Not referenced by any rule yet.</summary>
    private const string EnDash = "\u2013";

    /// <summary>Horizontal ellipsis (U+2026, "&amp;hellip;").</summary>
    private const string Ellipsis = "\u2026";

    /// <summary>
    /// Elements whose text content must be left untouched (at any nesting depth).
    /// </summary>
    private static readonly string[] NoTexturizeTags =
    {
        @"pre",
        @"code",
        @"kbd",
        @"style",
        @"script",
        @"tt"
    };

    // The rules are applied in the order in which they are declared here.

    // A lone hyphen surrounded by whitespace (or at the start/end of the text).
    private static readonly Regex DashRule = new Regex(@"(^|\s+)-(\s+|$)");

    // Exactly three consecutive dots; longer dot runs are left alone.
    private static readonly Regex EllipsisRule = new Regex(@"(?<!\.)\.{3}(?!\.)");

    // A straight double quote directly in front of a word.
    private static readonly Regex OpeningDoubleQuoteRule = new Regex(@"(^|\s+)""(\w+)");

    // A straight double quote directly after a word, followed by whitespace, end of text or punctuation.
    private static readonly Regex ClosingDoubleQuoteRule = new Regex(@"(\w+)""(\s+|$|[!.,;])");

    // A straight single quote directly in front of a word.
    private static readonly Regex OpeningSingleQuoteRule = new Regex(@"(^|\s+)'(\w+)");

    // A straight single quote directly after a word, followed by whitespace or end of text.
    private static readonly Regex ClosingSingleQuoteRule = new Regex(@"(\w+)'(\s+|$)");

    /// <summary>
    /// Replaces certain plain characters in the text content of the given HTML with
    /// their typographic counterparts (dashes, ellipses, quotation marks).
    ///
    /// Modelled on the WordPress function "wptexturize", but far less extensive:
    /// https://developer.wordpress.org/reference/functions/wptexturize
    /// https://codex.wordpress.org/Function_Reference/wptexturize
    ///
    /// Source code of "wptexturize":
    /// https://github.com/WordPress/WordPress/blob/master/wp-includes/formatting.php#L51
    /// </summary>
    /// <param name="text">The HTML markup to process. May be <c>null</c> or empty.</param>
    /// <returns>
    /// The inner HTML of the parsed document's body after the replacements were applied.
    /// The input is returned unchanged when it is <c>null</c>, empty or whitespace only,
    /// or when the parsed document contains no body element.
    /// </returns>
    public static string Texturize(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return text;
        }

        var document = new HtmlParser().Parse(text);

        var body = document.DocumentElement.GetElementsByTagName(@"body").FirstOrDefault();
        if (body == null)
        {
            // No body element to work on (e.g. a frameset document): nothing to texturize.
            return text;
        }

        var iterator = document.CreateNodeIterator(body);
        INode node;
        while ((node = iterator.Next()) != null)
        {
            // Only text nodes carry visible text. Comments also expose a NodeValue but
            // must not be rewritten, and element nodes have no NodeValue at all.
            if (node.NodeType != NodeType.Text || IsInsideNoTexturizeTag(node))
            {
                continue;
            }

            var before = node.NodeValue;
            if (string.IsNullOrEmpty(before))
            {
                continue;
            }

            var after = processOneNode(before);
            if (after != before)
            {
                node.NodeValue = after;
            }
        }

        return body.InnerHtml;
    }

    /// <summary>
    /// Tells whether the given node has an ancestor element listed in
    /// <see cref="NoTexturizeTags"/>.
    /// </summary>
    /// <param name="node">The node whose ancestors are inspected.</param>
    /// <returns><c>true</c> if the node's text must be left untouched.</returns>
    private static bool IsInsideNoTexturizeTag(INode node)
    {
        for (var element = node.ParentElement; element != null; element = element.ParentElement)
        {
            // Ordinal comparison: tag names are identifiers, not linguistic text, so the
            // result must not depend on the current culture.
            if (NoTexturizeTags.Contains(element.LocalName, System.StringComparer.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Applies all replacement rules to the (already entity-decoded) value of one text node.
    /// </summary>
    /// <param name="content">The raw text of a single text node.</param>
    /// <returns>The text with the typographic replacements applied.</returns>
    private static string processOneNode(string content)
    {
        var result = content;

        result = DashRule.Replace(result, "${1}" + EmDash + "${2}");
        result = EllipsisRule.Replace(result, Ellipsis);
        result = OpeningDoubleQuoteRule.Replace(result, "${1}" + OpeningDoubleQuote + "${2}");
        result = ClosingDoubleQuoteRule.Replace(result, "${1}" + ClosingDoubleQuote + "${2}");
        result = OpeningSingleQuoteRule.Replace(result, "${1}" + OpeningSingleQuote + "${2}");
        result = ClosingSingleQuoteRule.Replace(result, "${1}" + ClosingSingleQuote + "${2}");

        // TODO: Port more (all?) of the WordPress rules.

        // The text is deliberately NOT HTML-decoded here. A text node's value is already
        // decoded, so decoding it again would turn literal text such as "&lt;" (written
        // as "&amp;lt;" in the source markup) into "<" and thereby change its meaning.
        // Background on assigning text to nodes:
        // https://github.com/AngleSharp/AngleSharp/issues/361#issuecomment-230155588
        return result;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment