Created
April 18, 2012 13:36
-
-
Save q42jaap/2413598 to your computer and use it in GitHub Desktop.
HtmlTruncator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This class truncates a string to a given number of characters, cutting only on word boundaries where possible.
It can handle HTML tags and closes them correctly. The tags must be well-formed XML, so <br> will not work and should be written as <br/>; you can use SgmlReader if this is a problem.
I personally think this is better than the regex-based versions of this that I have seen.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Truncates markup to a maximum number of text characters, preferring to cut on a
/// space, and closes every element that is still open at the cut point.
/// The input must be well-formed XML (for example <c>&lt;br/&gt;</c>, not <c>&lt;br&gt;</c>).
/// </summary>
public static class HtmlTruncator
{
    /// <summary>
    /// Truncates the markup in <paramref name="str"/> and returns the resulting markup.
    /// </summary>
    /// <param name="str">Well-formed XML fragment; it is wrapped in a temporary root element for parsing.</param>
    /// <param name="maxLength">Maximum number of text characters to keep. Tags are not counted.</param>
    /// <param name="ellipses">Text appended after the markup when, and only when, something was cut off.</param>
    /// <returns>The (possibly truncated) markup, followed by <paramref name="ellipses"/> if truncation happened.</returns>
    public static string LimitOnWordBoundary(string str, int maxLength, string ellipses = "...")
    {
        XmlDocument doc = new XmlDocument();
        XmlParserContext context = new XmlParserContext(doc.NameTable, new XmlNamespaceManager(doc.NameTable), null, XmlSpace.Preserve);
        bool shouldWriteEllipses;

        // The reader is disposable as well; release it together with the writer.
        using (XmlTextReader reader = new XmlTextReader("<xml>" + str + "</xml>", XmlNodeType.Document, context))
        using (var writer = doc.CreateNavigator().AppendChild())
        {
            LimitOnWordBoundary(writer, reader, maxLength, out shouldWriteEllipses);
            writer.Flush();
        }

        // InnerXml drops the temporary <xml> wrapper again.
        return doc.DocumentElement.InnerXml + (shouldWriteEllipses ? ellipses : "");
    }

    /// <summary>
    /// Copies nodes from <paramref name="reader"/> to <paramref name="writer"/> until the text
    /// budget of <paramref name="maxLength"/> characters is used up, then closes all elements
    /// that are still open.
    /// </summary>
    /// <param name="writer">Destination for the copied nodes.</param>
    /// <param name="reader">Source of the nodes; copying covers the reader's current subtree.</param>
    /// <param name="maxLength">
    /// Maximum number of text characters. Text, whitespace and entity references count
    /// (an entity reference counts as one character); element tags do not.
    /// </param>
    /// <param name="shouldWriteEllipses">Set to <c>true</c> when a text node had to be cut or dropped.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="writer"/> is <c>null</c>.
    /// </exception>
    public static void LimitOnWordBoundary(XmlWriter writer, XmlReader reader, int maxLength, out bool shouldWriteEllipses)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        int openElements = 0;
        int currentLength = 0;
        shouldWriteEllipses = false;

        // While fewer than this many characters have been kept, a word is cut in the
        // middle instead of being dropped, so the output does not end up (nearly) empty.
        int magicMinimumLength = Math.Min(5, (maxLength + 1) / 2);

        // Depth at which copying started; the loop ends once the reader leaves that subtree.
        int startDepth = (reader.NodeType == XmlNodeType.None) ? -1 : reader.Depth;

        do
        {
            bool done = false;
            switch (reader.NodeType)
            {
                case XmlNodeType.Element:
                    openElements++;
                    writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                    writer.WriteAttributes(reader, false);
                    if (reader.IsEmptyElement)
                    {
                        // <x/> produces no EndElement node, so close it right away.
                        openElements--;
                        writer.WriteEndElement();
                    }
                    break;

                case XmlNodeType.Text:
                    string value = reader.Value;
                    if (currentLength + value.Length > maxLength)
                    {
                        value = TruncateText(value, currentLength, maxLength, magicMinimumLength);
                        shouldWriteEllipses = true;
                        done = true;
                    }
                    if (value != null)
                    {
                        writer.WriteString(value);
                        currentLength += value.Length;
                    }
                    break;

                case XmlNodeType.Whitespace:
                case XmlNodeType.SignificantWhitespace:
                    // Whitespace is copied unconditionally, so currentLength may end up
                    // above maxLength; TruncateText copes with that.
                    writer.WriteString(reader.Value);
                    currentLength += reader.Value.Length;
                    break;

                case XmlNodeType.EndElement:
                    openElements--;
                    writer.WriteFullEndElement();
                    break;

                case XmlNodeType.EntityReference:
                    writer.WriteEntityRef(reader.Name);
                    currentLength++;
                    break;

                case XmlNodeType.CDATA:
                case XmlNodeType.ProcessingInstruction:
                case XmlNodeType.XmlDeclaration:
                case XmlNodeType.Comment:
                case XmlNodeType.DocumentType:
                    // These node types are not copied to the output.
                    break;
            }

            if (done)
            {
                break;
            }
        }
        while (reader.Read() && ((startDepth < reader.Depth) || ((startDepth == reader.Depth) && (reader.NodeType == XmlNodeType.EndElement))));

        // After an early stop some elements are still open; close them so the output stays well-formed.
        while (openElements > 0)
        {
            writer.WriteFullEndElement();
            openElements--;
        }
    }

    /// <summary>
    /// Decides how much of a text node that does not fit into the budget is kept.
    /// </summary>
    /// <returns>The part of <paramref name="value"/> to write, or <c>null</c> to write nothing.</returns>
    private static string TruncateText(string value, int currentLength, int maxLength, int magicMinimumLength)
    {
        // Whitespace and entity references are counted without a limit check (and maxLength
        // may be negative), so the budget can already be overdrawn. Clamp it to zero;
        // otherwise Substring would be called with a negative length and throw.
        int remaining = Math.Max(0, maxLength - currentLength);

        // Look one character past the budget: a space at exactly that position means the
        // preceding word fits completely.
        string almost = value.Substring(0, Math.Min(value.Length, remaining + 1));
        int lastSpace = almost.LastIndexOf(' ');

        if (lastSpace < 0)
        {
            // No word boundary available: cut mid-word only if too little was kept so far.
            return currentLength < magicMinimumLength
                ? value.Substring(0, Math.Min(value.Length, remaining))
                : null;
        }

        if (lastSpace + currentLength < magicMinimumLength)
        {
            // Cutting at the boundary would leave too little text; cut mid-word at the limit.
            return value.Substring(0, Math.Min(value.Length, remaining));
        }

        return value.Substring(0, lastSpace);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// NUnit tests for <c>HtmlTruncator.LimitOnWordBoundary</c>. The sample words
/// ("aap", "noot", "mies", "wim") are 3-4 characters long, which keeps the
/// character arithmetic in each case easy to follow.
/// </summary>
[TestFixture] // nunit
public class HtmlTruncatorTests
{
    // Every test uses the same ellipsis text.
    private const string Dots = "...";

    // Runs the truncator with the shared ellipsis text.
    private static string Truncate(string input, int limit)
    {
        return HtmlTruncator.LimitOnWordBoundary(input, limit, ellipses: Dots);
    }

    [Test]
    public void StringWithoutTagsShouldNotBreakIfShorter()
    {
        string actual = Truncate("aap noot mies", 100);

        Assert.AreEqual("aap noot mies", actual);
    }

    [Test]
    public void StringWithTagsShouldNotBreakIfShorter()
    {
        string actual = Truncate("aap <span>noot</span> mies", 100);

        Assert.AreEqual("aap <span>noot</span> mies", actual);
    }

    [Test]
    public void StringWithoutTagsCountsSpaces()
    {
        // 13 characters including the two spaces: fits exactly.
        string exactFit = Truncate("aap noot mies", 13);
        Assert.AreEqual("aap noot mies", exactFit);

        // One character less: the last word has to go.
        string oneShort = Truncate("aap noot mies", 12);
        Assert.AreEqual("aap noot...", oneShort);
    }

    [Test]
    public void StringWithSplitInsideTag()
    {
        string actual = Truncate("<span>aap noot mies</span>", 6);

        Assert.AreEqual("<span>aap</span>...", actual);
    }

    [Test]
    public void StringWithNestedTags()
    {
        string actual = Truncate("<span><b>aap noot</b> mies</span>", 6);

        Assert.AreEqual("<span><b>aap</b></span>...", actual);
    }

    [Test]
    public void StringWithTwoTagsSeperatedBySpaceCountsSpaces()
    {
        // Tags are not counted, the spaces between them are.
        string exactFit = Truncate("<span>aap</span> <span>noot</span> mies", 13);
        Assert.AreEqual("<span>aap</span> <span>noot</span> mies", exactFit);

        string oneShort = Truncate("<span>aap</span> <span>noot</span> mies", 12);
        Assert.AreEqual("<span>aap</span> <span>noot</span>...", oneShort);
    }

    [Test]
    public void StringWithTwoTagsSplitAfterTheFirst()
    {
        string actual = Truncate("<span>aap</span> noot mies <span>wim</span>", 11);

        Assert.AreEqual("<span>aap</span> noot...", actual);
    }

    [Test]
    public void StringWithoutTagsShouldBreakCorrectly()
    {
        string actual = Truncate("aap noot mies", 6);

        Assert.AreEqual("aap...", actual);
    }

    [Test]
    public void LongStringShouldJustTruncate()
    {
        // No space to break on and nothing kept yet: cut in the middle of the word.
        string actual = Truncate("aapnootmies", 6);

        Assert.AreEqual("aapnoo...", actual);
    }

    [Test]
    public void LongStringWithoutSpaceAfterTagWithEnoughBeforeShouldIgnore()
    {
        string actual = Truncate("<span>aap noot</span>aapnootmies", 9);

        Assert.AreEqual("<span>aap noot</span>...", actual);
    }

    [Test]
    public void LongStringWithSpaceAfterTagWithEnoughBeforeShouldIgnore()
    {
        string actual = Truncate("<span>aap noot</span> aapnootmies", 9);

        Assert.AreEqual("<span>aap noot</span>...", actual);
    }

    [Test]
    public void LongStringAfterTagWithJustOneCharBeforeShouldBreak()
    {
        // Only one character kept so far, so the long word is cut rather than dropped.
        string actual = Truncate("<span>a</span> aapnootmies", 9);

        Assert.AreEqual("<span>a</span> aapnoot...", actual);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment