Skip to content

Instantly share code, notes, and snippets.

@KristofferK
Last active February 11, 2018 12:14
Show Gist options
  • Select an option

  • Save KristofferK/3fe6e110b6e29d49018458f2a1b2cb85 to your computer and use it in GitHub Desktop.

Select an option

Save KristofferK/3fe6e110b6e29d49018458f2a1b2cb85 to your computer and use it in GitHub Desktop.
Converts an HTML string to a plain-text string.
namespace Utils
{
    using System.Net;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Text helpers for turning HTML fragments into sanitized plain text.
    /// </summary>
    public static class Util
    {
        /// <summary>
        /// Converts an HTML string to plain text: strips tags, decodes HTML entities, masks
        /// links, e-mail addresses and phone numbers, and collapses runs of newlines, spaces
        /// and periods.
        /// </summary>
        /// <param name="o">Value whose <c>ToString()</c> result is processed; may be null.</param>
        /// <param name="keepLinebreaks">
        /// When true, <c>&lt;br&gt;</c> and <c>&lt;/p&gt;</c> tags are turned into newlines
        /// before the remaining tags are removed.
        /// </param>
        /// <returns>The cleaned text, or null when <paramref name="o"/> is null.</returns>
        /// <remarks>
        /// Markup that should survive can be written as <c>[tag:b]...[/tag:b]</c>; it is turned
        /// into <c>&lt;b&gt;...&lt;/b&gt;</c> as the very last step. The literal marker
        /// <c>[linebreak]</c> is turned into a newline.
        /// </remarks>
        public static string StripTags(object o, bool keepLinebreaks = false)
        {
            if (o == null)
            {
                return null;
            }

            string s = o.ToString();
            if (s == null)
            {
                return null;
            }

            if (keepLinebreaks)
            {
                // Turn <br>, <br/>, </br> and </p> into newlines before all tags are stripped.
                s = Regex.Replace(s, "<[ /]*br[ /]*>", "\n", RegexOptions.IgnoreCase);
                s = Regex.Replace(s, "<\\s*/\\s*p>", "\n", RegexOptions.IgnoreCase);
            }

            // Drop tabs and normalize line endings to "\n".
            s = s.Replace("\t", "");
            s = s.Replace("\r\n", "\n");
            s = s.Replace("\r", "");
            s = s.Replace("[linebreak]", "\n");

            // Temporarily rewrite "orttelefon" so the phone-number pattern below cannot match
            // inside words ending in it; the text is restored after the masking steps.
            s = s.Replace("orttelefon", "ORTTELE");

            // Remove tags (including an unterminated tag at the end of input), then decode entities.
            s = WebUtility.HtmlDecode(Regex.Replace(s, "<[^>]*(>|$)", string.Empty)).Trim();

            // Mask links.
            s = Regex.Replace(s, @"(https?://)[^\s]+", "[LINK FJERNET] ", RegexOptions.IgnoreCase);

            // Mask e-mail addresses. Domain labels may be any length >= 2 so that addresses such as
            // "x@y.info" are masked completely instead of leaving the tail of the TLD behind.
            s = Regex.Replace(s, @"([\w\.\-]+)@([\w\-]+)((\.(\w){2,})+)", "email");

            // Mask phone numbers introduced by "telefon"/"tlf", keeping any "." / ":" that followed the word.
            s = Regex.Replace(s, @"(telefon|tlf)([.:]*)[0-9\s]+", "${2} telefon ", RegexOptions.IgnoreCase);

            // Mask 8-digit numbers written as "på 12345678" or "på 1234 5678". The lookahead keeps
            // the character after the number and also lets the number sit at the end of the input.
            s = Regex.Replace(s, @"på [0-9]{4}\s?[0-9]{4}(?![0-9])", "på telefon", RegexOptions.IgnoreCase);

            s = s.Replace("ORTTELE", "orttelefon");

            // Collapse whitespace-only lines and runs of three or more newlines.
            s = Regex.Replace(s, "\n\\s+\n", "\n\n");
            s = Regex.Replace(s, "\n{3,}", "\n\n");

            // Insert a space after a period directly followed by a letter: "Word1.Word2" => "Word1. Word2".
            s = Regex.Replace(s, "\\.([a-zA-ZæøåÆØÅ])", ". ${1}");

            // Collapse runs of spaces. Note that a run is replaced by ". " (not by a single space).
            s = Regex.Replace(s, " {2,}", ". ").Trim();

            // Collapse runs of periods, including periods separated by spaces.
            s = Regex.Replace(s, "\\.{2,}", ".").Trim();
            s = Regex.Replace(s, "\\. [\\. ]+", ". ");

            // A line must not start with a period.
            s = Regex.Replace(s, "\n\\.[ ]*", "\n");

            // Collapse newline runs again, since the steps above may have created new ones.
            s = Regex.Replace(s, "\n{3,}", "\n\n");

            s = s.Replace(" . telefon .", " telefon.");

            // Re-enable explicitly allowed tags: [tag:b]Hej[/tag:b] => <b>Hej</b>.
            s = Regex.Replace(s, "\\[(/?)tag:([^\\]]+)\\]", "<${1}${2}>");

            return s;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment