Skip to content

Instantly share code, notes, and snippets.

@pisceanfoot
Created May 20, 2014 01:19
Show Gist options
  • Save pisceanfoot/cedf0200155a66270391 to your computer and use it in GitHub Desktop.
Save pisceanfoot/cedf0200155a66270391 to your computer and use it in GitHub Desktop.
StringUtil html regex
public class StringUtil
{
    #region Byte-length-limited truncation

    /// <summary>
    /// Truncates the text form of <paramref name="obj"/> so that its "gb2312" encoding
    /// occupies at most <paramref name="length"/> bytes.
    /// </summary>
    /// <param name="obj">Value to truncate; <c>null</c> and <c>DBNull.Value</c> yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep.</param>
    /// <returns>The longest leading part of the text that fits in the byte budget.</returns>
    public static string GetGB2312StringLength(object obj, int length)
    {
        return GetStringLength(obj, length, "gb2312");
    }

    /// <summary>
    /// Truncates the text form of <paramref name="obj"/> so that its "gb2312" encoding
    /// occupies at most <paramref name="length"/> bytes, appending
    /// <paramref name="suffix"/> when something was cut off.
    /// </summary>
    /// <param name="obj">Value to truncate; <c>null</c> and <c>DBNull.Value</c> yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep (the suffix is not counted).</param>
    /// <param name="suffix">Text appended only when the content was shortened; may be null or empty.</param>
    /// <returns>The truncated text, followed by the suffix if truncation happened.</returns>
    public static string GetGB2312StringLength(object obj, int length, string suffix)
    {
        return GetStringLength(obj, length, "gb2312", suffix);
    }

    /// <summary>
    /// Truncates the text form of <paramref name="obj"/> so that, in the named encoding,
    /// it occupies at most <paramref name="length"/> bytes. No suffix is appended.
    /// </summary>
    /// <param name="obj">Value to truncate; <c>null</c> and <c>DBNull.Value</c> yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep.</param>
    /// <param name="encode">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <returns>The longest leading part of the text that fits in the byte budget.</returns>
    public static string GetStringLength(object obj, int length, string encode)
    {
        return GetStringLength(obj, length, encode, null);
    }

    /// <summary>
    /// Truncates the text form of <paramref name="obj"/> so that, in the named encoding,
    /// it occupies at most <paramref name="length"/> bytes, appending
    /// <paramref name="suffix"/> when something was cut off.
    /// </summary>
    /// <param name="obj">Value to truncate; <c>null</c> and <c>DBNull.Value</c> yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep (the suffix is not counted).</param>
    /// <param name="encode">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <param name="suffix">Text appended only when the content was shortened; may be null or empty.</param>
    /// <returns>The truncated text, followed by the suffix if truncation happened.</returns>
    public static string GetStringLength(object obj, int length, string encode, string suffix)
    {
        if (obj == null || obj == DBNull.Value)
        {
            return string.Empty;
        }

        string content = obj.ToString();
        Encoding encoding = Encoding.GetEncoding(encode);

        // Convert once so each step can measure a slice without allocating a new array.
        char[] chars = content.ToCharArray();
        StringBuilder sb = new StringBuilder();
        int totalLength = 0;
        int index = 0;

        while (index < chars.Length)
        {
            // A surrogate pair is one character: measure and copy both halves together
            // so the result never ends with a dangling high surrogate.
            int unitLength = 1;
            if (char.IsHighSurrogate(chars[index])
                && index + 1 < chars.Length
                && char.IsLowSurrogate(chars[index + 1]))
            {
                unitLength = 2;
            }

            int size = encoding.GetByteCount(chars, index, unitLength);
            if (totalLength + size > length)
            {
                break;
            }

            sb.Append(chars, index, unitLength);
            totalLength += size;
            index += unitLength;
        }

        string result = sb.ToString();

        // result is always a prefix of content, so a shorter length means it was cut.
        if (!string.IsNullOrEmpty(suffix) && result.Length < content.Length)
        {
            result += suffix;
        }

        return result;
    }

    #endregion

    #region HTML

    // Patterns applied, in this order, by ConvertHTML2TextRegex. Built once instead of
    // on every call; the order matters because later patterns see earlier results.
    private static readonly Regex[] HtmlToTextRegexes =
    {
        new Regex(@"<%=[\w\W]*?%>", RegexOptions.IgnoreCase),            // <%= ... %> expressions
        new Regex(@"<script[\w\W]*?</script>", RegexOptions.IgnoreCase), // script elements with content
        new Regex(@"<style[\w\W]*?</style>", RegexOptions.IgnoreCase),   // style elements with content
        new Regex(@"<[/]?[\w\W]*?>", RegexOptions.IgnoreCase),           // anything from '<' to the next '>'
        new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),            // CR/LF followed by whitespace
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
        new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),                 // remaining numeric entities
        new Regex(@"-->", RegexOptions.IgnoreCase),
        new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)
    };

    // Replacement for the pattern at the same index; only the nbsp entity becomes a space.
    private static readonly string[] HtmlToTextReplacements =
    {
        "", "", "", "", "", " ", "", "", "", "", "", "", ""
    };

    /// <summary>
    /// Strips HTML markup from <paramref name="html"/> using a fixed sequence of regular
    /// expressions, then removes remaining CR+LF sequences and tab characters.
    /// </summary>
    /// <param name="html">HTML text to convert; must not be null.</param>
    /// <returns>The input with tags, script/style blocks and the listed entities removed.</returns>
    public static string ConvertHTML2TextRegex(string html)
    {
        for (int i = 0; i < HtmlToTextRegexes.Length; i++)
        {
            html = HtmlToTextRegexes[i].Replace(html, HtmlToTextReplacements[i]);
        }

        // string.Replace returns a new string, so the result has to be assigned back;
        // the previous code discarded it and these two removals never took effect.
        html = html.Replace("\r\n", "");
        html = html.Replace("\t", "");
        return html;
    }

    /// <summary>
    /// Defuses some potentially unsafe HTML constructs (attributes starting with "o",
    /// and the keywords script, frame, form, meta, behavior, style) by rewriting them.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this is a keyword blacklist, not a full HTML sanitizer; do not rely
    /// on it as the only defence against script injection.
    /// </remarks>
    /// <param name="content">HTML text to filter; must not be null.</param>
    /// <returns>The rewritten text.</returns>
    public static string RemoveUnsafeHtml(string content)
    {
        // Drops the leading letter "o" from an attribute-like token ("o" + letters + "=")
        // that follows '<' or whitespace, e.g. " onclick=" becomes " nclick=".
        content = Regex.Replace(content, @"(\<|\s+)o([a-z]+\s?=)", "$1$2", RegexOptions.IgnoreCase);

        // Inserts "." after a listed keyword when it is followed by whitespace, '|', ':'
        // or '>', e.g. "<script>" becomes "<script.>". Because the "+" is outside the
        // group, only the last character of such a run is kept.
        // NOTE(review): the '|' inside the character class is a literal pipe - confirm intent.
        content = Regex.Replace(content, @"(script|frame|form|meta|behavior|style)([\s|:|>])+", "$1.$2", RegexOptions.IgnoreCase);
        return content;
    }

    #endregion

    #region MD5

    /// <summary>
    /// Computes the MD5 hash of the UTF-8 bytes of <paramref name="str"/> and returns it
    /// as formatted by <c>StringConvert.ByteArrayToHexString</c> (declared outside this file).
    /// </summary>
    /// <param name="str">Text to hash; must not be null.</param>
    /// <returns>The hash bytes converted to text by the external hex helper.</returns>
    public static string MD5(string str)
    {
        byte[] input = Encoding.UTF8.GetBytes(str);
        byte[] hash;

        // The provider holds resources and is disposable; release it deterministically.
        using (MD5CryptoServiceProvider provider = new MD5CryptoServiceProvider())
        {
            hash = provider.ComputeHash(input);
        }

        return StringConvert.ByteArrayToHexString(hash);
    }

    #endregion
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment