Created
May 20, 2014 01:19
-
-
Save pisceanfoot/cedf0200155a66270391 to your computer and use it in GitHub Desktop.
StringUtil html regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// String helpers: byte-length-limited truncation, regex-based HTML clean-up and MD5 hashing.
/// </summary>
public class StringUtil
{
    #region Byte-length-limited truncation

    /// <summary>
    /// Truncates the text of <paramref name="obj"/> so that its "gb2312" encoding
    /// occupies at most <paramref name="length"/> bytes.
    /// </summary>
    /// <param name="obj">Value whose <c>ToString()</c> text is truncated; null and DBNull.Value yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep.</param>
    /// <returns>The longest prefix of the text that fits in the byte budget.</returns>
    public static string GetGB2312StringLength(object obj, int length)
    {
        return GetStringLength(obj, length, "gb2312");
    }

    /// <summary>
    /// Truncates the text of <paramref name="obj"/> so that its "gb2312" encoding
    /// occupies at most <paramref name="length"/> bytes, appending
    /// <paramref name="suffix"/> when something was cut off.
    /// </summary>
    /// <param name="obj">Value whose <c>ToString()</c> text is truncated; null and DBNull.Value yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep (the suffix is not counted).</param>
    /// <param name="suffix">Text appended only when the input was actually shortened; null or empty disables it.</param>
    /// <returns>The truncated text, followed by the suffix if truncation happened.</returns>
    public static string GetGB2312StringLength(object obj, int length, string suffix)
    {
        return GetStringLength(obj, length, "gb2312", suffix);
    }

    /// <summary>
    /// Truncates the text of <paramref name="obj"/> so that, in the named encoding,
    /// it occupies at most <paramref name="length"/> bytes.
    /// </summary>
    /// <param name="obj">Value whose <c>ToString()</c> text is truncated; null and DBNull.Value yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep.</param>
    /// <param name="encode">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <returns>The longest prefix of the text that fits in the byte budget.</returns>
    public static string GetStringLength(object obj, int length, string encode)
    {
        return GetStringLength(obj, length, encode, null);
    }

    /// <summary>
    /// Truncates the text of <paramref name="obj"/> so that, in the named encoding,
    /// it occupies at most <paramref name="length"/> bytes, appending
    /// <paramref name="suffix"/> when something was cut off.
    /// </summary>
    /// <remarks>
    /// Despite the method name, the return value is the truncated string, not a length.
    /// The suffix is appended after truncation and is not counted against
    /// <paramref name="length"/>, so the returned string may encode to more bytes than the budget.
    /// </remarks>
    /// <param name="obj">Value whose <c>ToString()</c> text is truncated; null and DBNull.Value yield an empty string.</param>
    /// <param name="length">Maximum number of encoded bytes to keep (the suffix is not counted).</param>
    /// <param name="encode">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <param name="suffix">Text appended only when the input was actually shortened; null or empty disables it.</param>
    /// <returns>The truncated text, followed by the suffix if truncation happened.</returns>
    public static string GetStringLength(object obj, int length, string encode, string suffix)
    {
        if (obj == null || obj == DBNull.Value)
        {
            return string.Empty;
        }

        // A ToString() override may return null; treat that like empty text.
        string content = obj.ToString() ?? string.Empty;
        Encoding encoding = Encoding.GetEncoding(encode);

        char[] chars = content.ToCharArray();
        int usedBytes = 0;
        int keptChars = 0;

        while (keptChars < chars.Length)
        {
            // A surrogate pair is one character: measure and keep both halves together
            // so the byte count is right and the cut never lands between them.
            int unit = 1;
            if (char.IsHighSurrogate(chars[keptChars])
                && keptChars + 1 < chars.Length
                && char.IsLowSurrogate(chars[keptChars + 1]))
            {
                unit = 2;
            }

            int size = encoding.GetByteCount(chars, keptChars, unit);
            if (usedBytes + size > length)
            {
                // Stop at the first character that does not fit; later, smaller
                // characters are intentionally not considered.
                break;
            }

            usedBytes += size;
            keptChars += unit;
        }

        if (keptChars == chars.Length)
        {
            // Nothing was cut off, so no suffix is added.
            return content;
        }

        string result = content.Substring(0, keptChars);
        if (!string.IsNullOrEmpty(suffix))
        {
            result += suffix;
        }

        return result;
    }

    #endregion

    #region HTML

    // Patterns applied, in this order, by ConvertHTML2TextRegex. Built once because
    // constructing a Regex is far more expensive than running it.
    private static readonly Regex[] HtmlStripRegexes =
    {
        new Regex(@"<%=[\w\W]*?%>", RegexOptions.IgnoreCase),
        new Regex(@"<script[\w\W]*?</script>", RegexOptions.IgnoreCase),
        new Regex(@"<style[\w\W]*?</style>", RegexOptions.IgnoreCase),
        new Regex(@"<[/]?[\w\W]*?>", RegexOptions.IgnoreCase),
        new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),
        new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),
        new Regex(@"-->", RegexOptions.IgnoreCase),
        new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)
    };

    // Replacement text for the pattern at the same index in HtmlStripRegexes.
    // Only the non-breaking-space entity becomes a space; everything else is removed.
    private static readonly string[] HtmlStripReplacements =
    {
        "", "", "", "", "", " ", "", "", "", "", "", "", ""
    };

    /// <summary>
    /// Strips HTML markup from <paramref name="html"/> using a fixed list of regular expressions:
    /// inline server expressions, script and style elements, remaining tags, line breaks
    /// followed by whitespace, a handful of named entities, numeric entities and comment remnants.
    /// Carriage-return/line-feed pairs and tab characters that remain are removed afterwards.
    /// </summary>
    /// <param name="html">HTML text to clean; null yields an empty string.</param>
    /// <returns>The text with the matched markup removed.</returns>
    public static string ConvertHTML2TextRegex(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        for (int i = 0; i < HtmlStripRegexes.Length; i++)
        {
            html = HtmlStripRegexes[i].Replace(html, HtmlStripReplacements[i]);
        }

        // string.Replace returns a new string; the result must be assigned back
        // or the clean-up has no effect.
        html = html.Replace("\r\n", "");
        html = html.Replace("\t", "");
        return html;
    }

    /// <summary>
    /// Defangs some potentially dangerous HTML constructs in <paramref name="content"/>.
    /// </summary>
    /// <remarks>
    /// Two case-insensitive rewrites are applied:
    /// first, a leading letter "o" is dropped from attribute-like tokens (a token that follows
    /// an opening angle bracket or whitespace, starts with "o", continues with letters and
    /// ends with "="), so for example " onclick=" becomes " nclick=";
    /// second, a "." is inserted after the words script, frame, form, meta, behavior and style
    /// when they are followed by whitespace, "|", ":" or a closing angle bracket. If several
    /// such characters follow, only the last one is kept.
    /// NOTE(review): this is a best-effort blacklist, not a complete HTML sanitizer; it also
    /// rewrites harmless text that happens to match. Do not rely on it as the only defence
    /// against script injection.
    /// </remarks>
    /// <param name="content">HTML text to filter; null yields an empty string.</param>
    /// <returns>The rewritten text.</returns>
    public static string RemoveUnsafeHtml(string content)
    {
        if (string.IsNullOrEmpty(content))
        {
            return string.Empty;
        }

        content = Regex.Replace(content, @"(\<|\s+)o([a-z]+\s?=)", "$1$2", RegexOptions.IgnoreCase);
        content = Regex.Replace(content, @"(script|frame|form|meta|behavior|style)([\s|:|>])+", "$1.$2", RegexOptions.IgnoreCase);
        return content;
    }

    #endregion

    #region MD5

    /// <summary>
    /// Computes the MD5 hash of the UTF-8 bytes of <paramref name="str"/>.
    /// </summary>
    /// <param name="str">Text to hash.</param>
    /// <returns>
    /// The 16 hash bytes formatted by <c>StringConvert.ByteArrayToHexString</c>
    /// (the exact hex format is defined by that helper, which is declared elsewhere).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public static string MD5(string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        byte[] input = Encoding.UTF8.GetBytes(str);

        // The hash provider is disposable; release it deterministically.
        using (MD5CryptoServiceProvider hasher = new MD5CryptoServiceProvider())
        {
            byte[] digest = hasher.ComputeHash(input);
            return StringConvert.ByteArrayToHexString(digest);
        }
    }

    #endregion
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment