function aa( str,sReplace) aa=str.replcase("<"+sreplace+">",""); aa=aa.replcase("<"/+sreplace+">","");
/// <summary>
/// Removes the specified HTML tags (opening and closing forms, with any attributes)
/// from a string. Text between the tags is left in place.
/// </summary>
/// <param name="htmlString">The HTML string to clean.</param>
/// <param name="tags">
/// Names of the tags to remove, separated by "|", without angle brackets (e.g. "div|span").
/// Names are matched literally, as whole tag names, ignoring case.
/// </param>
/// <returns>
/// The cleaned string. <paramref name="htmlString"/> is returned unchanged when it is null or
/// empty, or when <paramref name="tags"/> contains no tag name.
/// </returns>
public static string ClearTag(string htmlString, string tags)
{
    if (string.IsNullOrEmpty(htmlString) || tags == null)
    {
        return htmlString;
    }

    string[] tagArray = tags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
    StringBuilder alternatives = new StringBuilder();
    foreach (string rawTag in tagArray)
    {
        string tag = rawTag.Trim();
        if (tag.Length == 0)
        {
            continue;
        }

        if (alternatives.Length > 0)
        {
            alternatives.Append('|');
        }

        // Escape so that a name containing regex metacharacters cannot alter the pattern.
        alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
    }

    if (alternatives.Length == 0)
    {
        return htmlString;
    }

    // Example for "div|span":  </?(?:div|span)(?=[\s/>])[^>]*>
    // The lookahead requires the name to end right there, so "b" does not match "<br>".
    string pattern = "</?(?:" + alternatives.ToString() + @")(?=[\s/>])[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        htmlString,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
附送另一个比较有用的函数
/// <summary>
/// Removes every HTML tag from a string except the ones listed in
/// <paramref name="reserveTags"/>.
/// </summary>
/// <param name="htmlString">The HTML string to clean.</param>
/// <param name="reserveTags">
/// Names of the tags to keep, separated by "|", without angle brackets (e.g. "div|span").
/// Names are matched literally, as whole tag names, ignoring case.
/// </param>
/// <returns>
/// The cleaned string. When no tag name is supplied the result of
/// <c>ClearAllTag(htmlString)</c> is returned.
/// </returns>
public static string ReserveTag(string htmlString, string reserveTags)
{
    StringBuilder alternatives = new StringBuilder();
    if (reserveTags != null)
    {
        string[] tagArray = reserveTags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string rawTag in tagArray)
        {
            string tag = rawTag.Trim();
            if (tag.Length == 0)
            {
                continue;
            }

            if (alternatives.Length > 0)
            {
                alternatives.Append('|');
            }

            // Escape so that a name containing regex metacharacters cannot alter the pattern.
            alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
        }
    }

    if (alternatives.Length == 0)
    {
        // Nothing to keep (null, blank, or only separators): strip every tag.
        return ClearAllTag(htmlString);
    }

    if (string.IsNullOrEmpty(htmlString))
    {
        return htmlString;
    }

    // Example for "div|span":  <(?!/?(?:div|span)(?=[\s/>]))[^>]*>
    // The inner lookahead requires the reserved name to end right there; without it,
    // reserving "s" would also preserve "<script>".
    string pattern = "<(?!/?(?:" + alternatives.ToString() + @")(?=[\s/>]))[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        htmlString,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
/// <summary>
/// Best-effort removal of common script-injection vectors from an HTML fragment:
/// script blocks, href attributes up to a "script:" scheme, inline on-event attributes,
/// iframe blocks and frameset blocks.
/// </summary>
/// <param name="html">The HTML fragment to clean.</param>
/// <returns>
/// The filtered HTML; <paramref name="html"/> is returned unchanged when it is null or empty.
/// </returns>
public string wipescript(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Lazy quantifiers keep each match inside a single element instead of running from
    // the first opening tag to the last closing tag in the document.

    // Remove <script ...>...</script> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<script[\s\S]+?</script *>", "", options);

    // Remove the start of an href whose value reaches a "script:" scheme (e.g. href="javascript:).
    // [^>] keeps the match inside the tag that carries the attribute.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" href *= *[^>]*?script *:", "", options);

    // Rename inline on... event attributes so they no longer fire.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" on\w+ *=", " _disibledevent=", options);

    // Remove <iframe ...>...</iframe> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe[\s\S]+?</iframe *>", "", options);

    // Remove <frameset ...>...</frameset> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset[\s\S]+?</frameset *>", "", options);

    return html;
}
贴个经典的,stackoverflow的:
// Matches anything that looks like an HTML tag, including an unterminated one at the end of the input.
private static Regex _tags = new Regex("<[^>]*(>|$)",
    RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.Compiled);

// First-pass whitelist, applied to each lower-cased tag. <a ...> and <img ...> with
// attributes pass here and are validated in detail inside Sanitize.
private static Regex _whitelist = new Regex(@"
    ^</?(a|b(lockquote)?|code|em|h(1|2|3)|i|li|ol|p(re)?|s(ub|up|trong|trike)?|ul)>$
    |^<(b|h)r\s?/?>$
    |^<a[^>]+>$
    |^<img[^>]+/?>$",
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace |
    RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Sanitizes potentially dangerous tags from the provided raw HTML input using a
/// whitelist-based approach, leaving the "safe" HTML tags.
/// </summary>
/// <param name="html">Raw HTML input.</param>
/// <returns>
/// The input with every tag that fails the whitelist removed; text between tags is kept.
/// A null or empty input is returned unchanged.
/// </returns>
public static string Sanitize(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    MatchCollection tags = _tags.Matches(html);

    // Walk the matches from last to first so that removing a tag never shifts the
    // index of a match that has not been processed yet.
    for (int i = tags.Count - 1; i > -1; i--)
    {
        Match tag = tags[i];

        // Invariant lower-casing: culture-specific casing must not affect tag matching.
        string tagname = tag.Value.ToLowerInvariant();

        bool keep;
        if (!_whitelist.IsMatch(tagname))
        {
            // Not on the whitelist.
            keep = false;
        }
        else if (tagname.StartsWith("<a", System.StringComparison.Ordinal))
        {
            // Detailed <a> check. The pattern is anchored with ^...$ so the WHOLE tag must
            // have the expected shape; unanchored, "<a onclick=x <a href="#1">" would pass.
            keep = IsMatch(tagname,
                @"^<a\s
                href=""(\#\d+|(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+)""
                (\stitle=""[^""]+"")?\s?>$");
        }
        else if (tagname.StartsWith("<img", System.StringComparison.Ordinal))
        {
            // Detailed <img> check, anchored for the same reason as the <a> check.
            keep = IsMatch(tagname,
                @"^<img\s
                src=""https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+""
                (\swidth=""\d{1,3}"")?
                (\sheight=""\d{1,3}"")?
                (\salt=""[^""]*"")?
                (\stitle=""[^""]*"")?
                \s?/?>$");
        }
        else
        {
            keep = true;
        }

        if (!keep)
        {
            html = html.Remove(tag.Index, tag.Length);
        }
    }

    return html;
}

/// <summary>
/// Utility function to match a regex pattern: case, whitespace, and line insensitive.
/// </summary>
/// <param name="s">The string to test.</param>
/// <param name="pattern">The regular expression pattern (pattern whitespace is ignored).</param>
/// <returns>True when <paramref name="pattern"/> matches <paramref name="s"/>.</returns>
private static bool IsMatch(string s, string pattern)
{
    return Regex.IsMatch(s, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase |
        RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);
}
aa=str.replcase("<"+sreplace+">","");
aa=aa.replcase("<"/+sreplace+">","");
/// <summary>
/// Removes the specified HTML tags (opening and closing forms, with any attributes)
/// from a string. Text between the tags is left in place.
/// </summary>
/// <param name="htmlString">The HTML string to clean.</param>
/// <param name="tags">
/// Names of the tags to remove, separated by "|", without angle brackets (e.g. "div|span").
/// Names are matched literally, as whole tag names, ignoring case.
/// </param>
/// <returns>
/// The cleaned string. <paramref name="htmlString"/> is returned unchanged when it is null or
/// empty, or when <paramref name="tags"/> contains no tag name.
/// </returns>
public static string ClearTag(string htmlString, string tags)
{
    if (string.IsNullOrEmpty(htmlString) || tags == null)
    {
        return htmlString;
    }

    string[] tagArray = tags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
    StringBuilder alternatives = new StringBuilder();
    foreach (string rawTag in tagArray)
    {
        string tag = rawTag.Trim();
        if (tag.Length == 0)
        {
            continue;
        }

        if (alternatives.Length > 0)
        {
            alternatives.Append('|');
        }

        // Escape so that a name containing regex metacharacters cannot alter the pattern.
        alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
    }

    if (alternatives.Length == 0)
    {
        return htmlString;
    }

    // Example for "div|span":  </?(?:div|span)(?=[\s/>])[^>]*>
    // The lookahead requires the name to end right there, so "b" does not match "<br>".
    string pattern = "</?(?:" + alternatives.ToString() + @")(?=[\s/>])[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        htmlString,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
附送另一个比较有用的函数
/// <summary>
/// Removes every HTML tag from a string except the ones listed in
/// <paramref name="reserveTags"/>.
/// </summary>
/// <param name="htmlString">The HTML string to clean.</param>
/// <param name="reserveTags">
/// Names of the tags to keep, separated by "|", without angle brackets (e.g. "div|span").
/// Names are matched literally, as whole tag names, ignoring case.
/// </param>
/// <returns>
/// The cleaned string. When no tag name is supplied the result of
/// <c>ClearAllTag(htmlString)</c> is returned.
/// </returns>
public static string ReserveTag(string htmlString, string reserveTags)
{
    StringBuilder alternatives = new StringBuilder();
    if (reserveTags != null)
    {
        string[] tagArray = reserveTags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string rawTag in tagArray)
        {
            string tag = rawTag.Trim();
            if (tag.Length == 0)
            {
                continue;
            }

            if (alternatives.Length > 0)
            {
                alternatives.Append('|');
            }

            // Escape so that a name containing regex metacharacters cannot alter the pattern.
            alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
        }
    }

    if (alternatives.Length == 0)
    {
        // Nothing to keep (null, blank, or only separators): strip every tag.
        return ClearAllTag(htmlString);
    }

    if (string.IsNullOrEmpty(htmlString))
    {
        return htmlString;
    }

    // Example for "div|span":  <(?!/?(?:div|span)(?=[\s/>]))[^>]*>
    // The inner lookahead requires the reserved name to end right there; without it,
    // reserving "s" would also preserve "<script>".
    string pattern = "<(?!/?(?:" + alternatives.ToString() + @")(?=[\s/>]))[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        htmlString,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
/// <summary>
/// Best-effort removal of common script-injection vectors from an HTML fragment:
/// script blocks, href attributes up to a "script:" scheme, inline on-event attributes,
/// iframe blocks and frameset blocks.
/// </summary>
/// <param name="html">The HTML fragment to clean.</param>
/// <returns>
/// The filtered HTML; <paramref name="html"/> is returned unchanged when it is null or empty.
/// </returns>
public string wipescript(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Lazy quantifiers keep each match inside a single element instead of running from
    // the first opening tag to the last closing tag in the document.

    // Remove <script ...>...</script> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<script[\s\S]+?</script *>", "", options);

    // Remove the start of an href whose value reaches a "script:" scheme (e.g. href="javascript:).
    // [^>] keeps the match inside the tag that carries the attribute.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" href *= *[^>]*?script *:", "", options);

    // Rename inline on... event attributes so they no longer fire.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" on\w+ *=", " _disibledevent=", options);

    // Remove <iframe ...>...</iframe> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe[\s\S]+?</iframe *>", "", options);

    // Remove <frameset ...>...</frameset> blocks.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset[\s\S]+?</frameset *>", "", options);

    return html;
}
// Matches anything that looks like an HTML tag, including an unterminated one at the end of the input.
private static Regex _tags = new Regex("<[^>]*(>|$)",
    RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.Compiled);

// First-pass whitelist, applied to each lower-cased tag. <a ...> and <img ...> with
// attributes pass here and are validated in detail inside Sanitize.
private static Regex _whitelist = new Regex(@"
    ^</?(a|b(lockquote)?|code|em|h(1|2|3)|i|li|ol|p(re)?|s(ub|up|trong|trike)?|ul)>$
    |^<(b|h)r\s?/?>$
    |^<a[^>]+>$
    |^<img[^>]+/?>$",
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace |
    RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Sanitizes potentially dangerous tags from the provided raw HTML input using a
/// whitelist-based approach, leaving the "safe" HTML tags.
/// </summary>
/// <param name="html">Raw HTML input.</param>
/// <returns>
/// The input with every tag that fails the whitelist removed; text between tags is kept.
/// A null or empty input is returned unchanged.
/// </returns>
public static string Sanitize(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    MatchCollection tags = _tags.Matches(html);

    // Walk the matches from last to first so that removing a tag never shifts the
    // index of a match that has not been processed yet.
    for (int i = tags.Count - 1; i > -1; i--)
    {
        Match tag = tags[i];

        // Invariant lower-casing: culture-specific casing must not affect tag matching.
        string tagname = tag.Value.ToLowerInvariant();

        bool keep;
        if (!_whitelist.IsMatch(tagname))
        {
            // Not on the whitelist.
            keep = false;
        }
        else if (tagname.StartsWith("<a", System.StringComparison.Ordinal))
        {
            // Detailed <a> check. The pattern is anchored with ^...$ so the WHOLE tag must
            // have the expected shape; unanchored, "<a onclick=x <a href="#1">" would pass.
            keep = IsMatch(tagname,
                @"^<a\s
                href=""(\#\d+|(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+)""
                (\stitle=""[^""]+"")?\s?>$");
        }
        else if (tagname.StartsWith("<img", System.StringComparison.Ordinal))
        {
            // Detailed <img> check, anchored for the same reason as the <a> check.
            keep = IsMatch(tagname,
                @"^<img\s
                src=""https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+""
                (\swidth=""\d{1,3}"")?
                (\sheight=""\d{1,3}"")?
                (\salt=""[^""]*"")?
                (\stitle=""[^""]*"")?
                \s?/?>$");
        }
        else
        {
            keep = true;
        }

        if (!keep)
        {
            html = html.Remove(tag.Index, tag.Length);
        }
    }

    return html;
}

/// <summary>
/// Utility function to match a regex pattern: case, whitespace, and line insensitive.
/// </summary>
/// <param name="s">The string to test.</param>
/// <param name="pattern">The regular expression pattern (pattern whitespace is ignored).</param>
/// <returns>True when <paramref name="pattern"/> matches <paramref name="s"/>.</returns>
private static bool IsMatch(string s, string pattern)
{
    return Regex.IsMatch(s, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase |
        RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);
}
/// <summary>
/// Removes the specified HTML tags from a string, ignoring case.
/// </summary>
/// <param name="html">Source string.</param>
/// <param name="tags">
/// Tag names to remove, without angle brackets. Each entry is matched literally right
/// after the opening bracket, so a closing tag is listed with its slash (e.g. "/a").
/// </param>
/// <returns>
/// The string with the matching tags removed; <paramref name="html"/> unchanged when it is
/// null or empty or when <paramref name="tags"/> holds no usable entry.
/// </returns>
private string HtmlFilter(string html, string[] tags)
{
    if (string.IsNullOrEmpty(html) || tags == null || tags.Length == 0)
    {
        return html;
    }

    StringBuilder alternatives = new StringBuilder();
    foreach (string tag in tags)
    {
        // Skip null/empty entries: an empty alternative can never name a tag.
        if (string.IsNullOrEmpty(tag))
        {
            continue;
        }

        if (alternatives.Length > 0)
        {
            alternatives.Append('|');
        }

        // Escape so that a name containing regex metacharacters cannot alter the pattern.
        alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
    }

    if (alternatives.Length == 0)
    {
        return html;
    }

    // \b stops a name from matching as the prefix of a longer one ("a" must not match "<abbr>").
    string pattern = "<(" + alternatives.ToString() + ")\\b[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        html,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
// Usage example: remove <a ...>, </a> and <img ...> tags from yourStr and append the
// filtered text to richTextBox2.Text.
string[] tags = new string[] { "a", "/a", "img" };
richTextBox2.Text += HtmlFilter(yourStr, tags);