求一函数，可清除HTML字符串中指定的标签

function aa(str, sReplace)
aa = str.replace("<" + sReplace + ">", "");
aa = aa.replace("</" + sReplace + ">", "");

/// <summary>
        /// Removes the specified HTML tags (both opening and closing forms) from a string.
        /// Only the tags themselves are removed; the text between them is kept.
        /// Tag names are matched literally, case-insensitively, and as whole names
        /// (clearing "b" does not touch &lt;br&gt; or &lt;body&gt;).
        /// </summary>
        /// <param name="htmlString">The HTML string to clean. Returned unchanged when null or empty.</param>
        /// <param name="tags">Names of the tags to remove, separated by "|", without angle brackets (for example "div|span").</param>
        /// <returns>The cleaned string, or <paramref name="htmlString"/> unchanged when no tag name was supplied.</returns>
        public static string ClearTag(string htmlString, string tags)
        {
            if (string.IsNullOrEmpty(htmlString) || tags == null || tags.Trim() == "")
            {
                return htmlString;
            }

            // Build the alternation of tag names, e.g. "div|span".
            string[] tagArray = tags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder alternatives = new StringBuilder();
            foreach (string rawTag in tagArray)
            {
                string tag = rawTag.Trim();
                if (tag.Length == 0)
                {
                    continue;
                }
                if (alternatives.Length > 0)
                {
                    alternatives.Append('|');
                }
                // Escape so that a name is never interpreted as regex syntax.
                alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
            }

            if (alternatives.Length == 0)
            {
                return htmlString;
            }

            // Example for "div":  </?(?:div)(?![\w:-])[^>]*>
            // The negative lookahead stops a name from matching as a prefix of a longer tag name.
            string pattern = "</?(?:" + alternatives.ToString() + @")(?![\w:-])[^>]*>";
            return System.Text.RegularExpressions.Regex.Replace(
                htmlString,
                pattern,
                "",
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

        附送另一个比较有用的函数
/// <summary>
        /// Removes every HTML tag from a string except the ones listed in <paramref name="reserveTags"/>.
        /// Reserved names are matched literally, case-insensitively, and as whole names
        /// (reserving "b" does not also keep &lt;br&gt; or &lt;body&gt;).
        /// </summary>
        /// <param name="htmlString">The HTML string to clean.</param>
        /// <param name="reserveTags">Names of the tags to keep, separated by "|", without angle brackets.</param>
        /// <returns>
        /// The cleaned string. When no tag name is supplied (null, blank, or separators only)
        /// the result of ClearAllTag(htmlString) is returned.
        /// </returns>
        public static string ReserveTag(string htmlString, string reserveTags)
        {
            if (reserveTags == null || reserveTags.Trim() == "")
            {
                return ClearAllTag(htmlString);
            }

            // Build the alternation of reserved tag names, e.g. "div|span".
            string[] tagArray = reserveTags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder alternatives = new StringBuilder();
            foreach (string rawTag in tagArray)
            {
                string tag = rawTag.Trim();
                if (tag.Length == 0)
                {
                    continue;
                }
                if (alternatives.Length > 0)
                {
                    alternatives.Append('|');
                }
                // Escape so that a name is never interpreted as regex syntax.
                alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
            }

            if (alternatives.Length == 0)
            {
                // Only separators were given: nothing is reserved, so behave like the empty-list case.
                return ClearAllTag(htmlString);
            }

            if (string.IsNullOrEmpty(htmlString))
            {
                return htmlString;
            }

            // Example for "div":  <(?!/?(?:div)(?![\w:-]))[^>]*>
            // The outer negative lookahead skips reserved tags; the inner one requires the
            // reserved name to end there, so "b" does not protect "<br>".
            string pattern = "<(?!/?(?:" + alternatives.ToString() + @")(?![\w:-]))[^>]*>";
            return System.Text.RegularExpressions.Regex.Replace(
                htmlString,
                pattern,
                "",
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

/// <summary>
/// Best-effort removal of script-capable markup from an HTML fragment:
/// script / iframe / frameset elements are deleted, "href=...script:" prefixes are
/// deleted, and inline "on..." event attributes are renamed so they no longer fire.
/// This is a regex filter, not an HTML parser; do not rely on it as the only
/// defence against untrusted input.
/// </summary>
/// <param name="html">The HTML to filter. Returned unchanged when null or empty.</param>
/// <returns>The filtered HTML.</returns>
public string wipescript(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // <script ...> ... </script>. Lazy quantifier: stop at the first closing tag so that
    // content between two separate script blocks is preserved.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<script[\s\S]*?</script *>", "", options);

    // href=javascript: / href="vbscript: ... (the attribute prefix up to the colon is dropped).
    html = System.Text.RegularExpressions.Regex.Replace(html, @"\shref\s*=\s*[""']?\s*\w*script\s*:", "", options);

    // Inline event handlers (onclick=, onload=, ...): rename the attribute so it is inert.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"\son\w+\s*=", " _disibledevent=", options);

    // <iframe ...> ... </iframe>
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe[\s\S]*?</iframe *>", "", options);

    // <frameset ...> ... </frameset>
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset[\s\S]*?</frameset *>", "", options);

    return html;
}

贴个经典的，Stack Overflow 的：
private static Regex _tags = new Regex("<[^>]*(>|$)", RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.Compiled);
// Tags that may survive sanitizing: a fixed set of attribute-less formatting tags,
// <br>/<hr>, and <a ...>/<img ...> (the latter two get a stricter check in Sanitize).
private static Regex _whitelist = new Regex(@"
    ^</?(a|b(lockquote)?|code|em|h(1|2|3)|i|li|ol|p(re)?|s(ub|up|trong|trike)?|ul)>$
    |^<(b|h)r\s?/?>$
    |^<a[^>]+>$
    |^<img[^>]+/?>$",
    RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace |
    RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Sanitizes raw HTML with a whitelist approach: every tag that is not on the whitelist
/// is removed (its inner text is kept); whitelisted &lt;a&gt; and &lt;img&gt; tags must
/// additionally match a strict attribute layout or they are removed as well.
/// </summary>
/// <param name="html">The raw HTML. Returned unchanged when null or empty.</param>
/// <returns>The HTML with all non-whitelisted tags removed.</returns>
public static string Sanitize(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    MatchCollection tags = _tags.Matches(html);

    // Walk the tags back to front so removing one does not shift the indexes
    // of the tags that are still to be processed.
    for (int i = tags.Count - 1; i > -1; i--)
    {
        Match tag = tags[i];
        // Invariant lower-casing: culture rules (e.g. Turkish dotless i) must not affect tag names.
        string tagname = tag.Value.ToLowerInvariant();
        bool keep;

        if (!_whitelist.IsMatch(tagname))
        {
            // not on the whitelist
            keep = false;
        }
        else if (tagname.StartsWith("<a", System.StringComparison.Ordinal))
        {
            // Detailed <a> check. Anchored with ^...$ so the WHOLE tag must have the
            // allowed shape; otherwise "<a onclick=... <a href="http://x">" would pass
            // because a valid tag appears somewhere inside it.
            keep = IsMatch(tagname,
                @"^<a\s
                  href=""(\#\d+|(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+)""
                  (\stitle=""[^""]+"")?\s?>$");
        }
        else if (tagname.StartsWith("<img", System.StringComparison.Ordinal))
        {
            // Detailed <img> check, anchored for the same reason as above.
            keep = IsMatch(tagname,
                @"^<img\s
              src=""https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+""
              (\swidth=""\d{1,3}"")?
              (\sheight=""\d{1,3}"")?
              (\salt=""[^""]*"")?
              (\stitle=""[^""]*"")?
              \s?/?>$");
        }
        else
        {
            keep = true;
        }

        if (!keep)
        {
            html = html.Remove(tag.Index, tag.Length);
        }
    }

    return html;
}
/// <summary>
/// Tests whether <paramref name="s"/> contains a match for <paramref name="pattern"/>.
/// Matching ignores case, ignores unescaped whitespace in the pattern, lets "." match
/// newlines, and captures only explicitly named groups.
/// </summary>
private static bool IsMatch(string s, string pattern)
{
    const RegexOptions matchOptions =
        RegexOptions.ExplicitCapture
        | RegexOptions.IgnorePatternWhitespace
        | RegexOptions.IgnoreCase
        | RegexOptions.Singleline;

    bool found = Regex.IsMatch(s, pattern, matchOptions);
    return found;
}

/// <summary>
/// Removes the given HTML tags from a string. Matching is case-insensitive and tag names
/// are treated literally; the text between tags is kept.
/// </summary>
/// <param name="html">The source string. Returned unchanged when null or empty.</param>
/// <param name="tags">
/// Tag names to remove, without angle brackets. Closing tags must be listed explicitly
/// with a leading slash (for example "a", "/a"). Null or empty entries are ignored.
/// </param>
/// <returns>The filtered string, or <paramref name="html"/> unchanged when there is nothing to filter.</returns>
private string HtmlFilter(string html, string[] tags)
{
    if (string.IsNullOrEmpty(html) || tags == null)
    {
        return html;
    }

    StringBuilder alternatives = new StringBuilder();
    foreach (string tag in tags)
    {
        if (string.IsNullOrEmpty(tag))
        {
            continue;
        }
        if (alternatives.Length > 0)
        {
            alternatives.Append('|');
        }
        // Escape so that a name such as "." or "c++" is not read as regex syntax.
        alternatives.Append(System.Text.RegularExpressions.Regex.Escape(tag));
    }

    if (alternatives.Length == 0)
    {
        return html;
    }

    // "\b" keeps a short name from matching as the prefix of a longer one ("a" vs "<abbr>").
    string pattern = "<(?:" + alternatives.ToString() + ")\\b[^>]*>";
    return System.Text.RegularExpressions.Regex.Replace(
        html,
        pattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
// Usage: remove <a>, </a> and <img> tags from yourStr and append the result to richTextBox2.
string[] tags = new string[] { "a", "/a", "img" };
richTextBox2.Text += HtmlFilter(yourStr, tags);

调试易

求一函数，可清除HTML字符串中指定的标签

解决方案 »