标签可指定多个,返回清除后的字符串

解决方案 »

  1.   

    // Removes every "<tag>" and "</tag>" occurrence of the given tag name from str
    // and returns the cleaned string. Only bare tags are matched; an opening tag
    // that carries attributes (e.g. <div class="x">) is left untouched.
    function aa(str, sReplace) {
        // split/join removes ALL occurrences; String.replace with a string
        // pattern would only remove the first one.
        var cleaned = str.split("<" + sReplace + ">").join("");
        cleaned = cleaned.split("</" + sReplace + ">").join("");
        return cleaned;
    }
      

  2.   


     /// <summary>
            /// 清除字符串中指定的HTML标签,以“|”分隔 。
            /// </summary>
            /// <param name="codeString">HTML字符串。</param>
            /// <param name="tags">待清除的HTML标签名称,不包含尖括号。</param>
            /// <returns>清理后的字符串。</returns>
            static public string ClearTag(string htmlString, string tags)
            {
                if (tags == null || tags.Trim() == "")
                {
                    return htmlString;
                }
                //例: 依据正则 <(DIV|/DIV)[^>]*?>,清理DIV标记
                char[] strSeparators = new char[] { '|' };
                string[] tagArray = tags.Split(strSeparators, StringSplitOptions.RemoveEmptyEntries);
                StringBuilder RegString = new StringBuilder();
                int c = tagArray.Length;
                if (c > 0)
                {
                    RegString.Append("<(");
                    for (int i = 0; i < c; i++)
                    {
                        if (i > 0)
                        {
                            RegString.Append("|");
                        }
                        RegString.Append(tagArray[i]);
                        RegString.Append("|/");
                        RegString.Append(tagArray[i]);                }
                    RegString.Append(")[^>]*?>");
                    return System.Text.RegularExpressions.Regex.Replace(htmlString, RegString.ToString(), "");
                }
                else
                {
                    return htmlString;
                }
            }
            
            附送另一个比较有用的函数
    /// <summary>
            /// Removes every HTML tag from a string except the tags that are explicitly reserved.
            /// </summary>
            /// <param name="htmlString">The HTML string to clean.</param>
            /// <param name="reserveTags">Names of the tags to keep, separated by "|", without angle
            /// brackets (for example "div|span"). Names are matched literally and case-insensitively.</param>
            /// <returns>The cleaned string. When no tag name is reserved, the result of
            /// ClearAllTag is returned.</returns>
            public static string ReserveTag(string htmlString, string reserveTags)
            {
                if (reserveTags == null || reserveTags.Trim() == "")
                {
                    return ClearAllTag(htmlString);
                }

                // Build the alternation part of the pattern, e.g. "div|span".
                StringBuilder alternation = new StringBuilder();
                foreach (string rawTag in reserveTags.Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    string tagName = rawTag.Trim();
                    if (tagName.Length == 0)
                    {
                        continue;
                    }
                    if (alternation.Length > 0)
                    {
                        alternation.Append('|');
                    }
                    // Escape so that a tag name can never be interpreted as regex syntax.
                    alternation.Append(System.Text.RegularExpressions.Regex.Escape(tagName));
                }

                if (alternation.Length == 0)
                {
                    // Only separators/blanks were supplied: nothing is reserved, so this is
                    // the same situation as an empty list.
                    return ClearAllTag(htmlString);
                }

                if (string.IsNullOrEmpty(htmlString))
                {
                    return htmlString;
                }

                // Example for "div": <(?!/?(?:div)(?![\w:-]))[^>]*>
                // i.e. any tag whose name is NOT exactly one of the reserved names. The inner
                // lookahead keeps a short reserved name from also protecting longer tags
                // (reserving "b" must not keep <br> or <body>).
                string pattern = "<(?!/?(?:" + alternation.ToString() + @")(?![\w:-]))[^>]*>";
                return System.Text.RegularExpressions.Regex.Replace(
                    htmlString,
                    pattern,
                    "",
                    System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            }
      

  3.   


    /// <summary>
    /// Best-effort removal of script-bearing markup from an HTML fragment: script, iframe and
    /// frameset elements are deleted, script-scheme href attributes are cut, and inline on...
    /// event attributes are renamed so they no longer fire.
    /// </summary>
    /// <param name="html">The HTML fragment to filter.</param>
    /// <returns>The filtered fragment; null or empty input is returned unchanged.</returns>
    /// <remarks>
    /// NOTE(review): regex filtering cannot cover every XSS vector (entity-encoded schemes,
    /// malformed markup, ...). Do not rely on this as the only defence for untrusted input.
    /// </remarks>
    public string wipescript(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        System.Text.RegularExpressions.RegexOptions options =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Remove <script>...</script> elements. The lazy quantifier stops at the FIRST
        // closing tag, so content between two separate script blocks is preserved.
        html = System.Text.RegularExpressions.Regex.Replace(html, @"<script[\s\S]*?</script\s*>", "", options);

        // Remove the leading part of href attributes that use a script scheme (javascript:, vbscript:, ...).
        html = System.Text.RegularExpressions.Regex.Replace(html, @"\shref\s*=\s*[""']?\s*[a-z]*script\s*:", "", options);

        // Neutralise inline event handlers (onclick=, onload=, ...) by renaming the attribute.
        html = System.Text.RegularExpressions.Regex.Replace(html, @"\son\w+\s*=", " _disibledevent=", options);

        // Remove <iframe>...</iframe> elements.
        html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe[\s\S]*?</iframe\s*>", "", options);

        // Remove <frameset>...</frameset> elements.
        html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset[\s\S]*?</frameset\s*>", "", options);

        return html;
    }
      

  4.   

    // A classic whitelist-based sanitizer, from Stack Overflow.

    // Matches anything that looks like an HTML tag, including an unterminated tag at the end of the input.
    private static readonly Regex _tags = new Regex("<[^>]*(>|$)", RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.Compiled);

    // Tags (already lower-cased) that may survive sanitizing. Anchor and image tags that pass
    // here are still subject to the detailed attribute checks in Sanitize.
    private static readonly Regex _whitelist = new Regex(@"
        ^</?(a|b(lockquote)?|code|em|h(1|2|3)|i|li|ol|p(re)?|s(ub|up|trong|trike)?|ul)>$
        |^<(b|h)r\s?/?>$
        |^<a[^>]+>$
        |^<img[^>]+/?>$",
        RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace |
        RegexOptions.ExplicitCapture | RegexOptions.Compiled);

    /// <summary>
    /// sanitize any potentially dangerous tags from the provided raw HTML input using
    /// a whitelist based approach, leaving the "safe" HTML tags
    /// </summary>
    /// <param name="html">The raw HTML input.</param>
    /// <returns>The input with every non-whitelisted tag removed (text content is kept);
    /// null or empty input is returned unchanged.</returns>
    public static string Sanitize(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        MatchCollection tags = _tags.Matches(html);

        // Iterate from the last tag to the first so that removing a tag does not shift
        // the indexes of the tags that are still to be processed.
        for (int i = tags.Count - 1; i > -1; i--)
        {
            Match tag = tags[i];
            // Invariant lower-casing: culture-sensitive casing (e.g. Turkish dotless i)
            // must not change how "<IMG" or "<I>" is recognised.
            string tagname = tag.Value.ToLowerInvariant();

            if (!_whitelist.IsMatch(tagname))
            {
                // not on our whitelist? I SAY GOOD DAY TO YOU, SIR. GOOD DAY!
                html = html.Remove(tag.Index, tag.Length);
            }
            else if (tagname.StartsWith("<a", System.StringComparison.Ordinal))
            {
                // detailed <a> tag checking. The pattern is anchored with ^...$ so the WHOLE
                // tag must have the expected shape; unanchored, a tag such as
                // <a onmouseover=... <a href="http://x"> would pass because its tail matches.
                if (!IsMatch(tagname,
                    @"^<a\s
                      href=""(\#\d+|(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+)""
                      (\stitle=""[^""]+"")?\s?>$"))
                {
                    html = html.Remove(tag.Index, tag.Length);
                }
            }
            else if (tagname.StartsWith("<img", System.StringComparison.Ordinal))
            {
                // detailed <img> tag checking, anchored for the same reason as above
                if (!IsMatch(tagname,
                    @"^<img\s
                  src=""https?://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+""
                  (\swidth=""\d{1,3}"")?
                  (\sheight=""\d{1,3}"")?
                  (\salt=""[^""]*"")?
                  (\stitle=""[^""]*"")?
                  \s?/?>$"))
                {
                    html = html.Remove(tag.Index, tag.Length);
                }
            }
        }

        return html;
    }

    /// <summary>
    /// Utility function to match a regex pattern: case, whitespace, and line insensitive
    /// </summary>
    /// <param name="s">The text to test.</param>
    /// <param name="pattern">The regular expression; whitespace inside it is ignored.</param>
    /// <returns>true when the pattern matches somewhere in <paramref name="s"/>.</returns>
    private static bool IsMatch(string s, string pattern)
    {
        return Regex.IsMatch(s, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase |
            RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);
    }
      

  5.   

    /// <summary>
    /// Removes the specified HTML tags from a string (case-insensitive). Opening and closing
    /// forms are separate entries: pass "a" to remove opening anchor tags and "/a" to remove
    /// closing ones.
    /// </summary>
    /// <param name="html">The source string.</param>
    /// <param name="tags">The tags to remove, without angle brackets. Entries are matched
    /// literally; null or empty entries are ignored.</param>
    /// <returns>The filtered string; the input unchanged when it is null or empty, or when
    /// no usable tag is supplied.</returns>
    private string HtmlFilter(string html, string[] tags)
    {
        if (string.IsNullOrEmpty(html) || tags == null)
        {
            return html;
        }

        StringBuilder alternation = new StringBuilder();
        foreach (string tag in tags)
        {
            if (string.IsNullOrEmpty(tag))
            {
                continue;
            }
            if (alternation.Length > 0)
            {
                alternation.Append('|');
            }
            // Escape so that a tag entry can never be interpreted as regex syntax.
            alternation.Append(Regex.Escape(tag));
        }

        if (alternation.Length == 0)
        {
            return html;
        }

        // Example for { "a", "/a", "img" }: <(a|/a|img)(?<!<)\b[^>]*>
        // (?<!<) rejects an empty tag name, and \b stops a short name from matching a
        // longer tag (removing "a" must not remove <abbr>).
        string pattern = "<(" + alternation.ToString() + ")(?<!<)\\b[^>]*>";
        return System.Text.RegularExpressions.Regex.Replace(html, pattern, "", RegexOptions.IgnoreCase);
    }
    // Usage: strip opening/closing anchor tags and image tags from yourStr,
    // then append the filtered text to the text box.
    string[] tags = { "a", "/a", "img" };
    string filtered = HtmlFilter(yourStr, tags);
    richTextBox2.Text += filtered;