就是说 我获取到了一段html网页上面的值,
但是有的值包含了如下代码(html注释),我如何来过滤掉这段注释的代码呢?
求具体方法,或者正则表达式应该怎么写?  
求解,谢谢各位···············内容标题
<!--
 /* Font Definitions */
 @font-face{font-family:宋体;panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face{font-family:新宋体;panose-1:2 1 6 9 3 1 1 1 1 1;}
@font-face{font-family:仿宋_GB2312;panose-1:2 1 6 9 3 1 1 1 1 1;}
@font-face{font-family:"\@宋体";panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face{font-family:"\@仿宋_GB2312";panose-1:2 1 6 9 3 1 1 1 1 1;}
@font-face{font-family:"\@新宋体";panose-1:2 1 6 9 3 1 1 1 1 1;}
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal{margin:0cm;margin-bottom:.0001pt;text-align:justify;text-justify:inter-ideograph;font-size:16.0pt;font-family:仿宋_GB2312;color:#003366;}
p.MsoHeader, li.MsoHeader, div.MsoHeader{margin:0cm;margin-bottom:.0001pt;text-align:center;layout-grid-mode:char;border:none;padding:0cm;font-size:9.0pt;font-family:仿宋_GB2312;color:#003366;}
p.MsoFooter, li.MsoFooter, div.MsoFooter{margin:0cm;margin-bottom:.0001pt;layout-grid-mode:char;font-size:9.0pt;font-family:仿宋_GB2312;color:#003366;}
p.MsoBodyTextIndent, li.MsoBodyTextIndent, div.MsoBodyTextIndent{margin:0cm;margin-bottom:.0001pt;text-align:justify;text-justify:inter-ideograph;text-indent:52.5pt;line-height:25.0pt;font-size:16.0pt;font-family:仿宋_GB2312;color:#003366;}
p.MsoBodyTextIndent2, li.MsoBodyTextIndent2, div.MsoBodyTextIndent2{margin:0cm;margin-bottom:.0001pt;text-align:justify;text-justify:inter-ideograph;text-indent:31.35pt;font-size:16.0pt;font-family:仿宋_GB2312;color:#003366;}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate{margin:0cm;margin-bottom:.0001pt;text-align:justify;text-justify:inter-ideograph;font-size:9.0pt;font-family:仿宋_GB2312;color:#003366;}
p.Char, li.Char, div.Char{margin:0cm;margin-bottom:.0001pt;text-align:justify;text-justify:inter-ideograph;font-size:12.0pt;font-family:"Times New Roman";}
 /* Page Definitions */
 @page Section1{size:595.3pt 841.9pt;margin:54.55pt 62.35pt 62.3pt 79.4pt;layout-grid:15.6pt;}
div.Section1{page:Section1;}
 /* List Definitions */
 ol{margin-bottom:0cm;}
ul{margin-bottom:0cm;}
-->具体内容································
就是内容中突然多出了这么一段注释,如何过滤掉呢?

解决方案 »

  1.   

    我记得可以按照table,div等之类的标签来区别的
      

  2.   

    http://adyhpq.blog.163.com/blog/static/3866700201106101017955/
    参考一下
      

  3.   


    我是读取一个html  获取有用的数据,也就是采集,已经过滤完了 数据去掉了,但是取出来的数据多余了这么段html注释的代码,现在不知道怎么再过滤这个············
      

  4.   

    准确地说,这不是普通的注释:这是为了解决浏览器兼容问题,专门在 CSS 或者 js 里这样写,避免浏览器把 js 和 CSS 当成字符串打印出来。程序不需要它的话,看看正则的定义(30分钟),几下就搞定了。
    正则不难的,网上找源代码不如自己学一下,以后还能用:
                var html = @"<html><head>
                        <!--
                         /* Font Definitions */
                         @font-face{font-family:宋体;panose-1:2 1 6 0 3 1 1 1 1 1;}
                        @font-face{font-family:新宋体;panose-1:2 1 6 9 3 1 1 1 1 1;}
                        @font-face{font-family:仿宋_GB2312;panose-1:2 1 6 9 3 1 1 1 1 1;}
                        --></head></html>";
                Regex regex = new Regex("<!--[^<]+-->");
                html = regex.Replace(html, "");
                this.textBox1.Text = html;
      

  5.   

      //html过滤
            public static string setPinglunContent(string strContent)
            {
                string theString = strContent.Replace("<", "[").Replace(">", "]");
                char enter = (char)13;
                char enter2 = (char)10;
                theString = theString.Replace(enter.ToString(), "<br>").Replace(enter2.ToString(), "");
                theString = theString.Replace(" ", "&nbsp;").Replace(" ", "&nbsp;&nbsp;");
                return theString;
            }
    类似的这种
      

  6.   

    帮你写了一个:
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Text.RegularExpressions;

    namespace test
    {
        /// <summary>
        /// Demo Web Forms page that strips HTML comments from a string and shows
        /// the result in <c>Label1</c>.
        /// </summary>
        public partial class re : System.Web.UI.Page
        {
            /// <summary>
            /// Removes every <c>&lt;!-- ... --&gt;</c> comment from the sample markup
            /// and writes the remaining text to the label.
            /// </summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event data (unused).</param>
            protected void Page_Load(object sender, EventArgs e)
            {
                string html = @"111111<!--过滤过滤过滤-->222222222";

                // The lazy ".*?" ends each match at the nearest "-->", so text between
                // two comments survives; RegexOptions.Singleline lets "." match '\n',
                // so comments spanning several lines are removed too. The greedy
                // "<!--.*-->" handled neither case.
                Regex commentPattern = new Regex("<!--.*?-->", RegexOptions.Singleline);
                html = commentPattern.Replace(html, "");

                Label1.Text = html;
            }
        }
    }