private void button1_Click(object sender, EventArgs e)
        {
            label1.Text = DateTime.Now.ToString();
            string strURL = "http://ftp2.etedi.com/cd_qry/qry_vessel_code.aspx";
            //存放抓取的数据
            DataTable dtbl = new DataTable();
            dtbl.Columns.Add("Code");
            dtbl.Columns.Add("ENG_Name");
            dtbl.Columns.Add("IN_Voyage");
            dtbl.Columns.Add("OUT_Voyage");            //开始抓取数据
            //获得指定页面的内容   
            WebRequest hwr = WebRequest.Create(strURL);
            HttpWebResponse hwp = hwr.GetResponse() as HttpWebResponse;
            StreamReader sr;
            string code = hwp.ContentType;
            //得到编码了
            //如果取不到则默认为gb2312
            try
            {
                code = code.Split('=')[1];
            }
            catch
            {
                code = "gb2312";//utf-8
                //code = "utf-8";
            }
            Stream rep = hwp.GetResponseStream();
            sr = new StreamReader(rep, Encoding.GetEncoding(code));
            string strSource = sr.ReadToEnd();            //匹配出表格内容
            Regex rx = new Regex("<table cellspacing=\"0\" rules=\"all\" bordercolor=\"Black\" border=\"1\" id=\"DataGrid1\" style=\"border-color:Black;width:502px;border-collapse:collapse;\" >" + @"([\S\s]*?)" + "</table>"
                                , RegexOptions.Compiled | RegexOptions.IgnoreCase);            MatchCollection matchs = rx.Matches(strSource);
            if (matchs.Count > 0)
            {
                strSource = matchs[0].Value;//@all</td><td>(.*)@all</td>
                string pattern = "<tr.*?>@all(.*)@all(.*)@all(.*)@all(.*)@all</tr>";
                pattern = pattern.Replace("@all", @"[\S\s]*?");
                rx = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);
                //将匹配出的数据放入DataTable
                DataRow drow;
                matchs = rx.Matches(strSource);
                for (int i = 0; i < matchs.Count; i++)
                {
                    drow = dtbl.NewRow();
                    drow["Code"] = matchs[i].Groups[1].Value;
                    drow["ENG_Name"] = matchs[i].Groups[2].Value;
                    drow["IN_Voyage"] = matchs[i].Groups[3].Value;
                    drow["OUT_Voyage"] = matchs[i].Groups[4].Value;
                    dtbl.Rows.Add(drow);
                }
            }
            //绑定显示抓取的数据
            dataGridView1.DataSource = dtbl;
            //GridView1.DataBind();
            //可以在文本框中显示抓取的表格内容
            //TextBox1.Text = strSource;
            label2.Text = DateTime.Now.ToString();
        }读取不到数据

解决方案 »

  1.   

    我勒个去 这速度。。
    你等下吧 我先看下html构造 太慢了这。
      

  2.   

    受不了了 你把<table></table>中间的数据 留2-3行 
    然后把整个页面的HTML发出来  我给你改 
      

  3.   

    我也受不了了,太慢了
    查看下源代码,映入眼帘的就是好多行的viewstate
      

  4.   


    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
    <HTML>
    <HEAD>
    <title>船号对照表</title>
    <meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
    <meta name="CODE_LANGUAGE" Content="C#">
    <meta name="vs_defaultClientScript" content="JavaScript">
    <meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
    </HEAD>
    <body MS_POSITIONING="GridLayout" background="images\expl.gif">
    <form name="Form1" method="post" action="qry_vessel_code.aspx" id="Form1">
    <input type="hidden" name="__VIEWSTATE" value=" 4+Oz47Oz47Pj47Pj47Pj47Pj47Pj47Pu7DKEAuvwFgTU8mM3YUvPpMfNxh" />
     
    <FONT face="宋体">
    <TABLE id="Table1" style="Z-INDEX: 101; LEFT: 0px; POSITION: absolute; TOP: 0px" cellSpacing="0"
    cellPadding="0" width="100%" border="0">
    <TR>
    <TD height="100">
    <img id="Image1" src="images\text12.gif" alt="" border="0" style="height:100px;width:982px;" /></TD>
    </TR>
    <TR>
    <TD style="HEIGHT: 225px" align="center" vAlign="top">
    <P>&nbsp;</P>
    <table cellspacing="0" rules="all" bordercolor="Black" border="1" id="DataGrid1" style="border-color:Black;width:502px;border-collapse:collapse;">
    <tr align="Center" style="color:White;background-color:#A2C1EC;font-size:Smaller;font-weight:bold;">
    <td>船号</td><td>英文船名</td><td>进口航次</td><td>出口航次</td>
    </tr><tr align="Center" style="color:#003366;background-color:#DDDDFF;font-size:Smaller;">
    <td>?    </td><td>0                                  </td><td>        </td><td>        </td>
    </table></TD>
    </TR>
    </TABLE>
    </FONT>
    </form>
    </body>
    </HTML>
      

  5.   


    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
    <HTML>
    <HEAD>
    <title>船号对照表</title>
    <meta name="GENERATOR" Content="Microsoft Visual Studio .NET 7.1">
    <meta name="CODE_LANGUAGE" Content="C#">
    <meta name="vs_defaultClientScript" content="JavaScript">
    <meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
    </HEAD>
    <body MS_POSITIONING="GridLayout" background="images\expl.gif">
    <form name="Form1" method="post" action="qry_vessel_code.aspx" id="Form1">
    <input type="hidden" name="__VIEWSTATE" value=" 4+Oz47Oz47Pj47Pj47Pj47Pj47Pj47Pu7DKEAuvwFgTU8mM3YUvPpMfNxh" />
     
    <FONT face="宋体">
    <TABLE id="Table1" style="Z-INDEX: 101; LEFT: 0px; POSITION: absolute; TOP: 0px" cellSpacing="0"
    cellPadding="0" width="100%" border="0">
    <TR>
    <TD height="100">
    <img id="Image1" src="images\text12.gif" alt="" border="0" style="height:100px;width:982px;" /></TD>
    </TR>
    <TR>
    <TD style="HEIGHT: 225px" align="center" vAlign="top">
    <P>&nbsp;</P>
    <table cellspacing="0" rules="all" bordercolor="Black" border="1" id="DataGrid1" style="border-color:Black;width:502px;border-collapse:collapse;">
    <tr align="Center" style="color:White;background-color:#A2C1EC;font-size:Smaller;font-weight:bold;">
    <td>船号</td><td>英文船名</td><td>进口航次</td><td>出口航次</td>
    </tr><tr align="Center" style="color:#003366;background-color:#DDDDFF;font-size:Smaller;">
    <td>615050</td><td>105HYODONGCHEMI                    </td><td>1106    </td><td>1107    </td>
    </tr><tr align="Center" style="color:#003366;background-color:#CCDDEE;font-size:Smaller;">
    <td>615093</td><td>105HYODONGCHEMI                    </td><td>1110    </td><td>1111    </td>
    </tr>
    </table></TD>
    </TR>
    </TABLE>
    </FONT>
    </form>
    </body>
    </HTML>看这个吧   上面那个少点东西
      

  6.   

    <table cellspacing=\"0\" rules=\"all\" bordercolor=\"Black\" border=\"1\" id=\"DataGrid1\" style=\"border-color:Black;width:502px;border-collapse:collapse;\" >" + @"([\S\s]*?)" + "</table>
    =>Regex reg = new Regex(@"(?<=<table[^<>]+?DataGrid1[^<>]+>)[\s\S]+?(?=</table>)");
      

  7.   

    把dataset存在viewstate里了吧。正好拿来反序列化
      

  8.   

                Regex reg = new Regex(@"(?<=<table[^<>]+?DataGrid1[^<>]+>)[\s\S]+?(?=</table>)");
                string data = reg.Match(strURL).Value;
                Regex reg1 = new Regex(@"<tr[^<>]+>(?:\s*<td>(?<key>[^<>]+)</td>){4}\s*</tr>");
                MatchCollection mc = reg1.Matches(data);
                foreach (Match m in mc)
                {
                    Console.WriteLine(m.Groups["key"].Captures[0]);
                    Console.WriteLine(m.Groups["key"].Captures[1]);
                    Console.WriteLine(m.Groups["key"].Captures[2]);
                    Console.WriteLine(m.Groups["key"].Captures[3]);
                }