我正在做一个投标管理系统。
比如,下面是一个招标网站:
http://www.szft.gov.cn/zfbm/zfcg/zhbgg/index.jsp
点击进去是详细内容。现在我要抓取里面有用的招标信息,要如何抓取呀?
请各位网友提供一个思路呀。谢了。

解决方案 »

  1.   

    帮忙http://community.csdn.net/Expert/topic/4999/4999993.xml?temp=.1140558
      

  2.   

    用传参数的方法,在接收的页面里用string id = Request["ID"]来接收
      

  3.   

    /// <summary>
    /// Runs <paramref name="strReg"/> (case-insensitively) against
    /// <paramref name="str"/> and returns the text captured by the named group
    /// <paramref name="strProperty"/> of the first match.
    /// </summary>
    /// <param name="strProperty">Name of the capture group to read.</param>
    /// <param name="str">Text to search.</param>
    /// <param name="strReg">Regular expression containing the named group.</param>
    /// <returns>
    /// The captured text, or an empty string when the pattern does not match
    /// (an unsuccessful group yields an empty <c>Value</c>).
    /// </returns>
    private string GrabContent( string strProperty, string str, string strReg )
    {
        Regex reg = new Regex( strReg, RegexOptions.IgnoreCase );
        Match m = reg.Match( str );
        return m.Groups[strProperty].Value;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns its body as text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>
    /// The response body decoded with <c>System.Text.Encoding.Default</c>, or an
    /// empty string when the download fails for any reason.
    /// </returns>
    private string GrabUrl( string url )
    {
        try
        {
            // The using blocks dispose the reader, the stream and the client in
            // reverse order, whether the read succeeds or throws.
            using( WebClient wc = new WebClient() )
            using( Stream s = wc.OpenRead( url ) )
            // NOTE(review): Encoding.Default is the machine's ANSI code page, so the
            // decoded text depends on where this runs - confirm it matches the site.
            using( StreamReader sr = new StreamReader( s, System.Text.Encoding.Default ) )
            {
                return sr.ReadToEnd();
            }
        }
        catch( System.Exception ex )
        {
            // Best effort by design: callers treat an empty string as "no content".
            // Trace the failure so it is not lost entirely.
            System.Diagnostics.Trace.WriteLine( "GrabUrl failed for " + url + ": " + ex.Message );
            return string.Empty;
        }
    }

    // URL templates of the scraped site; {0}/{1} are filled in with string.Format.
    const string categoryUrl = "http://www.lijiabaobei.com/category.asp?cid={0}";
    const string subcategoryUrl = "http://www.lijiabaobei.com/scategory.asp?cid={0}";
    const string subcategoryPageUrl = "http://www.lijiabaobei.com/scategory.asp?page={0}&cid={1}";
    const string productUrl = "http://www.lijiabaobei.com/product.asp?cid={0}";

    // Matches one subcategory link on a category page (groups: subcategoryid, name).
    const string subcategoryReg = "<td valign=\"bottom\" bgcolor=\"E7E7E7\" style=\"padding-top:4px;padding-left:8px\">·<a href=\"scategory.asp\\?cid=(?<subcategoryid>.*?)\">(?<name>.*?)</a></td>";

    // Matches the pager text of a subcategory page (group: pagecount).
    // The current page number is hard-coded as 1 in the pattern.
    const string subcategoryPage = "第<strong><font color=red>1</font>/(?<pagecount>.*?)</strong>页";

    // Matches one product link on a subcategory page (groups: id, name).
    const string productLink = "<td width=\"293\" valign=\"bottom\" class=\"large\" style=\"padding-top:12px\"><a class=\"nav_sp_title_l\" href=\"product.asp\\?cid=(?<id>.*?)\">(?<name>.*?)</a></td>";
            
            private void Button1_Click(object sender, System.EventArgs e)
            {
                /*
                using (SqlDataReader dr = lsy.GetAllCategory())
                {
                    while(dr.Read())
                    {
                        Regex reg = new Regex( subcategoryReg, RegexOptions.IgnoreCase );
                        string html = GrabUrl( string.Format( categoryUrl, dr["categoryid"] ) );
                        MatchCollection mc = reg.Matches( html );                    for( int i = 0; i < mc.Count; i++ )
                        {
                            Match m = mc[i];                        lsy.AddSubCategory( Int32.Parse(dr["categoryid"].ToString()),
                                Int32.Parse(m.Groups["subcategoryid"].Value.Trim()),
                                m.Groups["name"].Value.Trim()
                                );
                        }
                    }
                }
                */            using ( SqlDataReader dr = lsy.GetAllSubCategory())
                {
                    while(dr.Read())
                    {                   
                        int subcategoryid = Int32.Parse(dr["subcategoryid"].ToString() );
                        string html = GrabUrl( string.Format( subcategoryUrl, subcategoryid ) );                    Regex regPage = new Regex(subcategoryPage, RegexOptions.IgnoreCase );
                        Match mp = regPage.Match(html);                    int pageCount = Convertor.ToInt32(mp.Groups["pagecount"].Value.Trim());                    /*                    ParseOnePage( html, subcategoryid );                    for( int i = 1; i < pageCount; i++ )
                        {
                            ParseOnePage( GrabUrl( string.Format( subcategoryPageUrl, i, subcategoryid ) ), subcategoryid);
                        }
                        */                    for( int i = 2; i <= pageCount; i++ )
                            ParseOnePage( GrabUrl( string.Format( subcategoryPageUrl, i, subcategoryid ) ), subcategoryid);                }
                }
            }        private void ParseOnePage(string html, int subcategoryid )
            {
                //获取子项
                Regex reg = new Regex( productLink, RegexOptions.IgnoreCase );
                MatchCollection mc = reg.Matches( html );            for( int i = 0; i < mc.Count; i++ )
                {
                    Match m = mc[i];                int id = Int32.Parse(m.Groups["id"].Value.Trim());                AddProduct(id, subcategoryid);
                }
            }        const string regProducingArea = "<br>\r\n      ·产地/(?<producingarea>.*?) <br></td>\r\n";
            const string regAge = "<span class=\"style11\">适用月(年)龄:</span><font color class=\"color_sh_title\">(?<age>.*?)</font>";
            const string regIntroduction = "<font color class=\"color_sh_text\">★(?<introduction>.*?)</font></td>\r\n";
            const string regRe = "<td style=\"padding:10px 5px 10px 13px\">(?<re>.*?)\r\n</td>\r\n\t\t\t\t\t\t</tr>\r\n\t  </table>";
            const string regRow = "<td>(?<number>.*?)</td>\r\n\t\t\t\t\t\t\t\t<td>(?<name>.*?)</td>\r\n\t\t\t\t\t\t\t\t<td>(?<spec>.*?)</td>\r\n\t\t\t\t\t\t\t\t<td>¥(?<etprice>.*?)</td>\r\n\t\t\t\t\t\t\t\t<td><font color class=\"color1\">¥(?<vipprice>.*?)</font></td>";
            const string regImage = "<img src=\"(?<image>.*?)\" width=\"200\" height=\"200\" border=\"0\" class=\"box_juti\"></a><br>\r\n";
            const string regSpec = "<td colspan=\"2\" valign=\"top\" style=\"padding-top:10px;padding-bottom:20px\">\r\n                              ·规格/(?<spec>.*?)\r\n";        private void AddProduct( int id, int subcategoryid )
            {
                string htmlProduct = GrabUrl( string.Format( productUrl, id ) );            string name;
                string spec;
                string number;
                decimal etprice;
                decimal vipprice;            GetRowsProperty( htmlProduct, out number, out name, out spec, out etprice, out vipprice );            string producingarea = GetProperty(htmlProduct, regProducingArea, "producingarea");
                string age = GetProperty(htmlProduct, regAge, "age");
                string introduction = GetProperty(htmlProduct, regIntroduction, "introduction");
                string re = GetProperty(htmlProduct, regRe, "re");
                string image = GetProperty(htmlProduct, regImage, "image");
                
                spec = GetProperty(htmlProduct, regSpec, "spec" );            lsy.AddProduct(id, subcategoryid,
                    name,
                    number,
                    spec,
                    producingarea,
                    age,
                    introduction,
                    re,
                    image,
                    etprice,
                    vipprice);
            }