using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Web;
using System.Text;
using HtmlAgilityPack;
using Conn;namespace DataApp
{
    public partial class MainForm : Form
    {
        string url = @"http://117.159.3.6:9035/QueryWeb/";
        int count = 0;
        DataTable dt1 = new DataTable("t1");//企业信息
        DataTable dt2 = new DataTable("t2");//企业资质                
        public MainForm()
        {
            InitializeComponent();
        }
        private void MainForm_Load(object sender, EventArgs e)
        {
            ////企业信息
            //dt1.Columns.Add("企业名称", Type.GetType("System.String"));
            //dt1.Columns.Add("统一信用代码", Type.GetType("System.String"));
            //dt1.Columns.Add("注册地址", Type.GetType("System.String"));
            //dt1.Columns.Add("企业类型", Type.GetType("System.String"));
            //dt1.Columns.Add("注册日期", Type.GetType("System.String"));
            //dt1.Columns.Add("营业地址", Type.GetType("System.String"));
            //dt1.Columns.Add("营业地址邮编", Type.GetType("System.String"));
            //dt1.Columns.Add("法定代表人", Type.GetType("System.String"));
            //dt1.Columns.Add("官网", Type.GetType("System.String"));
            ////资质信息
            //dt2.Columns.Add("企业名称", Type.GetType("System.String"));
            //dt2.Columns.Add("资质类型", Type.GetType("System.String"));
            //dt2.Columns.Add("资质证书编号", Type.GetType("System.String"));
            //dt2.Columns.Add("发证机关", Type.GetType("System.String"));
            //dt2.Columns.Add("发证日期", Type.GetType("System.String"));
            //dt2.Columns.Add("有效期至", Type.GetType("System.String"));
            //dt2.Columns.Add("资质范围", Type.GetType("System.String"));
        }
       
        //开始采集
        private void BtnCai_Click(object sender, EventArgs e)
        {
            string jzurl = "query11.aspx?type=&typeNum=7&Province=1";//建筑企业
            //string sjurl = "query11.aspx?type=工程设计&typeNum=2&Province=1";//设计企业
            //string wsurl = "query41.aspx?Province=2";//外省企业
            webBrowser1.Navigate(url + jzurl);//加载url                   
            webBrowser1.Navigated += new WebBrowserNavigatedEventHandler(Web_Navigated);
            webBrowser1.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(Web_DocumentCompleted); //装载WebBrowser.DocumentCompleted事件;
        }
        private void Web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            count = count - 1;
            if (0 == count)
            {
                GetInfoByDOM(webBrowser1);
                System.Windows.Forms.HtmlDocument htdoc = webBrowser1.Document;
                HtmlElement htmlcounts = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_Label4");
                HtmlElement htmlpages = htdoc.GetElementById("lblPageCount");
                HtmlElement htmlpagesindex = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_lblPageIndex");
                HtmlElement btnclicktag = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_btnNext");
                lblCounts.Text = htmlcounts.InnerText;//总数据数 
                LblPages.Text = htmlpages.InnerText;//总页数 
                lblPageIndex.Text = htmlpagesindex.InnerText;//第几页
                btnclicktag.InvokeMember("click");//执行下一页点击事件            
            }
        }
        private void Web_Navigated(object sender, WebBrowserNavigatedEventArgs e)
        {
            count++;
        }
        /// <summary>
        /// 企业基本信息采集
        /// </summary>
        /// <param name="par"></param>
        public void GetBasicInfo(string par)
        {
            var html = url + "CorpDetails.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);
            DataRow dr = dt1.NewRow();
            dr["企业名称"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label10").InnerText;
            dr["统一信用代码"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label3").InnerText;
            dr["注册地址"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label1").InnerText;
            dr["企业类型"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label2").InnerText;
            dr["注册日期"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label4").InnerText;
            dr["营业地址"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label6").InnerText;
            dr["营业地址邮编"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label7").InnerText;
            dr["法定代表人"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label8").InnerText;
            dr["官网"] = htmlDoc.GetElementbyId("ctl00_ContentPlaceHolder1_FormView1_Label13").InnerText;
            dt1.Rows.Add(dr);
            GDV.DataSource = dt1;
        }
        //测试数据库连接
        private void BtnData_Click(object sender, EventArgs e)
        {
            string sql = "SELECT * FROM ims_hulu_info_shop";
            DataSet ds = DbHelperMySQL.Query(sql);
            DataTable dt = ds.Tables[0];
            GDV.DataSource = dt;
        }        //采集企业基本信息数据入库dt1
        private void GetInfoByDOM(WebBrowser WebBro)
        {
            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(WebBro.DocumentText);
            var res = Doc.GetElementbyId("ctl00_ContentPlaceHolder1_GridView1");//表格                        
            if (res != null)
            {
                var trs = res.SelectNodes(@"tr");//获取所有行
                trs.RemoveAt(0);//移除第一行,是表头
                for (int r = 0; r < trs.Count - 1; r++)
                {
                    HtmlNodeCollection tds = trs[r].SelectNodes(@"td");//td
                    if (tds != null)
                    {
                        for (int d = 0; d < tds.Count; d++)
                        {
                            if (d == 1)
                            {
                                //GetBasicInfo(GetHtmlAHref(tds[1].InnerHtml));//基本信息
                                GetCertByUrl(GetHtmlAHref(tds[1].InnerHtml),tds[1].InnerText);//资质信息
                            }
                        }                    }                }
            }
        }
        //采集企业资质信息数据入库dt2
        private void GetCertByUrl(string par,string name)
        {
            var html = url + "SubCorpCert.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);
            var res = htmlDoc.GetElementbyId("DataList1");//表格
            if (res != null)
            {
                var trs = res.SelectNodes(@"tr");//获取所有行
                for (int r = 0; r < trs.Count; r++)
                {
                    DataRow dr = dt2.NewRow();
                    var tds = trs[r].SelectNodes(@"td");//获取所有列
                    for (int d = 0; d < tds.Count; d++)
                    {
                        dr["企业名称"] = name;
                        dr["资质类型"] = GetInfoByDocStr(tds[0].InnerHtml,"DataList1_ctl0"+r+"_CertType");
                        dr["资质证书编号"] = GetInfoByDocStr(tds[0].InnerHtml, "DataList1_ctl0" + r + "_CertIDLabel");
                        dr["发证机关"] = GetInfoByDocStr(tds[0].InnerHtml, "DataList1_ctl0" + r + "_OrganNameLabel");
                        dr["发证日期"] = GetInfoByDocStr(tds[0].InnerHtml, "DataList1_ctl0" + r + "_Label3");
                        dr["有效期至"] = GetInfoByDocStr(tds[0].InnerHtml, "DataList1_ctl0" + r + "_Label1");
                        dr["资质范围"] = GetInfoByDocStr(tds[0].InnerHtml, "DataList1_ctl0" + r + "_Label2");
                    }
                    dt2.Rows.Add(dr);
                    GDV.DataSource = dt2;//每读取一个table插入数据库
                }
            }
        }
        //采集企业人员信息数据入库dt3
        //采集企业中标信息数据入库dt4
        //采集企业良坏信息数据入库dt5
        /// <summary>
        /// 从html文章Table字符串中返回指定ID的文本
        /// </summary>
        /// <param name="table"></param>
        /// <param name="ID"></param>
        /// <returns></returns>
        private string GetInfoByDocStr(string table,string ID)
        {
            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(table);
            var res = Doc.GetElementbyId(ID);
            return res.InnerText;
        }
        /// <summary>
        /// 获取超链接的参数值
        /// </summary>
        /// <param name="htmla"></param>
        /// <returns></returns>
        public string GetHtmlAHref(string htmla)
        {
            string reg = @"<a[^>]*href=([""'])?(?<href>[^'""]+)\1[^>]*>";
            var item = Regex.Match(htmla, reg, RegexOptions.IgnoreCase);
            int strindex = item.Groups["href"].Value.IndexOf("?");
            return item.Groups["href"].Value.Substring(strindex + 1).Replace("&amp;", "&");
        }
    }}

解决方案 »

  1.   

    这是我以前写的利用timer控件来查询分页:

    // Timer tick: store the rows of the page currently shown, then move to the next page.
    private void timer1_Tick(object sender, EventArgs e)
    {
        GetHsData();

        // Look the link up again on every tick: the element found for an earlier page
        // may no longer belong to the document that is displayed now.
        if (GetNextPage())
        {
            btnNext.InvokeMember("click");
        }
        else
        {
            // No "next page" link left: stop instead of calling InvokeMember on null.
            timer1.Stop();
        }
    }

    // Download button: starts the paging timer when the query form and a
    // "next page" link are both available.
    private void btnDownload_Click(object sender, EventArgs e)
    {
        if (GetQueryForm())
        {
            //tboxHS.SetAttribute("value", "38089119");
            //btnSubmit.InvokeMember("click");

            // This condition had been fused into the comment line above by the paste,
            // which made the timer start unconditionally.
            if (GetNextPage())
            {
                timer1.Interval = 20000;
                timer1.Start();
            }
        }

        ////tboxStartDate.SetAttribute("value", "2017-01-01");
        ////tboxEndDate.SetAttribute("value", "2017-12-31");
    }

    // Finds the last anchor whose markup contains "下一页" and stores it in btnNext.
    // Returns true when such an anchor exists.
    private bool GetNextPage()
    {
        btnNext = null;
        HtmlElementCollection htmlele = wbrMain.Document.GetElementsByTagName("a");
        foreach (HtmlElement item in htmlele)
        {
            if (item.OuterHtml.IndexOf("下一页", StringComparison.Ordinal) > 0)
            {
                btnNext = item;
            }
        }

        return btnNext != null;
    }

    // Copies every table row of the current document that has at least the ten expected
    // cells into HsData, hands the table to CMMBLL.UpateData and empties it again.
    private void GetHsData()
    {
        HtmlElementCollection tbs = wbrMain.Document.GetElementsByTagName("TABLE");
        foreach (HtmlElement tb in tbs)
        {
            HtmlElementCollection trs = tb.GetElementsByTagName("TR");
            foreach (HtmlElement tr in trs)
            {
                HtmlElementCollection tds = tr.GetElementsByTagName("TD");

                // Cells 0..9 are read below, so rows with fewer cells (header rows,
                // layout rows) are skipped instead of failing on a missing cell.
                if (tds.Count < 10)
                {
                    continue;
                }

                DataRow dr = HsData.NewRow();
                dr["ID"] = AutoPrimaryID.GenerateStringID();
                dr["ITEM_NO"] = tds[0].InnerText;
                dr["IE_DATE"] = tds[1].InnerText;
                dr["HS_CODE"] = tds[2].InnerText;
                dr["OWNER_NAME"] = tds[3].InnerText;
                dr["PRODUCT_DESC"] = tds[4].InnerText;
                dr["COUNTRY_NAME"] = tds[5].InnerText;
                dr["CUSTOMS_NAME"] = tds[6].InnerText;
                dr["ORIGIN_AREA"] = tds[7].InnerText;
                dr["DOLLAR_CURR"] = tds[8].InnerText;
                dr["QTY_UNIT"] = tds[9].InnerText;
                HsData.Rows.Add(dr);
            }
        }

        // NOTE(review): presumably writes the collected rows to the "HS_TEMP" table -
        // CMMBLL is not shown here.
        CMMBLL.UpateData("HS_TEMP", HsData);
        HsData.AcceptChanges();
        HsData.Clear();
        HsData.AcceptChanges();
    }

    // DocumentCompleted handler of the browser: enables downloading once logged in,
    // otherwise performs the login.
    private void wbrMain_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        if (onLogin)
        {
            btnDownload.Enabled = true;
            isLogin = true;
        }
        else
        {
            Login();
        }
    }
      

  2.   

    谢谢孤独侠的分享!我也为这个问题烦恼了一周。如何抓取下面这个网址里“高管名单”表格的第 2、3、4……直到最后一页的数据呢?我找了十多篇文章,都没有结果。
       http://data.10jqka.com.cn/financial/ggjy/
      

  3.   

    给你个网址:http://data.10jqka.com.cn/ajax/ggjy/field/enddate/order/desc/page/3/ajax/1/
    要切换页码,只需修改 URL 中 page/ 后面的数字(上面的例子请求的是第 3 页)。