using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Web;
using System.Text;
using HtmlAgilityPack;
using Conn;

namespace DataApp
{
    /// <summary>
    /// Main window of the scraper. It pages through a company list inside the
    /// embedded <c>webBrowser1</c> control and, for every listed company, downloads
    /// the detail pages with HtmlAgilityPack and collects the values into
    /// in-memory <see cref="DataTable"/>s that are shown in the <c>GDV</c> grid.
    /// </summary>
    public partial class MainForm : Form
    {
        // Base address of the query site; page names and query strings are appended to it.
        string url = @"http://117.159.3.6:9035/QueryWeb/";

        // Incremented by Web_Navigated and decremented by Web_DocumentCompleted;
        // the page is scraped when it drops back to zero.
        // NOTE(review): presumably this waits for all frames of the page - confirm.
        int count = 0;

        DataTable dt1 = new DataTable("t1"); // company basic information
        DataTable dt2 = new DataTable("t2"); // company qualification certificates

        public MainForm()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Creates the columns of both result tables. Without them every
        /// <c>dr["..."]</c> assignment below throws an <see cref="ArgumentException"/>.
        /// </summary>
        private void MainForm_Load(object sender, EventArgs e)
        {
            // Company basic information
            EnsureColumns(dt1,
                "企业名称", "统一信用代码", "注册地址", "企业类型", "注册日期",
                "营业地址", "营业地址邮编", "法定代表人", "官网");

            // Qualification certificates
            EnsureColumns(dt2,
                "企业名称", "资质类型", "资质证书编号", "发证机关",
                "发证日期", "有效期至", "资质范围");
        }

        /// <summary>
        /// Adds a string column for every name that the table does not contain yet.
        /// </summary>
        private static void EnsureColumns(DataTable table, params string[] names)
        {
            foreach (string columnName in names)
            {
                if (!table.Columns.Contains(columnName))
                {
                    table.Columns.Add(columnName, typeof(string));
                }
            }
        }

        /// <summary>
        /// Starts the collection run by loading the first list page in the browser control.
        /// </summary>
        private void BtnCai_Click(object sender, EventArgs e)
        {
            string jzurl = "query11.aspx?type=&typeNum=7&Province=1"; // construction companies
            //string sjurl = "query11.aspx?type=工程设计&typeNum=2&Province=1"; // design companies
            //string wsurl = "query41.aspx?Province=2"; // companies from other provinces

            // Detach before attaching so that clicking the button again does not
            // register the handlers a second time (which would run the scrape
            // twice per page and unbalance the navigation counter).
            webBrowser1.Navigated -= Web_Navigated;
            webBrowser1.DocumentCompleted -= Web_DocumentCompleted;
            webBrowser1.Navigated += Web_Navigated;
            webBrowser1.DocumentCompleted += Web_DocumentCompleted;

            webBrowser1.Navigate(url + jzurl);
        }

        /// <summary>
        /// Scrapes the list page that finished loading, refreshes the paging labels
        /// and clicks the "next page" button to move on.
        /// </summary>
        private void Web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            count = count - 1;
            if (count != 0)
            {
                return;
            }

            // NOTE: GetInfoByDOM downloads one page per listed company synchronously,
            // so the UI is unresponsive while a list page is being processed.
            GetInfoByDOM(webBrowser1);

            System.Windows.Forms.HtmlDocument htdoc = webBrowser1.Document;
            if (htdoc == null)
            {
                return;
            }

            HtmlElement htmlcounts = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_Label4");
            HtmlElement htmlpages = htdoc.GetElementById("lblPageCount");
            HtmlElement htmlpagesindex = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_lblPageIndex");
            HtmlElement btnclicktag = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_btnNext");

            // Any of these elements can be missing (e.g. on an error page);
            // update only what is actually present.
            if (htmlcounts != null)
            {
                lblCounts.Text = htmlcounts.InnerText; // total number of records
            }
            if (htmlpages != null)
            {
                LblPages.Text = htmlpages.InnerText; // total number of pages
            }
            if (htmlpagesindex != null)
            {
                lblPageIndex.Text = htmlpagesindex.InnerText; // current page
            }

            // Stop once the current page number has reached the page count.
            // NOTE(review): assumes both labels hold plain integers; when they do
            // not parse, the button is clicked exactly as before.
            int pageIndex, pageCount;
            bool onLastPage = htmlpagesindex != null && htmlpages != null
                && int.TryParse(htmlpagesindex.InnerText, out pageIndex)
                && int.TryParse(htmlpages.InnerText, out pageCount)
                && pageIndex >= pageCount;

            if (btnclicktag != null && !onLastPage)
            {
                btnclicktag.InvokeMember("click"); // go to the next page
            }
        }

        /// <summary>
        /// Counts a started navigation; see <see cref="Web_DocumentCompleted"/>.
        /// </summary>
        private void Web_Navigated(object sender, WebBrowserNavigatedEventArgs e)
        {
            count++;
        }

        /// <summary>
        /// Downloads <c>CorpDetails.aspx</c> for one company and appends its basic
        /// information as a new row to <c>dt1</c>.
        /// </summary>
        /// <param name="par">Query string (without the leading '?') identifying the company.</param>
        public void GetBasicInfo(string par)
        {
            var html = url + "CorpDetails.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);

            const string prefix = "ctl00_ContentPlaceHolder1_FormView1_";
            DataRow dr = dt1.NewRow();
            dr["企业名称"] = GetTextById(htmlDoc, prefix + "Label10");
            dr["统一信用代码"] = GetTextById(htmlDoc, prefix + "Label3");
            dr["注册地址"] = GetTextById(htmlDoc, prefix + "Label1");
            dr["企业类型"] = GetTextById(htmlDoc, prefix + "Label2");
            dr["注册日期"] = GetTextById(htmlDoc, prefix + "Label4");
            dr["营业地址"] = GetTextById(htmlDoc, prefix + "Label6");
            dr["营业地址邮编"] = GetTextById(htmlDoc, prefix + "Label7");
            dr["法定代表人"] = GetTextById(htmlDoc, prefix + "Label8");
            dr["官网"] = GetTextById(htmlDoc, prefix + "Label13");
            dt1.Rows.Add(dr);
            GDV.DataSource = dt1;
        }

        /// <summary>
        /// Returns the inner text of the element with the given id, or an empty
        /// string when the document has no such element.
        /// </summary>
        private static string GetTextById(HtmlAgilityPack.HtmlDocument doc, string id)
        {
            HtmlNode node = doc.GetElementbyId(id);
            return node == null ? string.Empty : node.InnerText;
        }

        /// <summary>
        /// Database connection test: runs a fixed query and shows the first result table.
        /// </summary>
        private void BtnData_Click(object sender, EventArgs e)
        {
            string sql = "SELECT * FROM ims_hulu_info_shop";
            DataSet ds = DbHelperMySQL.Query(sql);
            if (ds == null || ds.Tables.Count == 0)
            {
                return; // nothing to display
            }
            GDV.DataSource = ds.Tables[0];
        }

        /// <summary>
        /// Walks the company grid of the page currently shown in the browser and
        /// collects the certificate data of every listed company.
        /// </summary>
        /// <param name="WebBro">Browser control holding the loaded list page.</param>
        private void GetInfoByDOM(WebBrowser WebBro)
        {
            string pageHtml = WebBro.DocumentText;
            if (string.IsNullOrEmpty(pageHtml))
            {
                return;
            }

            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(pageHtml);
            var res = Doc.GetElementbyId("ctl00_ContentPlaceHolder1_GridView1"); // the grid
            if (res == null)
            {
                return;
            }

            var trs = res.SelectNodes(@"tr"); // null when the grid has no rows
            if (trs == null)
            {
                return;
            }

            // Row 0 is the header; the last row is skipped as well, exactly as the
            // original loop bound did (presumably the pager row - confirm).
            for (int r = 1; r < trs.Count - 1; r++)
            {
                HtmlNodeCollection tds = trs[r].SelectNodes(@"td");
                if (tds == null || tds.Count < 2)
                {
                    continue;
                }

                // The second cell holds the company name wrapped in the detail link.
                string par = GetHtmlAHref(tds[1].InnerHtml);
                if (par.Length == 0)
                {
                    continue; // no link in this row, nothing to follow
                }

                //GetBasicInfo(par); // basic information
                GetCertByUrl(par, tds[1].InnerText); // qualification certificates
            }
        }

        /// <summary>
        /// Downloads <c>SubCorpCert.aspx</c> for one company and appends one row per
        /// certificate to <c>dt2</c>.
        /// </summary>
        /// <param name="par">Query string (without the leading '?') identifying the company.</param>
        /// <param name="name">Company name stored with every certificate row.</param>
        private void GetCertByUrl(string par, string name)
        {
            var html = url + "SubCorpCert.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);
            var res = htmlDoc.GetElementbyId("DataList1"); // the certificate list
            if (res == null)
            {
                return;
            }

            var trs = res.SelectNodes(@"tr");
            if (trs == null)
            {
                return;
            }

            bool added = false;
            for (int r = 0; r < trs.Count; r++)
            {
                var tds = trs[r].SelectNodes(@"td");
                if (tds == null || tds.Count == 0)
                {
                    continue;
                }

                string cell = tds[0].InnerHtml;
                // Item ids use a two-digit index (ctl00, ctl01, ..., ctl10); plain
                // concatenation with "ctl0" produced "ctl010" from the 11th item on.
                string prefix = "DataList1_ctl" + r.ToString("D2") + "_";

                DataRow dr = dt2.NewRow();
                dr["企业名称"] = name;
                dr["资质类型"] = GetInfoByDocStr(cell, prefix + "CertType");
                dr["资质证书编号"] = GetInfoByDocStr(cell, prefix + "CertIDLabel");
                dr["发证机关"] = GetInfoByDocStr(cell, prefix + "OrganNameLabel");
                dr["发证日期"] = GetInfoByDocStr(cell, prefix + "Label3");
                dr["有效期至"] = GetInfoByDocStr(cell, prefix + "Label1");
                dr["资质范围"] = GetInfoByDocStr(cell, prefix + "Label2");
                dt2.Rows.Add(dr);
                added = true;
            }

            if (added)
            {
                GDV.DataSource = dt2;
            }
        }

        // TODO: collect company staff information into dt3
        // TODO: collect company bid-winning information into dt4
        // TODO: collect company good/bad record information into dt5

        /// <summary>
        /// Parses an HTML fragment and returns the inner text of the element with the given id.
        /// </summary>
        /// <param name="table">HTML fragment to search.</param>
        /// <param name="ID">Id of the wanted element.</param>
        /// <returns>The element's inner text, or an empty string when the fragment
        /// is empty or contains no element with that id.</returns>
        private string GetInfoByDocStr(string table, string ID)
        {
            if (string.IsNullOrEmpty(table))
            {
                return string.Empty;
            }

            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(table);
            var res = Doc.GetElementbyId(ID);
            return res == null ? string.Empty : res.InnerText;
        }

        /// <summary>
        /// Extracts the query string of the first hyperlink found in an HTML fragment.
        /// </summary>
        /// <param name="htmla">HTML fragment containing an <c>&lt;a href="..."&gt;</c> tag.</param>
        /// <returns>Everything after the first '?' of the href (the whole href when it
        /// has no '?'), with <c>&amp;amp;</c> decoded to <c>&amp;</c>; an empty
        /// string when no link is found.</returns>
        public string GetHtmlAHref(string htmla)
        {
            if (string.IsNullOrEmpty(htmla))
            {
                return string.Empty;
            }

            string reg = @"<a[^>]*href=([""'])?(?<href>[^'""]+)\1[^>]*>";
            var item = Regex.Match(htmla, reg, RegexOptions.IgnoreCase);
            string href = item.Groups["href"].Value;
            int strindex = href.IndexOf('?');

            // The attribute value is HTML-encoded, so parameter separators arrive
            // as "&amp;" and must be decoded before the string is used in a URL.
            return href.Substring(strindex + 1).Replace("&amp;", "&");
        }
    }
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Web;
using System.Text;
using HtmlAgilityPack;
using Conn;

namespace DataApp
{
    /// <summary>
    /// Main window of the scraper. It pages through a company list inside the
    /// embedded <c>webBrowser1</c> control and, for every listed company, downloads
    /// the detail pages with HtmlAgilityPack and collects the values into
    /// in-memory <see cref="DataTable"/>s that are shown in the <c>GDV</c> grid.
    /// </summary>
    public partial class MainForm : Form
    {
        // Base address of the query site; page names and query strings are appended to it.
        string url = @"http://117.159.3.6:9035/QueryWeb/";

        // Incremented by Web_Navigated and decremented by Web_DocumentCompleted;
        // the page is scraped when it drops back to zero.
        // NOTE(review): presumably this waits for all frames of the page - confirm.
        int count = 0;

        DataTable dt1 = new DataTable("t1"); // company basic information
        DataTable dt2 = new DataTable("t2"); // company qualification certificates

        public MainForm()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Creates the columns of both result tables. Without them every
        /// <c>dr["..."]</c> assignment below throws an <see cref="ArgumentException"/>.
        /// </summary>
        private void MainForm_Load(object sender, EventArgs e)
        {
            // Company basic information
            EnsureColumns(dt1,
                "企业名称", "统一信用代码", "注册地址", "企业类型", "注册日期",
                "营业地址", "营业地址邮编", "法定代表人", "官网");

            // Qualification certificates
            EnsureColumns(dt2,
                "企业名称", "资质类型", "资质证书编号", "发证机关",
                "发证日期", "有效期至", "资质范围");
        }

        /// <summary>
        /// Adds a string column for every name that the table does not contain yet.
        /// </summary>
        private static void EnsureColumns(DataTable table, params string[] names)
        {
            foreach (string columnName in names)
            {
                if (!table.Columns.Contains(columnName))
                {
                    table.Columns.Add(columnName, typeof(string));
                }
            }
        }

        /// <summary>
        /// Starts the collection run by loading the first list page in the browser control.
        /// </summary>
        private void BtnCai_Click(object sender, EventArgs e)
        {
            string jzurl = "query11.aspx?type=&typeNum=7&Province=1"; // construction companies
            //string sjurl = "query11.aspx?type=工程设计&typeNum=2&Province=1"; // design companies
            //string wsurl = "query41.aspx?Province=2"; // companies from other provinces

            // Detach before attaching so that clicking the button again does not
            // register the handlers a second time (which would run the scrape
            // twice per page and unbalance the navigation counter).
            webBrowser1.Navigated -= Web_Navigated;
            webBrowser1.DocumentCompleted -= Web_DocumentCompleted;
            webBrowser1.Navigated += Web_Navigated;
            webBrowser1.DocumentCompleted += Web_DocumentCompleted;

            webBrowser1.Navigate(url + jzurl);
        }

        /// <summary>
        /// Scrapes the list page that finished loading, refreshes the paging labels
        /// and clicks the "next page" button to move on.
        /// </summary>
        private void Web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            count = count - 1;
            if (count != 0)
            {
                return;
            }

            // NOTE: GetInfoByDOM downloads one page per listed company synchronously,
            // so the UI is unresponsive while a list page is being processed.
            GetInfoByDOM(webBrowser1);

            System.Windows.Forms.HtmlDocument htdoc = webBrowser1.Document;
            if (htdoc == null)
            {
                return;
            }

            HtmlElement htmlcounts = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_Label4");
            HtmlElement htmlpages = htdoc.GetElementById("lblPageCount");
            HtmlElement htmlpagesindex = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_lblPageIndex");
            HtmlElement btnclicktag = htdoc.GetElementById("ctl00_ContentPlaceHolder1_GridView1_ctl13_btnNext");

            // Any of these elements can be missing (e.g. on an error page);
            // update only what is actually present.
            if (htmlcounts != null)
            {
                lblCounts.Text = htmlcounts.InnerText; // total number of records
            }
            if (htmlpages != null)
            {
                LblPages.Text = htmlpages.InnerText; // total number of pages
            }
            if (htmlpagesindex != null)
            {
                lblPageIndex.Text = htmlpagesindex.InnerText; // current page
            }

            // Stop once the current page number has reached the page count.
            // NOTE(review): assumes both labels hold plain integers; when they do
            // not parse, the button is clicked exactly as before.
            int pageIndex, pageCount;
            bool onLastPage = htmlpagesindex != null && htmlpages != null
                && int.TryParse(htmlpagesindex.InnerText, out pageIndex)
                && int.TryParse(htmlpages.InnerText, out pageCount)
                && pageIndex >= pageCount;

            if (btnclicktag != null && !onLastPage)
            {
                btnclicktag.InvokeMember("click"); // go to the next page
            }
        }

        /// <summary>
        /// Counts a started navigation; see <see cref="Web_DocumentCompleted"/>.
        /// </summary>
        private void Web_Navigated(object sender, WebBrowserNavigatedEventArgs e)
        {
            count++;
        }

        /// <summary>
        /// Downloads <c>CorpDetails.aspx</c> for one company and appends its basic
        /// information as a new row to <c>dt1</c>.
        /// </summary>
        /// <param name="par">Query string (without the leading '?') identifying the company.</param>
        public void GetBasicInfo(string par)
        {
            var html = url + "CorpDetails.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);

            const string prefix = "ctl00_ContentPlaceHolder1_FormView1_";
            DataRow dr = dt1.NewRow();
            dr["企业名称"] = GetTextById(htmlDoc, prefix + "Label10");
            dr["统一信用代码"] = GetTextById(htmlDoc, prefix + "Label3");
            dr["注册地址"] = GetTextById(htmlDoc, prefix + "Label1");
            dr["企业类型"] = GetTextById(htmlDoc, prefix + "Label2");
            dr["注册日期"] = GetTextById(htmlDoc, prefix + "Label4");
            dr["营业地址"] = GetTextById(htmlDoc, prefix + "Label6");
            dr["营业地址邮编"] = GetTextById(htmlDoc, prefix + "Label7");
            dr["法定代表人"] = GetTextById(htmlDoc, prefix + "Label8");
            dr["官网"] = GetTextById(htmlDoc, prefix + "Label13");
            dt1.Rows.Add(dr);
            GDV.DataSource = dt1;
        }

        /// <summary>
        /// Returns the inner text of the element with the given id, or an empty
        /// string when the document has no such element.
        /// </summary>
        private static string GetTextById(HtmlAgilityPack.HtmlDocument doc, string id)
        {
            HtmlNode node = doc.GetElementbyId(id);
            return node == null ? string.Empty : node.InnerText;
        }

        /// <summary>
        /// Database connection test: runs a fixed query and shows the first result table.
        /// </summary>
        private void BtnData_Click(object sender, EventArgs e)
        {
            string sql = "SELECT * FROM ims_hulu_info_shop";
            DataSet ds = DbHelperMySQL.Query(sql);
            if (ds == null || ds.Tables.Count == 0)
            {
                return; // nothing to display
            }
            GDV.DataSource = ds.Tables[0];
        }

        /// <summary>
        /// Walks the company grid of the page currently shown in the browser and
        /// collects the certificate data of every listed company.
        /// </summary>
        /// <param name="WebBro">Browser control holding the loaded list page.</param>
        private void GetInfoByDOM(WebBrowser WebBro)
        {
            string pageHtml = WebBro.DocumentText;
            if (string.IsNullOrEmpty(pageHtml))
            {
                return;
            }

            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(pageHtml);
            var res = Doc.GetElementbyId("ctl00_ContentPlaceHolder1_GridView1"); // the grid
            if (res == null)
            {
                return;
            }

            var trs = res.SelectNodes(@"tr"); // null when the grid has no rows
            if (trs == null)
            {
                return;
            }

            // Row 0 is the header; the last row is skipped as well, exactly as the
            // original loop bound did (presumably the pager row - confirm).
            for (int r = 1; r < trs.Count - 1; r++)
            {
                HtmlNodeCollection tds = trs[r].SelectNodes(@"td");
                if (tds == null || tds.Count < 2)
                {
                    continue;
                }

                // The second cell holds the company name wrapped in the detail link.
                string par = GetHtmlAHref(tds[1].InnerHtml);
                if (par.Length == 0)
                {
                    continue; // no link in this row, nothing to follow
                }

                //GetBasicInfo(par); // basic information
                GetCertByUrl(par, tds[1].InnerText); // qualification certificates
            }
        }

        /// <summary>
        /// Downloads <c>SubCorpCert.aspx</c> for one company and appends one row per
        /// certificate to <c>dt2</c>.
        /// </summary>
        /// <param name="par">Query string (without the leading '?') identifying the company.</param>
        /// <param name="name">Company name stored with every certificate row.</param>
        private void GetCertByUrl(string par, string name)
        {
            var html = url + "SubCorpCert.aspx?" + par;
            var web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(html);
            var res = htmlDoc.GetElementbyId("DataList1"); // the certificate list
            if (res == null)
            {
                return;
            }

            var trs = res.SelectNodes(@"tr");
            if (trs == null)
            {
                return;
            }

            bool added = false;
            for (int r = 0; r < trs.Count; r++)
            {
                var tds = trs[r].SelectNodes(@"td");
                if (tds == null || tds.Count == 0)
                {
                    continue;
                }

                string cell = tds[0].InnerHtml;
                // Item ids use a two-digit index (ctl00, ctl01, ..., ctl10); plain
                // concatenation with "ctl0" produced "ctl010" from the 11th item on.
                string prefix = "DataList1_ctl" + r.ToString("D2") + "_";

                DataRow dr = dt2.NewRow();
                dr["企业名称"] = name;
                dr["资质类型"] = GetInfoByDocStr(cell, prefix + "CertType");
                dr["资质证书编号"] = GetInfoByDocStr(cell, prefix + "CertIDLabel");
                dr["发证机关"] = GetInfoByDocStr(cell, prefix + "OrganNameLabel");
                dr["发证日期"] = GetInfoByDocStr(cell, prefix + "Label3");
                dr["有效期至"] = GetInfoByDocStr(cell, prefix + "Label1");
                dr["资质范围"] = GetInfoByDocStr(cell, prefix + "Label2");
                dt2.Rows.Add(dr);
                added = true;
            }

            if (added)
            {
                GDV.DataSource = dt2;
            }
        }

        // TODO: collect company staff information into dt3
        // TODO: collect company bid-winning information into dt4
        // TODO: collect company good/bad record information into dt5

        /// <summary>
        /// Parses an HTML fragment and returns the inner text of the element with the given id.
        /// </summary>
        /// <param name="table">HTML fragment to search.</param>
        /// <param name="ID">Id of the wanted element.</param>
        /// <returns>The element's inner text, or an empty string when the fragment
        /// is empty or contains no element with that id.</returns>
        private string GetInfoByDocStr(string table, string ID)
        {
            if (string.IsNullOrEmpty(table))
            {
                return string.Empty;
            }

            var Doc = new HtmlAgilityPack.HtmlDocument();
            Doc.LoadHtml(table);
            var res = Doc.GetElementbyId(ID);
            return res == null ? string.Empty : res.InnerText;
        }

        /// <summary>
        /// Extracts the query string of the first hyperlink found in an HTML fragment.
        /// </summary>
        /// <param name="htmla">HTML fragment containing an <c>&lt;a href="..."&gt;</c> tag.</param>
        /// <returns>Everything after the first '?' of the href (the whole href when it
        /// has no '?'), with <c>&amp;amp;</c> decoded to <c>&amp;</c>; an empty
        /// string when no link is found.</returns>
        public string GetHtmlAHref(string htmla)
        {
            if (string.IsNullOrEmpty(htmla))
            {
                return string.Empty;
            }

            string reg = @"<a[^>]*href=([""'])?(?<href>[^'""]+)\1[^>]*>";
            var item = Regex.Match(htmla, reg, RegexOptions.IgnoreCase);
            string href = item.Groups["href"].Value;
            int strindex = href.IndexOf('?');

            // The attribute value is HTML-encoded, so parameter separators arrive
            // as "&amp;" and must be decoded before the string is used in a URL.
            return href.Substring(strindex + 1).Replace("&amp;", "&");
        }
    }
}
解决方案 »
- |ZYCIIS| WPF中的MVVM中绑定的值为Bool型如IsRunning,而我XAML中要绑定他的!IsRunning要怎么办?难道我必须写一个转换? 谢
- 求个GIS开发解决方案
- 由于从数据库中查询出来的数据量过大,listview动不了了,在线等高手
- datalist 分页 怎么样循环每页信息
- SOCKET异步通讯问题
- 如何去掉asp.net2.0中TreeView 每个节点的Link 或者可以点Link但是不刷页面
- Visio 2002 数据库建模,真是爽.
- 如何将数据一个数据集按照指定的XML格式 输出成 XML?
- 有什么接口,可以让我直接调用firefox或者chrome
- Form2怎么才能取到Form1 的数据
- visionpro8.2和aca2500-14gm(basler)怎么实现IO通讯
- C#怎么在一个窗口中调用另一个窗口的串口啊
// NOTE(review): the header of this method is missing from the pasted snippet
// (presumably the timer1 tick handler - confirm); its body is kept unchanged.
{
GetHsData();
btnNext.InvokeMember("click");
}

/// <summary>
/// Starts the periodic download: once the query form is available and a
/// "next page" link has been located, the timer is started.
/// </summary>
private void btnDownload_Click(object sender, EventArgs e)
{
    if (GetQueryForm())
    {
        //tboxHS.SetAttribute("value", "38089119");
        //btnSubmit.InvokeMember("click");

        // This condition had been swallowed by the comment line above when the
        // snippet lost its line breaks; without it the timer started even when
        // no "next page" link existed and btnNext was still null.
        if (GetNextPage())
        {
            timer1.Interval = 20000;
            timer1.Enabled = true;
            timer1.Start();
        }
    }
    ////tboxStartDate.SetAttribute("value", "2017-01-01");
    ////tboxEndDate.SetAttribute("value", "2017-12-31");
}

/// <summary>
/// Looks for the "next page" link in the loaded document and stores it in
/// <c>btnNext</c>; when several anchors match, the last one wins.
/// </summary>
/// <returns><c>true</c> when a link was found, otherwise <c>false</c>.</returns>
private bool GetNextPage()
{
    btnNext = null;
    if (wbrMain.Document == null)
    {
        return false;
    }

    foreach (HtmlElement item in wbrMain.Document.GetElementsByTagName("a"))
    {
        string outer = item.OuterHtml;
        if (outer != null && outer.IndexOf("下一页", StringComparison.Ordinal) > 0)
        {
            btnNext = item;
        }
    }

    return btnNext != null;
}

/// <summary>
/// Copies every table row of the loaded document that has at least ten cells
/// into <c>HsData</c>, hands the table to <c>CMMBLL.UpateData</c> and then
/// empties it for the next page.
/// </summary>
private void GetHsData()
{
    // Number of cells read from each row (indices 0..9).
    const int requiredCells = 10;

    if (wbrMain.Document == null)
    {
        return;
    }

    HtmlElementCollection tbs = wbrMain.Document.GetElementsByTagName("TABLE");
    foreach (HtmlElement tb in tbs)
    {
        HtmlElementCollection trs = tb.GetElementsByTagName("TR");
        foreach (HtmlElement tr in trs)
        {
            HtmlElementCollection tds = tr.GetElementsByTagName("TD");
            if (tds.Count < requiredCells)
            {
                // Header, pager or layout rows do not carry all ten cells;
                // indexing them would throw.
                continue;
            }

            DataRow dr = HsData.NewRow();
            dr["ID"] = AutoPrimaryID.GenerateStringID();
            dr["ITEM_NO"] = tds[0].InnerText;
            dr["IE_DATE"] = tds[1].InnerText;
            dr["HS_CODE"] = tds[2].InnerText;
            dr["OWNER_NAME"] = tds[3].InnerText;
            dr["PRODUCT_DESC"] = tds[4].InnerText;
            dr["COUNTRY_NAME"] = tds[5].InnerText;
            dr["CUSTOMS_NAME"] = tds[6].InnerText;
            dr["ORIGIN_AREA"] = tds[7].InnerText;
            dr["DOLLAR_CURR"] = tds[8].InnerText;
            dr["QTY_UNIT"] = tds[9].InnerText;
            HsData.Rows.Add(dr);
        }
    }

    CMMBLL.UpateData("HS_TEMP", HsData);
    HsData.AcceptChanges();
    HsData.Clear();
    HsData.AcceptChanges();
}

/// <summary>
/// After each page load: enables the download button once <c>onLogin</c> is
/// set, otherwise calls <c>Login()</c>.
/// </summary>
private void wbrMain_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    if (onLogin)
    {
        btnDownload.Enabled = true;
        isLogin = true;
    }
    else
    {
        Login();
    }
}
http://data.10jqka.com.cn/financial/ggjy/
要切换页码就改Page后面的数字