我在做一个数据采集程序,需要同时从多个网站采集数据。如果其中某个网站出现故障打不开,或者打开得非常慢,就会引发异常,导致程序无法响应。我希望部分网站采集不了时不影响程序的运行,其他采集任务仍能继续。我觉得只能用多线程来处理,但之前没有用过,所以请大家给个示例代码,谢谢!主要代码如下:
//采集
/// <summary>
/// Downloads the page at <paramref name="url"/> and assigns its text to
/// collectInfo (declared outside this snippet).
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="coding">Encoding used to decode the response body.</param>
/// <remarks>
/// Any exception raised while requesting or reading the page is reported
/// with a message box and not rethrown.
/// </remarks>
private void getInfo(string url,Encoding coding)
{
    //...
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    // Timeout only limits how long GetResponse() waits for the response
    // to start. Reads from the response stream are governed by
    // ReadWriteTimeout (default 300000 ms), so a site that answers but
    // then delivers its body very slowly needs this second limit too.
    request.Timeout = 5000;
    request.ReadWriteTimeout = 5000;
    try
    {
        // The using blocks release the response, stream and reader even
        // when ReadToEnd() throws; the original leaked all three on error.
        using (WebResponse response = request.GetResponse())
        using (Stream strm = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(strm, coding))
        {
            collectInfo = sr.ReadToEnd();
        }
    }
    catch(Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    //...
}
// Called from the main program
// Click handler: fetches the three sites one after another, on the calling
// thread, through a single CollectionInfo instance.
private void button1_Click(object sender, EventArgs e)
{
    //...
    CollectionInfo collector = new CollectionInfo();
    string[] pages = new string[]
    {
        "http://www.site1.com/index.html",
        "http://www.site2.com/index.aspx",
        "http://www.site3.com/index.php"
    };
    foreach (string page in pages)
    {
        collector.getInfo(page, Encoding.UTF8);
    }
    //...
}当我点采集的时候,遇到连接不上的网页,就会出错。我设置了HttpWebRequest的timeout也没用。请大家看看如何利用多线程改一下
/// <summary>
/// Downloads the page at <paramref name="url"/> and assigns its text to
/// collectInfo (declared outside this snippet).
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="coding">Encoding used to decode the response body.</param>
/// <remarks>
/// Any exception raised while requesting or reading the page is reported
/// with a message box and not rethrown.
/// </remarks>
private void getInfo(string url,Encoding coding)
{
    //...
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    // Timeout only limits how long GetResponse() waits for the response
    // to start. Reads from the response stream are governed by
    // ReadWriteTimeout (default 300000 ms), so a site that answers but
    // then delivers its body very slowly needs this second limit too.
    request.Timeout = 5000;
    request.ReadWriteTimeout = 5000;
    try
    {
        // The using blocks release the response, stream and reader even
        // when ReadToEnd() throws; the original leaked all three on error.
        using (WebResponse response = request.GetResponse())
        using (Stream strm = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(strm, coding))
        {
            collectInfo = sr.ReadToEnd();
        }
    }
    catch(Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    //...
}
// Called from the main program
// Click handler: fetches the three sites one after another, on the calling
// thread, through a single CollectionInfo instance.
private void button1_Click(object sender, EventArgs e)
{
    //...
    CollectionInfo collector = new CollectionInfo();
    string[] pages = new string[]
    {
        "http://www.site1.com/index.html",
        "http://www.site2.com/index.aspx",
        "http://www.site3.com/index.php"
    };
    foreach (string page in pages)
    {
        collector.getInfo(page, Encoding.UTF8);
    }
    //...
}当我点采集的时候,遇到连接不上的网页,就会出错。我设置了HttpWebRequest的timeout也没用。请大家看看如何利用多线程改一下
// NOTE(review): the header of the enclosing method is missing from the
// original post; presumably this is the body of the click handler - confirm.
{
    this.Start("http://www.site1.com/index.html");
    this.Start("http://www.site2.com/index.aspx");
    this.Start("http://www.site3.com/index.php");
}

/// <summary>
/// Starts a new thread that collects the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address handed to the worker thread.</param>
private void Start(string url)
{
    // A ThreadStart delegate wraps a parameterless method and cannot carry
    // an argument, so "new ThreadStart(url, Run)" does not compile.
    // Run(object) matches ParameterizedThreadStart, and the url is passed
    // to it through Thread.Start(object).
    ParameterizedThreadStart ts = new ParameterizedThreadStart(Run);
    Thread thread = new Thread(ts);
    thread.Start(url);
}

/// <summary>
/// Thread entry point: collects the page whose address is given by
/// <paramref name="obj"/>.
/// </summary>
/// <param name="obj">The url, as passed to Thread.Start(object).</param>
private void Run(object obj)
{
    string url = obj.ToString();
    new CollectionInfo().getInfo(url, Encoding.UTF8);
}
// Collects site 1 with its own CollectionInfo instance.
// The method header was missing from the post; it is restored here because
// button1_Click wraps run1 in a ThreadStart, alongside run2 and run3.
private void run1()
{
    CollectionInfo coll = new CollectionInfo();
    coll.getInfo("http://www.site1.com/index.html", Encoding.UTF8);
}
// Collects site 2 with a fresh CollectionInfo.
private void run2()
{
    new CollectionInfo().getInfo("http://www.site2.com/index.aspx", Encoding.UTF8);
}

// Collects site 3 with a fresh CollectionInfo.
private void run3()
{
    new CollectionInfo().getInfo("http://www.site3.com/index.php", Encoding.UTF8);
}
// Click handler: starts one thread per site (run1, run2, run3), in that
// order, and returns without waiting for any of them.
private void button1_Click(object sender, EventArgs e)
{
    Thread firstWorker = new Thread(new ThreadStart(run1));
    firstWorker.Start();

    Thread secondWorker = new Thread(new ThreadStart(run2));
    secondWorker.Start();

    Thread thirdWorker = new Thread(new ThreadStart(run3));
    thirdWorker.Start();
}
ThreadStart 委托是不能够传递参数的。相关问题:主线程与所创建线程之间交换数据。
这样就可以用类属性传值了。
分静态方法和非静态方法两种使用办法。
using System;
using System.Threading;

// Shows the two ways of giving a thread its procedure: a static method and
// an instance method.
class Test
{
    static void Main()
    {
        // Static thread procedure: the delegate is built from the class
        // name and the method name. From .NET Framework 2.0 on, the
        // explicit delegate is optional and the method can be passed
        // straight to the Thread constructor, for example:
        //
        //     Thread newThread = new Thread(Work.DoWork);
        //
        Thread staticWorker = new Thread(new ThreadStart(Work.DoWork));
        staticWorker.Start();

        // Instance thread procedure: the delegate is built from the
        // instance variable and the method name, so the thread can read
        // data stored on that instance. The explicit delegate is likewise
        // optional from .NET Framework 2.0 on.
        Work job = new Work();
        job.Data = 42;
        Thread instanceWorker = new Thread(new ThreadStart(job.DoMoreWork));
        instanceWorker.Start();
    }
}

// Holds the thread procedures and the data the instance procedure prints.
class Work
{
    // Value printed by DoMoreWork.
    public int Data;

    // Static thread procedure; writes a fixed line to the console.
    public static void DoWork()
    {
        Console.WriteLine("Static thread procedure.");
    }

    // Instance thread procedure; writes Data to the console.
    public void DoMoreWork()
    {
        Console.WriteLine("Instance thread procedure. Data={0}", Data);
    }
}不过还是像楼上说的,把Url都存在一个队列里,把这个队列当作类的属性。
然后声明几个该类的实例,分别在各自的线程里调用即可。
不过要注意这几个线程之间的同步:用同步对象(例如 lock)保护队列,防止多个线程同时对队列进行重入操作。
// NOTE(review): the header of the enclosing method is missing from the
// original post; presumably this is the body of the click handler - confirm.
{
    this.Start("http://www.site1.com/index.html");
    this.Start("http://www.site2.com/index.aspx");
    this.Start("http://www.site3.com/index.php");
}

// Wraps the url in a myThread object and runs that object's Run method on a
// new thread.
private void Start(string url)
{
    myThread worker = new myThread(url);
    new Thread(new ThreadStart(worker.Run)).Start();
}

// Carries the url for one collection job so that a parameterless
// ThreadStart method can reach it through instance state.
public class myThread
{
    private string url=string.Empty;

    public myThread(string url)
    {
        this.url = url;
    }

    string Url
    {
        get { return url; }
        set { url = value; }
    }

    // Thread procedure: collects the stored url.
    public void Run()
    {
        CollectionInfo collector = new CollectionInfo();
        collector.getInfo(url, Encoding.UTF8);
    }
}做程序要学会思考,有时可以变通。
你还可以用委托的方式更改。
// Collect site 1: runs compareToDB on the site's page, decoded as UTF-8.
private void collInfo1()
{
    new CollectionInfo().compareToDB("http://www.site1.com/index.html", Encoding.UTF8);
}
// Collect site 2
// Runs compareToDB on the site 2 page, decoded with Encoding.Default.
private void collInfo2()
{
    new CollectionInfo().compareToDB("http://www.site2.com/index.aspx", Encoding.Default);
}
// Start collecting
// Starts one thread for collInfo1 and one for collInfo2, in that order, and
// returns without waiting for either of them.
private void runThread()
{
    Thread siteOneWorker = new Thread(new ThreadStart(collInfo1));
    siteOneWorker.Start();

    Thread siteTwoWorker = new Thread(new ThreadStart(collInfo2));
    siteTwoWorker.Start();
}
// Called from the main program
// Click handler: collects both sites in parallel and rebinds the grid once
// both collection threads have finished.
//
// The original called runThread() and then BindToDatagrid() straight away.
// runThread() only starts the threads and returns, so the grid was rebound
// before the new data had been collected. (The MessageBox "fix" presumably
// only helped because it delayed the rebinding.)
//
// NOTE(review): assumes this class derives from Control (e.g. a Form), as
// the event-handler signature suggests, so that BeginInvoke is available.
private void button1_Click(object sender, EventArgs e)
{
    // Waiting for the workers happens on a separate coordinator thread;
    // joining them here would freeze the UI until collection is done.
    Thread coordinator = new Thread(new ThreadStart(delegate()
    {
        Thread site1 = new Thread(new ThreadStart(collInfo1));
        Thread site2 = new Thread(new ThreadStart(collInfo2));
        site1.Start();
        site2.Start();
        site1.Join();
        site2.Join();

        // Controls may only be touched from the thread that created them,
        // so the rebinding is marshalled back to the UI thread. Skip it if
        // the window has already gone away.
        if (this.IsHandleCreated && !this.IsDisposed)
        {
            this.BeginInvoke(new System.Windows.Forms.MethodInvoker(delegate()
            {
                this.BindToDatagrid(); // poster's own method: binds the data to an existing DataGrid
            }));
        }
    }));
    // A background coordinator does not keep the process alive by itself.
    coordinator.IsBackground = true;
    coordinator.Start();
}当采集完两个站点后,是用采集到的新数据更新了数据库里的信息。我的想法是做完采集任务后,马上重新绑定DataGrid以显示最新的数据,但事实是它并不执行这个操作,而我在它们中间加上//MessageBox.Show("中断一下");会弹出一个对话框,我点确定以后DataGrid才重新绑定。请问如何在采集完后马上更新DataGrid的数据?
public delegate void DGetInfo(string url, Encoding coding);
2. 用委托的异步调用 BeginInvoke 比线程方便,而且可以传递参数。
用的时候:
DGetInfo dinfo = new DGetInfo(getInfo);
dinfo.BeginInvoke("http://www.site1.com/index.html", Encoding.UTF8, null, null);
具体如何封装,就是你自己的问题了。不过这种程序一般不用 MessageBox 提示错误。
如果用多线程,会弹出很多 MessageBox;把错误写到文件或打印到命令行就可以了。
还有,这种程序一般会有一个队列:
把初始参数先放到队列里,然后由线程去取;下载一个网页后,分析其中的链接,
再放回队列,然后再从队列里取一个 url 继续处理。如果重试 1~n 次都没有成功,就放弃本次请求,
再从队列里取一个新的去试。