有很多网页要抓取。一开始的做法是在一个循环里调用 DownloadDataAsync,并通过 ServicePointManager.DefaultConnectionLimit 调节并发下载的数量(连接数)。
// Start a download for every chapter whose target file is not on disk yet.
// The target path is the selected folder concatenated with the chapter URL.
string targetFolder = folderBrowserDialog1.SelectedPath;
foreach (Charpter entity in list)
{
    if (!File.Exists(targetFolder + entity.URL))
    {
        // Download takes the chapter URL as its state object.
        Download(entity.URL);
    }
}
/// <summary>
/// Starts an asynchronous download of one page and returns immediately.
/// </summary>
/// <param name="state">
/// The relative URL of the page; its string form is appended to <c>HostUrl</c>
/// to build the request address and to the selected folder to build the
/// destination path.
/// </param>
private void Download(Object state)
{
    string url = state.ToString();
    WebClient client = new WebClient();

    // Subscribe before the request is started so the completion event
    // cannot be raised while no handler is attached.
    client.DownloadDataCompleted += client_DownloadDataCompleted;

    // The destination path is passed as the user token of the async call.
    // NOTE(review): presumably client_DownloadDataCompleted reads it back
    // from e.UserState and writes the file - confirm against the handler.
    client.DownloadDataAsync(new Uri(HostUrl + url), folderBrowserDialog1.SelectedPath + url);
}
但是发现一旦下载开始了,增大ServicePointManager.DefaultConnectionLimit,并不能增加下载速度,通过工具看到线程数也没增加.所以改用ThreadPool实现.
// Try to cap the process-wide thread pool at `con` worker and I/O threads.
// SetMaxThreads returns false and leaves the limits untouched when the
// requested value is below the number of processors or below the current
// minimum thread count, so the result must be checked.
if (!ThreadPool.SetMaxThreads(con, con))
{
    System.Diagnostics.Trace.TraceWarning(
        "ThreadPool.SetMaxThreads({0}, {0}) was rejected; the thread pool limit is unchanged.",
        con);
}

// Queue one work item per chapter whose target file is not on disk yet.
string targetFolder = folderBrowserDialog1.SelectedPath;
foreach (Charpter entity in list)
{
    if (!File.Exists(targetFolder + entity.URL))
    {
        // The chapter URL is handed to Download as its state object.
        ThreadPool.QueueUserWorkItem(Download, entity.URL);
    }
}
Download函数如下
/// <summary>
/// Downloads one page synchronously and writes it below the selected folder.
/// Intended to run as a thread-pool work item.
/// </summary>
/// <param name="state">
/// The relative URL of the page; its string form is appended to <c>HostUrl</c>
/// to build the request address and to the selected folder to build the
/// destination path.
/// </param>
/// <remarks>
/// Network and file-system failures are traced and the page is skipped.
/// An exception escaping a thread-pool callback would terminate the process.
/// </remarks>
private void Download(Object state)
{
    string url = state.ToString();
    string filePath = folderBrowserDialog1.SelectedPath + url;

    try
    {
        byte[] content;

        // Dispose the client as soon as the payload has been read.
        using (WebClient client = new WebClient())
        {
            content = client.DownloadData(new Uri(HostUrl + url));
        }

        // GetDirectoryName understands both '/' and '\' and does not throw
        // when the path has no separator, unlike Substring/LastIndexOf('/').
        string dir = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(dir))
        {
            // CreateDirectory is a no-op when the directory already exists,
            // so no Exists() pre-check (and no race between workers) is needed.
            Directory.CreateDirectory(dir);
        }

        File.WriteAllBytes(filePath, content);
    }
    catch (WebException ex)
    {
        System.Diagnostics.Trace.TraceError("Download of '{0}' failed: {1}", url, ex.Message);
    }
    catch (IOException ex)
    {
        System.Diagnostics.Trace.TraceError("Saving '{0}' failed: {1}", filePath, ex.Message);
    }
    catch (UnauthorizedAccessException ex)
    {
        System.Diagnostics.Trace.TraceError("Saving '{0}' failed: {1}", filePath, ex.Message);
    }
}
问题是:改用 ThreadPool 后,虽然设置了很小的最大线程数(如 5 个),但程序运行起来之后,线程数一直逐渐增多,大概增加到六七十个,然后程序发生异常崩溃。期间线程数偶有减少,但很少。为什么线程池里的线程没有被及时回收呢?
// Start a download for every chapter whose target file is not on disk yet.
// The target path is the selected folder concatenated with the chapter URL.
string targetFolder = folderBrowserDialog1.SelectedPath;
foreach (Charpter entity in list)
{
    if (!File.Exists(targetFolder + entity.URL))
    {
        // Download takes the chapter URL as its state object.
        Download(entity.URL);
    }
}
/// <summary>
/// Starts an asynchronous download of one page and returns immediately.
/// </summary>
/// <param name="state">
/// The relative URL of the page; its string form is appended to <c>HostUrl</c>
/// to build the request address and to the selected folder to build the
/// destination path.
/// </param>
private void Download(Object state)
{
    string url = state.ToString();
    WebClient client = new WebClient();

    // Subscribe before the request is started so the completion event
    // cannot be raised while no handler is attached.
    client.DownloadDataCompleted += client_DownloadDataCompleted;

    // The destination path is passed as the user token of the async call.
    // NOTE(review): presumably client_DownloadDataCompleted reads it back
    // from e.UserState and writes the file - confirm against the handler.
    client.DownloadDataAsync(new Uri(HostUrl + url), folderBrowserDialog1.SelectedPath + url);
}
但是发现一旦下载开始了,增大ServicePointManager.DefaultConnectionLimit,并不能增加下载速度,通过工具看到线程数也没增加.所以改用ThreadPool实现.
// Try to cap the process-wide thread pool at `con` worker and I/O threads.
// SetMaxThreads returns false and leaves the limits untouched when the
// requested value is below the number of processors or below the current
// minimum thread count, so the result must be checked.
if (!ThreadPool.SetMaxThreads(con, con))
{
    System.Diagnostics.Trace.TraceWarning(
        "ThreadPool.SetMaxThreads({0}, {0}) was rejected; the thread pool limit is unchanged.",
        con);
}

// Queue one work item per chapter whose target file is not on disk yet.
string targetFolder = folderBrowserDialog1.SelectedPath;
foreach (Charpter entity in list)
{
    if (!File.Exists(targetFolder + entity.URL))
    {
        // The chapter URL is handed to Download as its state object.
        ThreadPool.QueueUserWorkItem(Download, entity.URL);
    }
}
Download函数如下
/// <summary>
/// Downloads one page synchronously and writes it below the selected folder.
/// Intended to run as a thread-pool work item.
/// </summary>
/// <param name="state">
/// The relative URL of the page; its string form is appended to <c>HostUrl</c>
/// to build the request address and to the selected folder to build the
/// destination path.
/// </param>
/// <remarks>
/// Network and file-system failures are traced and the page is skipped.
/// An exception escaping a thread-pool callback would terminate the process.
/// </remarks>
private void Download(Object state)
{
    string url = state.ToString();
    string filePath = folderBrowserDialog1.SelectedPath + url;

    try
    {
        byte[] content;

        // Dispose the client as soon as the payload has been read.
        using (WebClient client = new WebClient())
        {
            content = client.DownloadData(new Uri(HostUrl + url));
        }

        // GetDirectoryName understands both '/' and '\' and does not throw
        // when the path has no separator, unlike Substring/LastIndexOf('/').
        string dir = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(dir))
        {
            // CreateDirectory is a no-op when the directory already exists,
            // so no Exists() pre-check (and no race between workers) is needed.
            Directory.CreateDirectory(dir);
        }

        File.WriteAllBytes(filePath, content);
    }
    catch (WebException ex)
    {
        System.Diagnostics.Trace.TraceError("Download of '{0}' failed: {1}", url, ex.Message);
    }
    catch (IOException ex)
    {
        System.Diagnostics.Trace.TraceError("Saving '{0}' failed: {1}", filePath, ex.Message);
    }
    catch (UnauthorizedAccessException ex)
    {
        System.Diagnostics.Trace.TraceError("Saving '{0}' failed: {1}", filePath, ex.Message);
    }
}
问题是:改用 ThreadPool 后,虽然设置了很小的最大线程数(如 5 个),但程序运行起来之后,线程数一直逐渐增多,大概增加到六七十个,然后程序发生异常崩溃。期间线程数偶有减少,但很少。为什么线程池里的线程没有被及时回收呢?
{
Content=System.Text.Encoding.Default.GetString(new WebClient().DownloadData(""));
}).Start();