我家的宽带要断网了,我想把我在CSDN上的所有的网摘保存到我的硬盘上,请大家说一下有什么办法么?
我尝试使用注册的Teleport Pro 下载网页,但效果就是不咋的,唉!!我的网摘列表共有7页,从http://wz.csdn.net/spmzfz/null/1/
到http://wz.csdn.net/spmzfz/null/7/
总共收录193个网摘,我想把这193个网摘页面都保存到我的PC上。
我尝试使用注册的Teleport Pro 下载网页,但效果就是不咋的,唉!!我的网摘列表共有7页,从http://wz.csdn.net/spmzfz/null/1/
到http://wz.csdn.net/spmzfz/null/7/
总共收录193个网摘,我想把这193个网摘页面都保存到我的PC上。
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

// Walks the seven bookmark-list pages (http://wz.csdn.net/spmzfz/null/1/ .. /7/),
// extracts every bookmarked post's title and link with a regular expression,
// and appends one line per post to E:\test.txt.
for (int i = 1; i <= 7; i++)
{
    WebRequest req = WebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));

    // Both the response and the reader are disposed by the using statements,
    // so no explicit Close() call is required.
    using (WebResponse res = req.GetResponse())
    using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
    {
        string html = sr.ReadToEnd();

        // Group 1 = link address, group 2 = post title.
        foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
        {
            File.AppendAllText("E:\\test.txt", string.Format("帖子名称:{0},链接地址:{1}{2}", m.Groups[2].Value, m.Groups[1].Value, Environment.NewLine));
        }
    }
}
如 WebBrowser、WebClient、HttpWebRequest
{
    // Downloads the resource at URL into the folder Dir, naming the local file
    // after the text that follows the last '/' of the URL.
    // Returns "Succefull" on success, or "Error:" followed by the exception
    // message on failure.
    // NOTE(review): URL and Dir are not declared in this fragment; presumably
    // they are string parameters of the enclosing method - confirm.
    string fileName = URL.Substring(URL.LastIndexOf("/") + 1);
    // Dir is concatenated as-is, so it has to end with a directory separator.
    string targetPath = Dir + fileName;
    try
    {
        // The request object itself is not used; creating it only serves to
        // reject a malformed URL or an unsupported scheme up front.
        WebRequest.Create(URL);
    }
    catch (Exception ex)
    {
        return "Error:" + ex.Message;
    }
    try
    {
        // WebClient is disposable; release it as soon as the download finishes.
        using (WebClient client = new WebClient())
        {
            client.DownloadFile(URL, targetPath);
        }
        return "Succefull";
    }
    catch (Exception ex)
    {
        return "Error:" + ex.Message;
    }
}
参考
非常感谢ojlovecd的回复,有了技术真的很快:
帖子名称:200分求[C#屏幕取词],链接地址:http://topic.csdn.net/u/20090724/17/88dc00a8-5912-4e6a-bf6b-3d2a0942acf4.html?32916
帖子名称:求简单的木马程序或者设计思路,链接地址:http://topic.csdn.net/u/20090727/10/8cb9c07d-7f0c-4801-a0cf-6389a5449f98.html?95343
帖子名称:C# 怎么读取txt文件?,链接地址:http://topic.csdn.net/u/20090714/17/6dd47933-453c-49ce-850b-689b61810d20.html?67922
=========================================
我运行程序后可以得到链接地址,但我真的不知道如何将这些网页内容保存下来。因为我是C#初学者,不会用 WebBrowser、WebClient、HttpWebRequest 编程,之前我一直是使用Teleport Pro来下载网页的,唉!!!!!!!!
我综合了两位的代码,已经把这些网页下载下来了!程序还有一点点问题,我现在正在修改。
//=========================================================================
// Purge every file directly inside c:\note that is smaller than 5 KB.
// Units: 1 MB = 1024 KB, 1 KB = 1024 bytes.
System.IO.FileInfo[] candidates = new DirectoryInfo("c:\\note").GetFiles();
for (int idx = 0; idx < candidates.Length; idx++)
{
    System.IO.FileInfo candidate = candidates[idx];
    bool underFiveKilobytes = candidate.Length < 5 * 1024;
    if (underFiveKilobytes)
    {
        candidate.Delete();
    }
}
// Signals a breakpoint so the outcome can be inspected in the debugger.
System.Diagnostics.Debugger.Break();
//=========================================================================
/// <summary>
/// Click handler that first removes files smaller than 5 KB from c:\note,
/// then walks the seven bookmark-list pages, appends a numbered
/// "index title url" line per bookmark to c:\test.txt and downloads each
/// bookmarked page into c:\csdn\ through <see cref="Download"/>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button5_Click(object sender, EventArgs e)
{
    // Remove files below 5 KB (1 KB = 1024 bytes) from c:\note.
    // The directory is skipped when it does not exist, because GetFiles()
    // would otherwise throw DirectoryNotFoundException.
    System.IO.DirectoryInfo di = new DirectoryInfo("c:\\note");
    if (di.Exists)
    {
        foreach (System.IO.FileInfo item in di.GetFiles())
        {
            if ((item.Length / 1024) < 5)
            {
                item.Delete();
            }
        }
    }

    int counter = 0; // running number of the bookmark across all list pages
    for (int i = 1; i <= 7; i++)
    {
        WebRequest req = WebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));

        // Response and reader are both released by the using statements.
        using (WebResponse res = req.GetResponse())
        using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
        {
            string html = sr.ReadToEnd();

            // Group 1 = bookmark URL, group 2 = bookmark title.
            foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
            {
                string bookmarkTitle = m.Groups[2].Value;
                string bookmarkUrl = m.Groups[1].Value;

                File.AppendAllText("c:\\test.txt", string.Format("{0,3} {1}\t\t{2}{3}", ++counter, bookmarkTitle, bookmarkUrl, Environment.NewLine));

                // Drop the query string (e.g. "....html?83313") so that the
                // last URL segment can be used as the local file name.
                string StringTemp = bookmarkUrl;
                int queryStart = StringTemp.IndexOf('?');
                if (queryStart >= 0)
                {
                    StringTemp = StringTemp.Substring(0, queryStart);
                }

                // Only the main page of each bookmark is downloaded here;
                // follow-up pages ("<name>_2.html", ...) are not fetched.
                Download(StringTemp, "c:\\csdn\\", counter, bookmarkTitle);
            }
        }
    }
    Console.WriteLine("End");
}

/// <summary>
/// Downloads <paramref name="URL"/> into the folder <paramref name="Dir"/>,
/// naming the local file after the text that follows the last '/' of the URL.
/// Success and failure are reported on the console; no exception from the
/// URL check or the download escapes this method.
/// </summary>
/// <param name="URL">Address of the page to download.</param>
/// <param name="Dir">Target folder; concatenated as-is, so it must end with a directory separator.</param>
/// <param name="ct">Running number of the bookmark, echoed in error messages.</param>
/// <param name="title">Bookmark title; currently unused.</param>
public void Download(string URL, string Dir, int ct, string title)
{
    string fileName = URL.Substring(URL.LastIndexOf("/") + 1);
    string targetPath = Dir + fileName;
    try
    {
        // The request object is not used; creating it only rejects a
        // malformed URL or an unsupported scheme before downloading.
        WebRequest.Create(URL);
    }
    catch (Exception ex)
    {
        Console.WriteLine("WRONG{0} {1} {2}", ct, URL, ex.Message);
        return; // the URL is unusable, so attempting the download is pointless
    }

    try
    {
        // Make sure the target folder exists; this is a no-op when it does.
        Directory.CreateDirectory(Dir);

        using (WebClient client = new WebClient())
        {
            client.DownloadFile(URL, targetPath);
        }
        Console.WriteLine("OK: " + URL);
    }
    catch (Exception ex)
    {
        Console.WriteLine("WRONG{0} {1} {2}", ct, URL, ex.Message);
    }
}
{
    // Saves every bookmarked CSDN page, including its follow-up pages, to g:\csdn
    // and writes a numbered "index title url" line per bookmark to g:\csdn\csdn.txt.
    // Reference: http://hi.baidu.com/wzlv/blog/item/7ff57f90b12bbe89a877a446.html
    // Bookmark list pages look like: http://wz.csdn.net/spmzfz/null/1/
    int counter = 0; // running count of bookmark main pages

    // Change this path to choose where the CSDN bookmarks are stored.
    System.IO.Directory.CreateDirectory("g:\\csdn");

    // Change the upper bound to the number of pages in the CSDN bookmark list.
    for (int i = 1; i <= 1; i++)
    {
        WebRequest req = WebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));

        // Response and reader are both released by the using statements.
        using (WebResponse res = req.GetResponse())
        using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
        {
            string html = sr.ReadToEnd();

            // Group 1 = bookmark URL, group 2 = bookmark title.
            foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
            {
                File.AppendAllText("g:\\csdn\\csdn.txt", string.Format("{0,3} {1}\t\t{2}{3}", ++counter, m.Groups[2].Value, m.Groups[1].Value, Environment.NewLine));

                // Strip the query string, e.g.
                // http://topic.csdn.net/u/20090818/11/632fc9ce-8df8-4b1e-a616-e2bcf4fd43cf.html?83313
                string UrlTemp = m.Groups[1].Value;
                int queryStart = UrlTemp.IndexOf('?');
                if (queryStart >= 0)
                {
                    UrlTemp = UrlTemp.Substring(0, queryStart);
                }

                // Follow-up pages are derived by inserting "_<n>" before ".html":
                //   http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c.html
                //   http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c_5.html
                // A URL without the ".html" suffix cannot be paged that way, so
                // only its main page is fetched.
                bool hasHtmlSuffix = UrlTemp.EndsWith(".html", StringComparison.OrdinalIgnoreCase);
                string StringTemp = hasHtmlSuffix ? UrlTemp.Substring(0, UrlTemp.Length - 5) : UrlTemp;
                int lastPage = hasHtmlSuffix ? 8 : 1;

                for (int t = 1; t <= lastPage; t++)
                {
                    if (t > 1)
                    {
                        UrlTemp = string.Format("{0}_{1}.html", StringTemp, t);
                    }

                    string fileName = UrlTemp.Substring(UrlTemp.LastIndexOf("/") + 1);
                    string targetPath = "g:\\csdn\\" + fileName;
                    try
                    {
                        using (WebClient client = new WebClient())
                        {
                            client.DownloadFile(UrlTemp, targetPath);
                        }

                        // A downloaded file below 5 KB (1 KB = 1024 bytes) is deleted
                        // and paging stops for this bookmark.
                        // NOTE(review): presumably such a small file means the page
                        // does not exist - confirm against the site's behavior.
                        System.IO.FileInfo fi = new FileInfo(targetPath);
                        if ((fi.Length / 1024) < 5)
                        {
                            fi.Delete();
                            break;
                        }

                        Console.WriteLine("OK: {0} {1}", counter, UrlTemp);
                    }
                    catch (Exception ex)
                    {
                        // A failed page is reported and the loop moves on to the next page.
                        string page = (t == 1) ? "MainPage" : "PartPage";
                        Console.WriteLine("DownLoad {0} Wrong {1} {2} {3}", page, counter, UrlTemp, ex.Message);
                    }
                }
            }
        }
    }
    Console.WriteLine("End");
}