我家的宽带要断网了,我想把我在CSDN上的所有的网摘保存到我的硬盘上,请大家说一下有什么办法么?
我尝试使用注册的Teleport Pro 下载网页,但效果就是不咋的,唉!!我的网摘列表共有7页,从http://wz.csdn.net/spmzfz/null/1/
                     到http://wz.csdn.net/spmzfz/null/7/
总共收录193个网摘,我想把这193个网摘页面都保存到我的PC上

解决方案 »

  1.   

    自己写个工具吧。使用WebBrowser就可以,也很简单
      

  2.   


    using System;
    using System.IO;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;

    // Fetch each of the 7 bookmark list pages and append one "title, link" line per
    // bookmark found on the page to E:\test.txt.
    for (int i = 1; i <= 7; i++)
    {
        WebRequest req = WebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));
        // Dispose the response as well as the reader so the connection is released
        // before the next page is requested.
        using (WebResponse res = req.GetResponse())
        using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
        {
            string html = sr.ReadToEnd();
            // Group 1 captures the link address, group 2 the link text.
            foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
            {
                File.AppendAllText("E:\\test.txt", string.Format("帖子名称:{0},链接地址:{1}{2}", m.Groups[2].Value, m.Groups[1].Value, Environment.NewLine));
            }
        }
    }
      

  3.   

    写个工具遍历地址,实现下载
    如 WebBrowser、WebClient、HttpWebRequest
      

  4.   

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> into <paramref name="Dir"/>, naming the
    /// local file after the part of the URL that follows its last '/'.
    /// </summary>
    /// <param name="URL">Address of the resource to download.</param>
    /// <param name="Dir">Target directory; it is concatenated with the file name as-is, so it must end with a directory separator.</param>
    /// <returns>"Succefull" when the file was saved, otherwise "Error:" followed by the exception message.</returns>
    public string Download(string URL, string Dir)
    {
        string fileName = URL.Substring(URL.LastIndexOf("/") + 1);
        string targetPath = Dir + fileName;
        try
        {
            // Only validates the address (format and scheme); no request is sent here.
            WebRequest.Create(URL);
        }
        catch (Exception ex)
        {
            return "Error:" + ex.Message;
        }
        try
        {
            // WebClient is disposable; release it as soon as the transfer is finished.
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(URL, targetPath);
            }
            // Spelling kept unchanged: callers may compare against this exact value.
            return "Succefull";
        }
        catch (Exception ex)
        {
            return "Error:" + ex.Message;
        }
    }
    参考
      

  5.   

    =========================================
    非常感谢ojlovecd的回复,有了技术真的很快:
    帖子名称:200分求[C#屏幕取词],链接地址:http://topic.csdn.net/u/20090724/17/88dc00a8-5912-4e6a-bf6b-3d2a0942acf4.html?32916
    帖子名称:求简单的木马程序或者设计思路,链接地址:http://topic.csdn.net/u/20090727/10/8cb9c07d-7f0c-4801-a0cf-6389a5449f98.html?95343
    帖子名称:C# 怎么读取txt文件?,链接地址:http://topic.csdn.net/u/20090714/17/6dd47933-453c-49ce-850b-689b61810d20.html?67922

    =========================================
    可是我运行程序后可以知道链接地址,但我真的不知道如何将这些内容保存下来,因为我也是C#初学者,我不会 WebBrowser、WebClient、HttpWebRequest 编程。在此之前我是使用 Teleport Pro 来下载网页的,唉!!!!!!!!
      

  6.   

    呵呵,真的非常感谢 wuyq11 和 ojlovecd
    我综合了两位的代码,我已经把它们下载来了!程序有一点点问题,我现在在把它们修改一下
      

  7.   


                //=========================================================================
                //1M=1024KB 1KB=1024Byte
                System.IO.DirectoryInfo di = new DirectoryInfo("c:\\note");
                foreach (System.IO.FileInfo  item in di.GetFiles())
                {
                                if((item.Length /1024)<5)
                                {
                                    item.Delete();
                                }
                }
                System.Diagnostics.Debugger.Break();
                //=========================================================================
      

  8.   


            private void button5_Click(object sender, EventArgs e)
            {   
                //=========================================================================
                //1M=1024KB 1KB=1024Byte
                System.IO.DirectoryInfo di = new DirectoryInfo("c:\\note");
                foreach (System.IO.FileInfo  item in di.GetFiles())
                {
                                if((item.Length /1024)<5)
                                {
                                    item.Delete();
                                }
                }
                System.Diagnostics.Debugger.Break();
                //=========================================================================
                int counter = 0;
                for (int i = 1; i <= 7; i++)
                {
                    WebRequest req = HttpWebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));
                    WebResponse res = req.GetResponse();                using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
                    {
                        string html = sr.ReadToEnd();
                        foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
                        {
                            File.AppendAllText("c:\\test.txt", string.Format("{0,3} {1}\t\t{2}{3}", ++counter, m.Groups[2].Value, m.Groups[1].Value, Environment.NewLine));
                            //Title:m.Groups[2].Value  Url:m.Groups[1].Value                        string StringTemp = m.Groups[1].Value;
                            if (StringTemp.Contains("?"))
                            {
                                StringTemp = StringTemp.Substring(0, StringTemp.IndexOf('?'));
                            }                        Download(StringTemp, "c:\\csdn\\", counter, m.Groups[2].Value);//下载缺省主页面
                            #region 下载连续的分页面
                            //=========================================================================
                            /*
                            //http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c.html
                            //http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c_5.html                        string st = StringTemp.Substring(0, StringTemp.Length - 5);                        for (int t = 2; t < 7; t++)
                            {
                                StringTemp = string.Format("{0}_{1}.html", st, t);                            WebClient client = new WebClient();
                                string fileName = StringTemp.Substring(StringTemp.LastIndexOf("/") + 1);
                                string Path = "c:\\note\\" + fileName;                            try
                                {
                                    WebRequest myre = WebRequest.Create(StringTemp );
                                }
                                catch (Exception ex)
                                { 
                                    Console.WriteLine("WRONG{0}  {1}",StringTemp , ex.Message);
                                    break;
                                }                            try
                                {
                                    client.DownloadFile(StringTemp, Path);
                                    Console.WriteLine("OK: " + StringTemp );
                                }
                                catch (Exception ex)
                                {
                                    Console.WriteLine("WRONG{0}  {1}", StringTemp, ex.Message);
                                    break;
                                }
                            }
                            */                        #endregion                    }
                        sr.Close();
                    }
                }
                Console.WriteLine("End");
            }public void Download(string URL, string Dir, int ct, string title)
            {            WebClient client = new WebClient();
                string fileName = URL.Substring(URL.LastIndexOf("/") + 1); 
                string Path = Dir + fileName;      
                            
                try
                {
                    WebRequest myre = WebRequest.Create(URL);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("WRONG{0}  {1}  {2}", ct,URL ,ex.Message  );
                }            try
                {
                    client.DownloadFile(URL, Path);
                    Console.WriteLine( "OK: " + URL);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("WRONG{0}  {1}  {2}", ct, URL, ex.Message);
                }        } 
      

  9.   

    最终的整理后的源代码:
            /// <summary>
            /// Click handler: reads the bookmark list pages, appends a numbered "title / link" line per
            /// bookmark to g:\csdn\csdn.txt and downloads each bookmark's main page plus its "_2".."_8"
            /// follow-up pages into g:\csdn\, stopping at the first page smaller than 5 KB.
            /// </summary>
            private void button5_Click(object sender, EventArgs e)
            {
                //http://hi.baidu.com/wzlv/blog/item/7ff57f90b12bbe89a877a446.html
                //http://wz.csdn.net/spmzfz/null/1/
                int counter = 0;                                 // running count of bookmark main pages
                System.IO.Directory.CreateDirectory("g:\\csdn"); // change the folder that receives the CSDN bookmarks here
                for (int i = 1; i <= 1; i++)                     // change the number of bookmark list pages here
                {
                    WebRequest req = WebRequest.Create(string.Format("http://wz.csdn.net/spmzfz/null/{0}/", i));
                    using (WebResponse res = req.GetResponse())
                    using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.UTF8))
                    {
                        string html = sr.ReadToEnd();
                        foreach (Match m in Regex.Matches(html, @"<div class='fl'><h1><a href='([^']+)' target='_blank'>([^<]+)</a></h1>"))
                        {
                            // Title: m.Groups[2].Value ; Url: m.Groups[1].Value
                            File.AppendAllText("g:\\csdn\\csdn.txt", string.Format("{0,3} {1}\t\t{2}{3}", ++counter, m.Groups[2].Value, m.Groups[1].Value, Environment.NewLine));

                            string UrlTemp = m.Groups[1].Value;
                            // Strip the query part, e.g. http://topic.csdn.net/u/20090818/11/632fc9ce-8df8-4b1e-a616-e2bcf4fd43cf.html?83313
                            if (UrlTemp.Contains("?"))
                            {
                                UrlTemp = UrlTemp.Substring(0, UrlTemp.IndexOf('?'));
                            }

                            // Follow-up pages are derived by inserting "_N" before ".html". An address
                            // without that suffix has no derivable follow-up pages, so only its main
                            // page is fetched (cutting 5 characters off it blindly could also throw).
                            bool hasHtmlSuffix = UrlTemp.EndsWith(".html", StringComparison.OrdinalIgnoreCase);
                            string StringTemp = hasHtmlSuffix ? UrlTemp.Substring(0, UrlTemp.Length - 5) : UrlTemp; // address before ".html"
                            int lastPage = hasHtmlSuffix ? 8 : 1;
                            //===========================================================================================
                            for (int t = 1; t <= lastPage; t++)
                            {
                                // Paging:
                                //http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c.html
                                //http://topic.csdn.net/u/20081130/09/18d455a6-65e1-4d00-aa8c-d9742654cd8c_5.html
                                if (t > 1)
                                {
                                    UrlTemp = string.Format("{0}_{1}.html", StringTemp, t);
                                }

                                string fileName = UrlTemp.Substring(UrlTemp.LastIndexOf("/") + 1);
                                string localPath = "g:\\csdn\\" + fileName;

                                try
                                {
                                    using (WebClient client = new WebClient())
                                    {
                                        client.DownloadFile(UrlTemp, localPath);
                                    }

                                    // A downloaded file under 5 KB is deleted and ends the paging for
                                    // this bookmark (1 MB = 1024 KB, 1 KB = 1024 bytes).
                                    System.IO.FileInfo fi = new FileInfo(localPath);
                                    if ((fi.Length / 1024) < 5)
                                    {
                                        fi.Delete();
                                        break;
                                    }

                                    Console.WriteLine("OK: {0}  {1}", counter, UrlTemp);
                                }
                                catch (Exception ex)
                                {
                                    string page = (t == 1) ? "MainPage" : "PartPage";
                                    Console.WriteLine("DownLoad {0} Wrong {1}  {2}  {3}", page, counter, UrlTemp, ex.Message);
                                }
                            }
                            //===========================================================================================
                        }
                    }
                }
                Console.WriteLine("End");
            }