哪位仁兄能提供一个能够抓取网页内容并保存到本地的程序(源代码)?需要包括抓取图片信息。
谢谢了!

解决方案 »

  1.   


            public void Download(string StrUrl, string FilePath)
            {            long lStartPos = 0;            int iNumber = StrUrl.LastIndexOf("/");
                string strFileName = StrUrl.Substring(iNumber + 1, StrUrl.Length - (iNumber + 1));
                string StrFileName = FilePath + "\\" + strFileName;
                if (strFileName == "")
                {
                    System.IO.Directory.CreateDirectory(StrFileName);
                    return;
                }
                System.IO.FileStream fs;            StrFileName = StrFileName.Replace("?", "问号");
                //打开上次下载的文件或新建文件
                if (System.IO.File.Exists(StrFileName))
                {                fs = System.IO.File.OpenWrite(StrFileName);                lStartPos = fs.Length;                fs.Seek(lStartPos, System.IO.SeekOrigin.Current);   //移动文件流中的当前指针            }            else
                {                fs = new System.IO.FileStream(StrFileName, System.IO.FileMode.Create);                lStartPos = 0;            }            //打开网络连接            try
                {                System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.HttpWebRequest.Create(StrUrl);                if (lStartPos > 0)                    request.AddRange((int)lStartPos);    //设置Range值                //向服务器请求,获得服务器回应数据流                System.IO.Stream ns = request.GetResponse().GetResponseStream();                byte[] nbytes = new byte[512];                int nReadSize = 0;                nReadSize = ns.Read(nbytes, 0, 512);                while (nReadSize > 0)
                    {                    fs.Write(nbytes, 0, nReadSize);                    nReadSize = ns.Read(nbytes, 0, 512);                }                fs.Close();                ns.Close();                //this.strFileList.AppendText("下载" + StrUrl + "完成!\n");            }            catch (Exception ex)
                {                fs.Close();               // this.strFileList.AppendText("下载过程中出现错误:" + ex.ToString());            }
            }    }
      

  2.   

    /// <summary>
    /// Processes one page URL for a <see cref="PageServer"/>: registers itself with the
    /// server, fetches the page through <c>Tools.GetPage</c>, extracts the URLs referenced
    /// by <c>a</c>/<c>img</c>/<c>script</c>/<c>link</c> tags, records them in the server's
    /// URL list and downloads each one through <c>Tools.Download</c>.
    /// All work is started from the constructor.
    /// </summary>
    public class PageSession:Engine.Net.IServerSession
    {
        // URL prefixes treated as absolute addresses.
        private static readonly string[] KnownSchemes = { "http://", "https://", "ftp://" };

        // Matches <a>, <img>, <script> and <link> tags that carry a quoted href/src
        // attribute (in any attribute position). "url" captures the attribute value;
        // "song" optionally captures the text that follows the tag up to the next "</",
        // with an optional leading "-" skipped.
        private static readonly Regex LinkRegex = new Regex(
            @"<(?:a|img|script|link)\b[^>]*?\s(?:href|src)\s*=\s*([""'])(?<url>[^'"">]+)\1[^>]*>(?:(?:\s*-)?\s*(?<song>[^<]+)</)?",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        PageServer m_pServer;
        PageLogger logger;
        #region IServerSession Members
        /// <summary>
        /// Creates the session and immediately processes <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Address of the page to process.</param>
        /// <param name="pageServer">Server that owns this session.</param>
        /// <param name="logWriter">Logger used when the server has command logging enabled.</param>
        internal PageSession(string url, PageServer pageServer, PageLogger logWriter)
        {
            m_UrlAddress = url;
            m_pServer = pageServer;
            logger = logWriter;
            if (pageServer.LogCommands)
            {
                logger.AddText("开始处理URL地址:"+url+"\r\n");
                logger.Flush();
            }
            m_SessionStart = DateTime.Now;
            m_LastDataTime = DateTime.Now;
            StartSession();
        }

        /// <summary>
        /// Registers the session with the server, processes the page when the server
        /// accepts the URL, and ends the session afterwards.
        /// </summary>
        private void StartSession()
        {
            // Add to the server's list of active sessions.
            m_pServer.AddSession(this);
            try
            {
                // Ask the server whether this URL should be processed.
                if (m_pServer.OnValidate_UrlAddress(this.UrlAddress))
                {
                    BeginRecieveCmd();
                }

                // Processing is synchronous, so the session is finished here in both
                // cases; without this a successful session was never removed.
                EndSession();
            }
            catch (Exception x)
            {
                OnError(x);
            }
        }

        /// <summary>
        /// Removes the session from the server and, when logging is enabled, writes the
        /// end-of-processing entry.
        /// </summary>
        private void EndSession()
        {
            m_pServer.RemoveSession(this);

            // Record the end of processing in the log.
            if (m_pServer.LogCommands)
            {
                logger.AddText(string.Format("处理{0}结束!--{1}\t\n",this.UrlAddress,DateTime.Now));
                logger.Flush();
            }
        }

        /// <summary>
        /// Reports an exception to the server and ends the session. Values that are not
        /// exceptions are ignored. A failure while reporting is itself reported.
        /// </summary>
        /// <param name="x">The error object, expected to be an <see cref="Exception"/>.</param>
        private void OnError(object x)
        {
            try
            {
                Exception error = x as Exception;
                if (error != null)
                {
                    m_pServer.OnSysError("", error);
                    EndSession();
                }
            }
            catch (Exception ex)
            {
                m_pServer.OnSysError("", ex);
            }
        }

        /// <summary>
        /// Fetches the page, extracts every referenced URL, records it on the server and
        /// downloads it into the directory computed by <see cref="GetDirPath"/>.
        /// </summary>
        private void BeginRecieveCmd()
        {
            string PageStr = Tools.GetPage(m_UrlAddress);

            foreach (Match m in LinkRegex.Matches(PageStr))
            {
                string url = m.Groups["url"].Value;
                if (GetSchemeLength(url) == 0)
                {
                    // Not an absolute address: resolve against the current page's path.
                    // NOTE(review): root-relative ("/x") and protocol-relative ("//host/x")
                    // links are still simply appended — confirm what Tools.GetUrlPath returns.
                    url = Tools.GetUrlPath(UrlAddress)+"/" + url;
                }

                m_pServer.Urls.Add(url);

                m_pServer.OnShowUrls(m.Groups["song"].Value +" -- "+url+"\n");
                Tools.Download(url, GetDirPath(url));
            }
        }

        // Placeholder with no implementation; kept from the original class.
        void Download()
        {
        }

        /// <summary>
        /// Builds (and creates if necessary) the local directory for a URL:
        /// the server's save path, then the URL's domain, then the URL's directory part.
        /// </summary>
        /// <param name="URL">Address of the resource that will be stored.</param>
        /// <returns>The local directory path, which exists when this method returns.</returns>
        public string GetDirPath(string URL)
        {
            string RootPath = m_pServer.SavePath+"\\" + Tools.GetDomain(URL) + "\\";

            // The directory part runs from the first '/' after "scheme://host" up to
            // (but excluding) the last '/'. Without a known scheme it starts at index 0.
            int schemeLength = GetSchemeLength(URL);
            int pathStart = schemeLength > 0 ? URL.IndexOf('/', schemeLength) : 0;
            int lastSlash = URL.LastIndexOf('/');

            string relative = string.Empty;
            if (pathStart >= 0 && lastSlash > pathStart)
            {
                relative = URL.Substring(pathStart, lastSlash - pathStart);
            }

            string path = RootPath + relative.Replace("/", "\\");
            if (!System.IO.Directory.Exists(path))
            {
                System.IO.Directory.CreateDirectory(path);
            }
            return path;
        }

        /// <summary>
        /// Returns the length of the known scheme prefix ("http://", "https://", "ftp://")
        /// that <paramref name="url"/> starts with, compared case-insensitively, or 0.
        /// </summary>
        private static int GetSchemeLength(string url)
        {
            foreach (string scheme in KnownSchemes)
            {
                if (url.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
                {
                    return scheme.Length;
                }
            }
            return 0;
        }

        /// <summary>Ends the session.</summary>
        public void OnSessionTimeout()
        {
            EndSession();
        }

        /// <summary>Gets the time stored when the session was created.</summary>
        public DateTime SessionLastDataTime
        {
            get { return m_LastDataTime; }
        }

        /// <summary>Gets or sets an arbitrary user object attached to the session.</summary>
        public object Tag
        {
            get { return m_Tag; }
            set { m_Tag = value; }
        }

        private DateTime m_SessionStart;
        private DateTime m_LastDataTime;
        private object m_Tag = null;
        private string m_UrlAddress = string.Empty;

        /// <summary>Gets the address of the page this session processes.</summary>
        public String UrlAddress
        {
            get { return m_UrlAddress; }
        }
        #endregion
    }