地址:http://www.chinaz.com/ 用 ASP.NET 2.0 做一个采集这个网站新闻的功能。如果分不够可以再补上!!

解决方案 »

  1.   

     public class PageSession:Engine.Net.IServerSession
        {
            PageServer m_pServer;
            PageLogger logger;
            #region IServerSession Members
            internal PageSession(string url, PageServer pageServer, PageLogger logWriter)
                : base()
            {
                m_UrlAddress = url;
                m_pServer = pageServer;
                logger = logWriter;
                if (pageServer.LogCommands)
                {
                    logger.AddText("开始处理URL地址:"+url+"\r\n");
                    logger.Flush();
                }
                m_SessionStart = DateTime.Now;
                m_LastDataTime = DateTime.Now;
                StartSession();
            }        private void StartSession()
            {
                //增加到服务器处理列表
                m_pServer.AddSession(this);
                try
                {
                    m_pServer.SnatchCount++;
                    //检查URL地址是否已处理过
                    if (m_pServer.OnValidate_UrlAddress(this.UrlAddress))
                    {                    BeginRecieveCmd();
                    }
                    else
                    {
                        EndSession();
                    }
                }
                catch (Exception x)
                {
                    OnError(x);
                }
            }
           
            private void EndSession()
            {
                m_pServer.RemoveUrl(m_UrlAddress);
                m_pServer.RemoveSession(this);            // 增加处理结束到日志
                if (m_pServer.LogCommands)
                {
                    logger.AddText(string.Format("处理{0}结束!--{1}\t\n",this.UrlAddress,DateTime.Now));                logger.Flush();
                }
              /*  if (m_pServer.Urls.Count <= 0)// 当URL中的列表处理完成,提示处理完成 
                {
                    m_pServer.OnMessage("抓取完成!");
                    m_pServer.OnCompleted();
                }*/
            }        private void OnError(object x)
            {
                try
                {
                    if (x is Exception)
                    {
                        m_pServer.OnSysError("", (Exception)x);
                        if (m_pServer.LogCommands)
                        {
                            // m_pLogWriter.AddEntry("Client aborted/disconnected",this.SessionID,this.RemoteEndPoint.Address.ToString(),"C");
                            logger.AddText("Client aborted/disconnected");
                        }                    EndSession();                    // Exception handled, return
                        return;
                    }              //  m_pServer.OnSysError("", x);
                }
                catch (Exception ex)
                {
                    m_pServer.OnSysError("", ex);
                }
            }
            private void BeginRecieveCmd()
            {            string PageStr = PageUtil.GetPageText(m_UrlAddress);     
              .............
              }写过一个通用的采集模块,只要写正则和 SQL 就 OK。方法是一个 server 类、一个 session 类:server 类用来爬取列表,session 类用来处理页面;当页面处理完成后,从列表中移除 URL。