http://www.google.cn/search?hl=zh-CN&q=query&btnG=Google+%E6%90%9C%E7%B4%A2&meta=&aq=f&oq=
就这个页面，PHP curl 如何模拟浏览器进行抓取？还有就是在抓取的过程中，Google 有防刷新机制，如果一个页面打开的次数太多，就会把 IP 屏蔽掉。哪位提示下呢？谢谢了！

解决方案 »

  1.   


    // Browser identity presented to the server (an IE 6 / Windows XP string).
    $useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)';

    // Extra request headers sent along with the request.
    $header = [
        'Accept-Language: zh-cn',
        'Connection: Keep-Alive',
        'Cache-Control: no-cache',
    ];

    // Create the handle, then apply the custom headers and the User-Agent
    // in a single call.
    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_HTTPHEADER => $header,
        CURLOPT_USERAGENT  => $useragent,
    ]);
      

  2.   

    lz试一下下面的代码
     class mycurl {
         protected $_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1';
         protected $_url;
         protected $_followlocation;
         protected $_timeout;
         protected $_maxRedirects;
         protected $_cookieFileLocation = 'c:\cookies\curl_cookie.txt';
        protected $_post;
         protected $_postFields;
         protected $_referer ="http://www.google.com";
        
        
         protected $_session;
         protected $_webpage;
         protected $_includeHeader;
         protected $_noBody;
         protected $_status;
         protected $_binaryTransfer;
        
        
         public function __construct($url,$followlocation = true,$timeOut = 30,$maxRedirecs = 4,$binaryTransfer = false,$includeHeader = false,$noBody = false)
         {
             $this->_url = $url;
             $this->_referer = $referer;
             $this->_followlocation = $followlocation;
             $this->_timeout = $timeOut;
             $this->_maxRedirects = $maxRedirecs;
             $this->_noBody = $noBody;
             $this->_includeHeader = $includeHeader;
             $this->_binaryTransfer = $binaryTransfer;
          
         }
            
    public function setReferer($referer)
    {
        $this->_referer = $referer;
    }
        
         public function setCookiFileLocation($path)
         {
             $this->_cookieFileLocation = $path;
         }
        
        
         public function setPost ($postFields)
         {
            $this->_post = true;
            $this->_postFields = $postFields;
         }
        
        
         public function setUserAgent($userAgent)
         {
             $this->_useragent = $userAgent;
         }
     
     
         public function createCurl()
         {
            
             $s = curl_init();
            
             curl_setopt($s,CURLOPT_URL,$this->_url);
             curl_setopt($s,CURLOPT_HTTPHEADER,array('Except:'));
             curl_setopt($s,CURLOPT_TIMEOUT,$this->_timeout);
             curl_setopt($s,CURLOPT_MAXREDIRS,$this->_maxRedirects);
             curl_setopt($s,CURLOPT_RETURNTRANSFER,true);
             curl_setopt($s,CURLOPT_FOLLOWLOCATION,$this->_followlocation);
             curl_setopt($s,CURLOPT_COOKIEJAR,$this->_cookieFileLocation);
             curl_setopt($s,CURLOPT_COOKIEFILE,$this->_cookieFileLocation);
             if($this->_post)
             {
                 curl_setopt($s,CURLOPT_POST,true);
                 curl_setopt($s,CURLOPT_POSTFIELDS,$this->_postFields);
                
             }
            
             if($this->_includeHeader)
             {
                   curl_setopt($s,CURLOPT_HEADER,true);
             }
            
             if($this->_noBody)
             {
                 curl_setopt($s,CURLOPT_NOBODY,true);
             }
             if($this->_binary)
             {
                 curl_setopt($s,CURLOPT_BINARYTRANSFER,true);
             }         curl_setopt($s,CURLOPT_USERAGENT,$this->_useragent);
             curl_setopt($s,CURLOPT_REFERER,$this->_referer);
            
             $this->_webpage = curl_exec($s);
                       $this->_status = curl_getinfo($s,CURLINFO_HTTP_CODE); 
             curl_close($s);
            
            
         }
     
     
       public function getHttpStatus()
       {
           return $this->_status;
       }
     
      public function __tostring()
      {
          return $this->_webpage;
      }
     
     }
     
     //using the above class
     
     $instance = new mycurl('http://google.com',true,30,4);
     $instance->createCurl();
     if($imdb->getHttpStatus() == 200)
     {
         echo $imdb;
     }