http://www.google.cn/search?hl=zh-CN&q=query&btnG=Google+%E6%90%9C%E7%B4%A2&meta=&aq=f&oq=
就这个页面,php curl如何模拟浏览器进行抓去还有就是在抓去的过程中,google有防刷新机制,如果一个页面打开的次数太多,就会把IP屏蔽掉哪位提示下呢??谢谢了
就这个页面,php curl如何模拟浏览器进行抓去还有就是在抓去的过程中,google有防刷新机制,如果一个页面打开的次数太多,就会把IP屏蔽掉哪位提示下呢??谢谢了
//user_agent
$useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)";
//伪造header
$header = array('Accept-Language: zh-cn','Connection: Keep-Alive','Cache-Control: no-cache');
$ch = curl_init();
//伪造REFERER,HEADER,USERAGENT
curl_setopt($ch,CURLOPT_HTTPHEADER,$header);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
class mycurl {
protected $_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1';
protected $_url;
protected $_followlocation;
protected $_timeout;
protected $_maxRedirects;
protected $_cookieFileLocation = 'c:\cookies\curl_cookie.txt';
protected $_post;
protected $_postFields;
protected $_referer ="http://www.google.com";
protected $_session;
protected $_webpage;
protected $_includeHeader;
protected $_noBody;
protected $_status;
protected $_binaryTransfer;
public function __construct($url,$followlocation = true,$timeOut = 30,$maxRedirecs = 4,$binaryTransfer = false,$includeHeader = false,$noBody = false)
{
$this->_url = $url;
$this->_referer = $referer;
$this->_followlocation = $followlocation;
$this->_timeout = $timeOut;
$this->_maxRedirects = $maxRedirecs;
$this->_noBody = $noBody;
$this->_includeHeader = $includeHeader;
$this->_binaryTransfer = $binaryTransfer;
}
public function setReferer($referer)
{
$this->_referer = $referer;
}
public function setCookiFileLocation($path)
{
$this->_cookieFileLocation = $path;
}
public function setPost ($postFields)
{
$this->_post = true;
$this->_postFields = $postFields;
}
public function setUserAgent($userAgent)
{
$this->_useragent = $userAgent;
}
public function createCurl()
{
$s = curl_init();
curl_setopt($s,CURLOPT_URL,$this->_url);
curl_setopt($s,CURLOPT_HTTPHEADER,array('Except:'));
curl_setopt($s,CURLOPT_TIMEOUT,$this->_timeout);
curl_setopt($s,CURLOPT_MAXREDIRS,$this->_maxRedirects);
curl_setopt($s,CURLOPT_RETURNTRANSFER,true);
curl_setopt($s,CURLOPT_FOLLOWLOCATION,$this->_followlocation);
curl_setopt($s,CURLOPT_COOKIEJAR,$this->_cookieFileLocation);
curl_setopt($s,CURLOPT_COOKIEFILE,$this->_cookieFileLocation);
if($this->_post)
{
curl_setopt($s,CURLOPT_POST,true);
curl_setopt($s,CURLOPT_POSTFIELDS,$this->_postFields);
}
if($this->_includeHeader)
{
curl_setopt($s,CURLOPT_HEADER,true);
}
if($this->_noBody)
{
curl_setopt($s,CURLOPT_NOBODY,true);
}
if($this->_binary)
{
curl_setopt($s,CURLOPT_BINARYTRANSFER,true);
} curl_setopt($s,CURLOPT_USERAGENT,$this->_useragent);
curl_setopt($s,CURLOPT_REFERER,$this->_referer);
$this->_webpage = curl_exec($s);
$this->_status = curl_getinfo($s,CURLINFO_HTTP_CODE);
curl_close($s);
}
public function getHttpStatus()
{
return $this->_status;
}
public function __tostring()
{
return $this->_webpage;
}
}
//using the above class
$instance = new mycurl('http://google.com',true,30,4);
$instance->createCurl();
if($imdb->getHttpStatus() == 200)
{
echo $imdb;
}