http://www.google.cn/search?hl=zh-CN&q=query&btnG=Google+%E6%90%9C%E7%B4%A2&meta=&aq=f&oq=
就这个页面,PHP cURL 如何模拟浏览器进行抓取?还有就是在抓取的过程中,Google 有防刷新机制,如果一个页面打开的次数太多,就会把 IP 屏蔽掉。哪位提示下呢?谢谢了。
就这个页面,PHP cURL 如何模拟浏览器进行抓取?还有就是在抓取的过程中,Google 有防刷新机制,如果一个页面打开的次数太多,就会把 IP 屏蔽掉。哪位提示下呢?谢谢了。
解决方案 »
- 如何应用php+mysql开发一个crm库系统
- AJAX问题:怎么发送表单数据值?POST方法
- 请问如何实现在客户端缓存图片,我的网页总是每次都要从服务器上下载????
- php include()字符串,并解析字符串的问题
- PHP的memcached getversion报Notice
- php框架里的分页问题
- 如何利用动网的IP库来查找地址最快?
- 如何用php实现倒计时功能,并将时间在网页上显示出来?
- 难道歧视中国人?Zend studio 不支持中文?我的汉字为什么都是框框?
- 请问在安装pws时,出现“Microsoft Transaction Server未成功地安装。”如何解决?
- php中加载根目录外的文件地址应该怎么办?
- 弱弱的问一个输出的问题
// Extra request headers sent along with the request.
$header = array('Accept-Language: zh-cn','Connection: Keep-Alive','Cache-Control: no-cache');
// User-Agent string identifying the client as a desktop browser.
$useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)";
$ch = curl_init();
// Apply the custom headers and the User-Agent to the handle in one call.
curl_setopt_array($ch, array(
    CURLOPT_HTTPHEADER => $header,
    CURLOPT_USERAGENT  => $useragent,
));
/**
 * Small wrapper around one cURL request that identifies itself as a browser.
 *
 * Configure the request with the constructor and the set*() methods, run it
 * with createCurl(), then read the outcome through getHttpStatus() and by
 * casting the object to a string.
 */
class mycurl
{
    /** @var string User-Agent header sent with the request. */
    protected $_useragent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';
    /** @var string URL to request. */
    protected $_url;
    /** @var bool Whether "Location:" redirects are followed. */
    protected $_followlocation;
    /** @var int Value passed to CURLOPT_TIMEOUT. */
    protected $_timeout;
    /** @var int Value passed to CURLOPT_MAXREDIRS. */
    protected $_maxRedirects;
    /** @var string File used both to read and to store cookies. */
    protected $_cookieFileLocation = 'c:\cookies\curl_cookie.txt';
    /** @var bool|null True once setPost() has been called. */
    protected $_post;
    /** @var mixed Payload passed to CURLOPT_POSTFIELDS. */
    protected $_postFields;
    /** @var string Referer header sent with the request. */
    protected $_referer = "http://www.google.com";
    /** @var mixed Not used by any method of this class. */
    protected $_session;
    /** @var string|bool|null Result of curl_exec(); null until createCurl() has run. */
    protected $_webpage;
    /** @var bool Whether response headers are included in the output. */
    protected $_includeHeader;
    /** @var bool Whether the response body is skipped (CURLOPT_NOBODY). */
    protected $_noBody;
    /** @var int|null HTTP status code of the last request; null until createCurl() has run. */
    protected $_status;
    /** @var bool Whether CURLOPT_BINARYTRANSFER is requested. */
    protected $_binaryTransfer;

    /**
     * Stores the request settings; no network activity happens here.
     *
     * The referer keeps its class default unless setReferer() is called.
     *
     * @param string $url            URL to request.
     * @param bool   $followlocation Follow HTTP redirects.
     * @param int    $timeOut        Value for CURLOPT_TIMEOUT.
     * @param int    $maxRedirecs    Value for CURLOPT_MAXREDIRS.
     * @param bool   $binaryTransfer Request CURLOPT_BINARYTRANSFER.
     * @param bool   $includeHeader  Include response headers in the output.
     * @param bool   $noBody         Skip the response body.
     */
    public function __construct($url, $followlocation = true, $timeOut = 30, $maxRedirecs = 4, $binaryTransfer = false, $includeHeader = false, $noBody = false)
    {
        $this->_url = $url;
        $this->_followlocation = $followlocation;
        $this->_timeout = $timeOut;
        $this->_maxRedirects = $maxRedirecs;
        $this->_noBody = $noBody;
        $this->_includeHeader = $includeHeader;
        $this->_binaryTransfer = $binaryTransfer;
    }

    /**
     * Overrides the Referer header.
     *
     * @param string $referer
     */
    public function setReferer($referer)
    {
        $this->_referer = $referer;
    }

    /**
     * Overrides the cookie file used for both reading and writing cookies.
     *
     * @param string $path
     */
    public function setCookiFileLocation($path)
    {
        $this->_cookieFileLocation = $path;
    }

    /**
     * Switches the request to POST and sets its payload.
     *
     * @param mixed $postFields Value handed to CURLOPT_POSTFIELDS.
     */
    public function setPost($postFields)
    {
        $this->_post = true;
        $this->_postFields = $postFields;
    }

    /**
     * Overrides the User-Agent header.
     *
     * @param string $userAgent
     */
    public function setUserAgent($userAgent)
    {
        $this->_useragent = $userAgent;
    }

    /**
     * Performs the request and records the response and its HTTP status code.
     */
    public function createCurl()
    {
        $s = curl_init();
        curl_setopt($s, CURLOPT_URL, $this->_url);
        // An empty "Expect:" header suppresses libcurl's automatic
        // "Expect: 100-continue" on POST requests.
        curl_setopt($s, CURLOPT_HTTPHEADER, array('Expect:'));
        curl_setopt($s, CURLOPT_TIMEOUT, $this->_timeout);
        curl_setopt($s, CURLOPT_MAXREDIRS, $this->_maxRedirects);
        curl_setopt($s, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($s, CURLOPT_FOLLOWLOCATION, $this->_followlocation);
        // The same file is used to load cookies and to persist new ones.
        curl_setopt($s, CURLOPT_COOKIEJAR, $this->_cookieFileLocation);
        curl_setopt($s, CURLOPT_COOKIEFILE, $this->_cookieFileLocation);
        if ($this->_post) {
            curl_setopt($s, CURLOPT_POST, true);
            curl_setopt($s, CURLOPT_POSTFIELDS, $this->_postFields);
        }
        if ($this->_includeHeader) {
            curl_setopt($s, CURLOPT_HEADER, true);
        }
        if ($this->_noBody) {
            curl_setopt($s, CURLOPT_NOBODY, true);
        }
        // NOTE(review): CURLOPT_BINARYTRANSFER is documented as obsolete in
        // recent PHP versions - confirm whether this flag is still wanted.
        if ($this->_binaryTransfer) {
            curl_setopt($s, CURLOPT_BINARYTRANSFER, true);
        }
        curl_setopt($s, CURLOPT_USERAGENT, $this->_useragent);
        curl_setopt($s, CURLOPT_REFERER, $this->_referer);
        $this->_webpage = curl_exec($s);
        $this->_status = curl_getinfo($s, CURLINFO_HTTP_CODE);
        curl_close($s);
    }

    /**
     * Returns the HTTP status code recorded by the last createCurl() call.
     *
     * @return int|null Null when createCurl() has not been called yet.
     */
    public function getHttpStatus()
    {
        return $this->_status;
    }

    /**
     * Returns the fetched page.
     *
     * The stored value is null before createCurl() runs and false when
     * curl_exec() fails; both are cast to an empty string because
     * __toString() must return a string.
     *
     * @return string
     */
    public function __toString()
    {
        return (string) $this->_webpage;
    }
}
// Using the class above: fetch the page and print it only when the server
// answered with HTTP 200.
$instance = new mycurl('http://google.com', true, 30, 4);
$instance->createCurl();
if ($instance->getHttpStatus() === 200) {
    echo $instance;
}