我是想频繁搜索 搜狗的搜索结果
频繁请求之后,会进入验证码页面,我想模拟提交验证码,然后进入到跳转到解封的搜索结果页面
现在做到了 模拟提交验证码没问题了,但是我不会抓跳转地址,我抓到的地址返回的信息在下面……
实在是惭愧,对curl太不熟悉。
从curl的返回结果看,现在是这个
HTTP/1.1 100 ContinueHTTP/1.1 302 Moved Temporarily
Server: nginx
Date: Sat, 18 Mar 2017 03:55:28 GMT
Content-Type: text/html
Transfer-Encoding: chunked
Connection: keep-alive
Location: http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD%26type%3d2%26dp%3d1%26page%3d1HTTP/1.1 200 OK
Server: nginx
Date: Sat, 18 Mar 2017 03:55:28 GMT
Content-Type: text/html
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
X-Powered-By: PHP/5.3.3
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Pragma: no-cache
我用的代码如下
<?php
$cookie_file = "./tmp.cookie";
$header = getSogouHead();if (isset($_POST['dosubmit'])) {
$login_url = 'http://weixin.sogou.com/antispider/thank.php'; $url = "http://weixin.sogou.com/weixin?ie=utf8&query=%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD&type=2&dp=1&page=1"; //这个是我需要返回的页面 $antispider_url = 'http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD%26type%3d2%26dp%3d1%26page%3d1'; //这个是频繁刷新$url之后跳转到的页面 $formData = $_POST; unset($formData['dosubmit']);
//进行设置随机字符
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $formData);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$data = curl_exec($curl);
$info = curl_getinfo($curl);
curl_close($curl); //获取提交验证码返回结果end
// $url = ""; //如何获取上面代码跳转的地址然后给下面的代码打开呢……
// exit;
//这以上代码是对的。问题出在下一步怎么做上,如何去获取跳转回来的页面并且用curl打开
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $antispider_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $formData);
curl_setopt($ch, CURLOPT_HEADER, $header);
curl_setopt($ch, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$output = curl_exec($ch);
$infos = curl_getinfo($ch);
curl_close($ch);
echo "<pre>";
print_r($infos);
print_r($output);
echo "</pre>";} else {
$time = time();
//初始化变量
$login_url = 'http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d乐视倒闭%26type%3d2%26dp%3d1%26page%3d1'; $verify_code_url = 'http://weixin.sogou.com/antispider/util/seccode.php?tc=' . $time; $curl = curl_init();
$timeout = 5;
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file); //获取COOKIE并存储
$contents = curl_exec($curl);
curl_close($curl);
//进行提取Form信息
$pregInput = '<input[\s\S]*?name=[\'|"](.*?)[\'|"][\s\S]*?value=[\'|"](.*?)[\'|"][\s\S]*?>';
preg_match_all($pregInput, $contents, $math);
$formData = array();
foreach ($math[1] as $key => $value) {
$formData[$value] = $math[2][$key];
} //print_R($formData); //取出验证码
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $verify_code_url);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_HEADER, 0);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$img = curl_exec($curl);
curl_close($curl); $fp = fopen("verifyCode.jpg", "w");
fwrite($fp, $img);
fclose($fp);}function getSogouHead()
{
$head = array(
'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding:gzip, deflate, sdch',
'Accept-Language:zh-CN,zh;q=0.8',
'Cache-Control:max-age=0',
'Connection:keep-alive',
'Host:weixin.sogou.com',
'Upgrade-Insecure-Requests:1',
);
return $head;
}
?>
<!DOCTYPE html>
<html>
<head>
<title>SOGOU</title>
<meta charset="utf8">
</head>
<body>
<form action="" method="POST">
<?php
foreach ($formData as $key => $value) {
?>
<input type="hidden" name="<?php echo $key ?>" value="<?php echo $value ?>">
<?php
}
?>
输入验证码: <input type="text" name="c"><img src="verifyCode.jpg">
<input type="hidden" name="tc" value="<?php echo $time; ?>">
<input type="submit" value="提交" name="dosubmit">
</form>
</body>
</html>
频繁请求之后,会进入验证码页面,我想模拟提交验证码,然后进入到跳转到解封的搜索结果页面
现在做到了 模拟提交验证码没问题了,但是我不会抓跳转地址,我抓到的地址返回的信息在下面……
实在是惭愧,对curl太不熟悉。
从curl的返回结果看,现在是这个
HTTP/1.1 100 ContinueHTTP/1.1 302 Moved Temporarily
Server: nginx
Date: Sat, 18 Mar 2017 03:55:28 GMT
Content-Type: text/html
Transfer-Encoding: chunked
Connection: keep-alive
Location: http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD%26type%3d2%26dp%3d1%26page%3d1HTTP/1.1 200 OK
Server: nginx
Date: Sat, 18 Mar 2017 03:55:28 GMT
Content-Type: text/html
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
X-Powered-By: PHP/5.3.3
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Pragma: no-cache
我用的代码如下
<?php
$cookie_file = "./tmp.cookie";
$header = getSogouHead();if (isset($_POST['dosubmit'])) {
$login_url = 'http://weixin.sogou.com/antispider/thank.php'; $url = "http://weixin.sogou.com/weixin?ie=utf8&query=%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD&type=2&dp=1&page=1"; //这个是我需要返回的页面 $antispider_url = 'http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d%E4%B9%90%E8%A7%86%E5%80%92%E9%97%AD%26type%3d2%26dp%3d1%26page%3d1'; //这个是频繁刷新$url之后跳转到的页面 $formData = $_POST; unset($formData['dosubmit']);
//进行设置随机字符
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $formData);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$data = curl_exec($curl);
$info = curl_getinfo($curl);
curl_close($curl); //获取提交验证码返回结果end
// $url = ""; //如何获取上面代码跳转的地址然后给下面的代码打开呢……
// exit;
//这以上代码是对的。问题出在下一步怎么做上,如何去获取跳转回来的页面并且用curl打开
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $antispider_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $formData);
curl_setopt($ch, CURLOPT_HEADER, $header);
curl_setopt($ch, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
$output = curl_exec($ch);
$infos = curl_getinfo($ch);
curl_close($ch);
echo "<pre>";
print_r($infos);
print_r($output);
echo "</pre>";} else {
$time = time();
//初始化变量
$login_url = 'http://weixin.sogou.com/antispider/?from=%2fweixin%3Fie%3dutf8%26query%3d乐视倒闭%26type%3d2%26dp%3d1%26page%3d1'; $verify_code_url = 'http://weixin.sogou.com/antispider/util/seccode.php?tc=' . $time; $curl = curl_init();
$timeout = 5;
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file); //获取COOKIE并存储
$contents = curl_exec($curl);
curl_close($curl);
//进行提取Form信息
$pregInput = '<input[\s\S]*?name=[\'|"](.*?)[\'|"][\s\S]*?value=[\'|"](.*?)[\'|"][\s\S]*?>';
preg_match_all($pregInput, $contents, $math);
$formData = array();
foreach ($math[1] as $key => $value) {
$formData[$value] = $math[2][$key];
} //print_R($formData); //取出验证码
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $verify_code_url);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($curl, CURLOPT_HEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, 'User-Agent:Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50');
curl_setopt($curl, CURLOPT_HEADER, 0);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$img = curl_exec($curl);
curl_close($curl); $fp = fopen("verifyCode.jpg", "w");
fwrite($fp, $img);
fclose($fp);}function getSogouHead()
{
$head = array(
'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding:gzip, deflate, sdch',
'Accept-Language:zh-CN,zh;q=0.8',
'Cache-Control:max-age=0',
'Connection:keep-alive',
'Host:weixin.sogou.com',
'Upgrade-Insecure-Requests:1',
);
return $head;
}
?>
<!DOCTYPE html>
<html>
<head>
<title>SOGOU</title>
<meta charset="utf8">
</head>
<body>
<form action="" method="POST">
<?php
foreach ($formData as $key => $value) {
?>
<input type="hidden" name="<?php echo $key ?>" value="<?php echo $value ?>">
<?php
}
?>
输入验证码: <input type="text" name="c"><img src="verifyCode.jpg">
<input type="hidden" name="tc" value="<?php echo $time; ?>">
<input type="submit" value="提交" name="dosubmit">
</form>
</body>
</html>
解决方案 »
- PHP的array是真正的数组吗
- php 由页面向数据库中添加信息,差一行
- 进来看一下在PHP中实现Cookies这种功能,怎么整?谢谢了!
- php只能做中小型的规模?
- 关于open_basedir restriction in effect...File(/XXX/XXX) is not within the allowed path的问题
- 网站的图片被应用,能否准确判断页面来源?
- 请教,配IIS+MYSQL+PHP5+MANTIS时出错,在线等
- form 里提交表单,窗口的问题
- 我再次发现自己很菜...问cookie类型
- php字符串转换的问题
- 求助php大神帮忙
- mysql 连接更新为mysqli或者pdo 求解
跟着跳,很重要!
但我没看到 CURLOPT_FOLLOWLOCATION
也就是他并没有通过 http 头设置 cookie但目标 url 中,可以看到 js 设置 cookie 的代码你没去搞清楚他的运行机制和流程,当然就出问题了