菜鸟一枚,刚学习用curl采集,碰到个挠头的问题:用file_get_contents()可以正常采集,而curl需要目标URL先在浏览器中打开一次才能采集,不然就会返回HTTP/1.1 302 Found(Object moved)。请高手帮忙。
$url='http://m.xxx.com/info/Standings/2017-2018/5.htm';
/**
 * Fetch a URL with cURL and return the raw response.
 *
 * The response headers are placed in front of the body because
 * CURLOPT_HEADER is enabled.
 *
 * @param string $url Address to request.
 * @return string|bool Response headers and body, or false if curl_exec() fails.
 */
function curlGet($url)
{
    // Debugging note from the original author: when the line below is enabled
    // (the page is fetched once through file_get_contents() first), the cURL
    // request that follows succeeds.
    //$www=file_get_contents($url);
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, true);

    // Keep the result in a variable so the handle can be released before
    // returning; a curl_close() placed after `return` is never executed.
    $res = curl_exec($ch);
    curl_close($ch);

    return $res;
}
// Print whatever curlGet() returns for $url, followed by a newline.
echo curlGet($url), "\n";
/**
 * Fetch a URL with cURL and return the raw response.
 *
 * The response headers are placed in front of the body because
 * CURLOPT_HEADER is enabled.
 *
 * @param string $url Address to request.
 * @return string|bool Response headers and body, or false if curl_exec() fails.
 */
function curlGet($url)
{
    // Debugging note from the original author: when the line below is enabled
    // (the page is fetched once through file_get_contents() first), the cURL
    // request that follows succeeds.
    //$www=file_get_contents($url);
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, true);

    // Keep the result in a variable so the handle can be released before
    // returning; a curl_close() placed after `return` is never executed.
    $res = curl_exec($ch);
    curl_close($ch);

    return $res;
}
// Print whatever curlGet() returns for $url, followed by a newline.
echo curlGet($url), "\n";
解决方案 »
- 关于php随机函数问题
- 使用easyphp搭建一个环境,提示信息如下,求助
- PHP 中的IFRAME
- php中ifelse问题
- json_encode与json_decode是什么意思
- 如何让用户在下载时同时看到html网页?
- 为什么这样写的Session会无值呢
- IE5对js的支持不好?document.write和str.replace正则的js,在ie5里面显示失败,请问为何?
- 如何判断一个datetime字段的值比 2003-12-12 08:00:00 早 或者 晚 ???
- 谁有好一点的国外代理???谢谢
- 这个正则,^$为什么在中间,表示什么含义
- xampp安装成功但是访问phpmyadmin失败
同一个目标网站,换不同的子URL,file_get_contents可以正常用,而curl必须先把新的子URL在浏览器里打开一次才能采集,不然就会跳转到目标站的404提示页面。
同一个目标网站,换不同的子URL,file_get_contents可以正常用,而curl必须先把新的子URL在浏览器里打开一次才能采集,不然就会跳转到目标站的404提示页面。
http://m.win007.com/info/Standings/2017-2018/36.htm&b=1
http://m.win007.com/info/Standings/2017-2018/37.htm
http://m.win007.com/info/Standings/2017-2018/39.htm
以上三个url你可以用curl试一下,目标URL绝对是可以打开的,但你先不要在浏览器中打开,直接用curl测你就会明白我的问题,而file_get_contents是可以正常用的
并且返回的http头为:
Array
(
[0] => HTTP/1.1 200 OK
[1] => Date: Sat, 10 Mar 2018 08:40:34 GMT
[2] => Content-Type: text/html; charset=utf-8
[3] => Content-Length: 21183
[4] => Connection: close
[5] => Vary: Accept-Encoding
[6] => Cache-Control: private
[7] => Server: Win007/DX231SV
[8] => Win007-Cache-aspx: HIT
)
也没有什么302跳转。
感觉简单的采集,也没少啥呀
// Address of the page to fetch; passed to curlGet() below.
$url='http://m.xxx.com/info/Standings/2017-2018/5.htm';
/**
 * Fetch a URL with cURL and return the raw response.
 *
 * The response headers are placed in front of the body because
 * CURLOPT_HEADER is enabled.
 *
 * @param string $url Address to request.
 * @return string|bool Response headers and body, or false if curl_exec() fails.
 */
function curlGet($url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, true);

    // Keep the result in a variable so the handle can be released before
    // returning; a curl_close() placed after `return` is never executed.
    $res = curl_exec($ch);
    curl_close($ch);

    return $res;
}
// Print whatever curlGet() returns for $url, followed by a newline.
echo curlGet($url), "\n";
/**
 * Request the given URL through cURL and hand back the raw response.
 *
 * @param string $url Address to request.
 * @return string|bool Raw response with the header block in front of the body
 *                     (CURLOPT_HEADER is on), or false when curl_exec() fails.
 */
function curlGet($url)
{
    $handle = curl_init();
    curl_setopt_array($handle, [
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_HEADER         => true,
    ]);

    $response = curl_exec($handle);
    // Release the handle before returning the captured response.
    curl_close($handle);

    return $response;
}
// Print whatever curlGet() returns for $url, followed by a newline.
echo curlGet($url), "\n";
<xmp>
HTTP/1.1 200 OK
Date: Sat, 10 Mar 2018 23:00:48 GMT
Content-Type: text/html; charset=utf-8
Content-Length: 21183
Connection: keep-alive
Vary: Accept-Encoding
Cache-Control: private
Server: Win007/DX231SV
Win007-Cache-aspx: HIT

<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<script type="text/javascript" src="/script/BomHelper.js" ></script>
<link rel="stylesheet" href="/Css/main.css" /><link rel="stylesheet" href="/Css/info.css" />
<script src="/Script/jquery.min.js"></script>
<script src="/Script/jquery.selectui.min.js"></script>
<link href="/Css/selectui.css" rel="stylesheet" /><title></title>
<style type="text/css">
.stTool li{
width:25%;
float:left;
text-align:center;
padding:6px 0px;
}
.stTool li a{
color:#216283;
} .stTool li.on a{
color:#f00;
}
</style>
</head>
<body>
<header id="header">
<div class="infoToolbar">
<div id="seasonDiv">
赛季
<select id="selSeason" onchange="changeSeason(this)">
<option value="2017-2018" selected >2017-2018</option>
<option value="2016-2017" >2016-2017</option>
<option value="2015-2016" >2015-2016</option>
<option value="2014-2015" >2014-2015</option>
<option value="2013-2014" >2013-2014</option>
<option value="2012-2013" >2012-2013</option>
<option value="2011-2012" >2011-2012</option>
<option value="2010-2011" >2010-2011</option>
<option value="2009-2010" >2009-2010</option>
<option value="2008-2009" >2008-2009</option>
<option value="2007-2008" >2007-2008</option>
<option value="2006-2007" >2006-2007</option>
<option value="2005-2006" >2005-2006</option>
<option value="2004-2005" >2004-2005</option>
</select>
</div>
<a href="/info/SclassList/1.htm"><img style="margin-right:8px;" src="/images/left_icon.png" height="18"/>英格兰冠军联赛</a>
</div>
</header>
<div id="content">
<div id="tabHeader">
<ul class="infoTab">
<li class="on"><a href="#">积分</a></li>
<li><a href="/info/Fixture/2017-2018/37_87_0.htm">赛程</a></li>
<li><a href="/info/Handicap/2017-2018/37_87_0.htm">让球</a></li>
<li><a href="/info/OverUnder/2017-2018/37_87_0.htm">大小</a></li>
<li><a href="/info/Archer/2017-2018/37_87_0.htm">射手榜</a></li>
</ul>
</div>
<form method="post" action=" " id="mainForm">
<div class="aspNetHidden">
<input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
<input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNjQ0ODIxMDgzD2QWCAIBDxYCHgtfIUl0ZW1Db3VudAIOFhxmD2QWAmYPFQMJMjAxNy0yMDE4CHNlbGVjdGVkCTIwMTctMjAxOGQCAQ9kFgJmDxUDCTIwMTYtMjAxNwAJMjAxNi0yMDE3ZAICD2QWAmYPFQMJMjAxNS0yMDE2AAkyMDE1LTIwMTZkAgMPZBYCZg8VAwkyMDE0LTIwMTUACTIwMTQtMjAxNWQCBA9kFgJmDxUDCTIwMTMtMjAxNAAJMjAxMy0yMDE0ZAIFD2QWAmYPFQMJMjAxMi0yMDEzAAkyMDEyLTIwMTNkAgYPZBYCZg8VAwkyMDExLTIwMTIACTIwMTEtMjAxMmQCBw9kFgJmDxUDCTIwMTAtMjAxMQAJMjAxMC0yMDExZAIID2QWAmYPFQMJMjAwOS0yMDEwAAkyMDA5LTIwMTBkAgkPZBYCZg8VAwkyMDA4LTIwMDkACTIwMDgtMjAwOWQCCg9kFgJmDxUDCTIwMDctMjAwOAAJMjAwNy0yMDA4ZAILD2QWAmYPFQMJMjAwNi0yMDA3AAkyMDA2LTIwMDdkAgwPZBYCZg8VAwkyMDA1LTIwMDYACTIwMDUtMjAwNmQCDQ9kFgJmDxUDCTIwMDQtMjAwNQAJMjAwNC0yMDA1ZAIDDxYCHwBmZAIEDxYCHwACGBYwAgEPZBYCZg8VCgExByNDQ0NDRkYG54u86ZifAjM2AjIzATcBNgI2MwIzMgI3NmQCAg9kFgJmDxUKATIHI0NDQ0NGRgzljaHov6rlpKvln44CMzYCMjIBNwE3AjU1AjMwAjczZAIDD2QWAmYPFQoBMwcjMDBDQ0NDD+mYv+aWr+mhv+e7tOaLiQIzNgIyMAE5ATcCNjACMzMCNjlkAgQPZBYCZg8VCgE0ByMwMENDQ0MJ5a+M5YuS5aeGAjM3AjE5AjExATcCNjQCMzkCNjhkAgUPZBYCZg8VCgE1ByMwMENDQ0MJ5b635q+U6YOhAjM2AjE2AjEzATcCNTUCMzQCNjFkAgYPZBYCZg8VCgE2ByMwMENDQ0MP57Gz5b635bCU5pav5aChAjM3AjE4ATcCMTICNTMCMzUCNjFkAgcPZBYCZg8VCgE3AA/luIPph4zmlq/miZjln44CMzcCMTUCMTMBOQI1MwI0MgI1OGQCCA9kFgJmDxUKATgADOaZrumbt+aWr+mhvwIzNwIxNAIxNQE4AjQ2AjM3AjU3ZAIJD2QWAmYPFQoBOQAS6LCi6I+y5bCU5b636IGU6ZifAjM2AjE3ATUCMTQCNDkCNDMCNTZkAgoPZBYCZg8VCgIxMAAM57Gz5bCU5rKD5bCUAjM3AjE0AjEzAjEwAjQ0AjM3AjU1ZAILD2QWAmYPFQoCMTEAD+W4g+S8pueJueemj+W+twIzNgIxNAIxMQIxMQI1MgI0MgI1M2QCDA9kFgJmDxUKAjEyAA/kvIrmma7mlq/nu7TlpYcCMzYCMTUBNwIxNAI0NwI0NAI1MmQCDQ9kFgJmDxUKAjEzAAnliKnlhbnogZQCMzcCMTQBOAIxNQI0OQI1MQI1MGQCDg9kFgJmDxUKAjE0AAnor7rnu7TlpYcCMzYCMTICMTICMTICMzcCNDECNDhkAg8PZBYCZg8VCgIxNQAP6K+65LiB5rGJ5qOu5p6XAjM2AjEzATUCMTgCNDMCNTQCNDRkAhAPZBYCZg8VCgIxNgAV5aWz546L5YWs5Zut5beh5ri46ICFAjM2AjExAjEwAjE1AjQwAjUyAjQzZAIRD2QWAmYPFQoCMTcACeiwouWRqOS4iQIzNwE4AjE0AjE1AjM5AjUxAjM4ZAISD2QWAmYPFQoCMTgACei1q+WwlOWfjgIzNgE4AjEyAjE2AjUwAjU2AjM2ZAITD2QWAmYPFQoCMTkABumbt+S4gQIzNgE4AjEy
AjE2AjQyAjUxAjM2ZAIUD2QWAmYPFQoCMjAACeWNmuWwlOmhvwIzNwE4AjEyAjE3AjMyAjU2AjM2ZAIVD2QWAmYPFQoCMjEADOW3tOaBqeaWr+WIqQIzNgE3AjExAjE4AjM2AjUzAjMyZAIWD2QWAmYPFQoCMjIHI0IxQTdBNwnkvK/mmI7nv7ACMzcBOAE2AjIzAjI1AjU3AjMwZAIXD2QWAmYPFQoCMjMHI0IxQTdBNwbkvK/pob8CMzYBNwE5AjIwAjI2AjYyAjMwZAIYD2QWAmYPFQoCMjQHI0IxQTdBNwnmoZHlvrflhbACMzcBNQIxMwIxOQIzOAI2NgIyOGQCBQ8WAh8AAgMWBmYPZBYCZg8VAgcjQ0NDQ0ZGDOWNh+e6p+eQg+mYn2QCAQ9kFgJmDxUCByMwMENDQ0MM5Y2H57qn6ZmE5YqgZAICD2QWAmYPFQIHI0IxQTdBNwzpmY3nuqfnkIPpmJ9kZIU1wuvYvh4xGJ3qg4TdNYhRtDXB" />
</div><script type="text/javascript">
//<![CDATA[
var theForm = document.forms['mainForm'];
if (!theForm) {
theForm = document.mainForm;
}
function __doPostBack(eventTarget, eventArgument) {
if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
theForm.__EVENTTARGET.value = eventTarget;
theForm.__EVENTARGUMENT.value = eventArgument;
theForm.submit();
}
}
//]]>
</script>
<div class="aspNetHidden"> <input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="ED999FA7" />
<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEdAAXFVcvhbbq/GYBpVS4f6PGoqzo1juKxIWZQ0eQclIPq+vAFM6EDeOlcjHWhJ4h7ZKzWgjgXXbF3YWKiAfg3iw7H1tete3CFUzIY3iMFqDK68LuwPxRQlcgx8sVQI2/YZl0iLwUd" />
</div>
<div class="stTool">
<ul>
<li class='on'><a id="TotalType" href="javascript:__doPostBack('TotalType','')">总积分</a></li>
<li ><a id="HalfType" href="javascript:__doPostBack('HalfType','')">半场积分</a></li>
<li ><a id="HomeType" href="javascript:__doPostBack('HomeType','')">主场积分</a></li>
<li ><a id="AwayType" href="javascript:__doPostBack('AwayType','')">客场积分</a></li>
</ul>
</div>
</form>
<div>
<table id="mainTable" style="width:100%;" border="0" cellpadding="0" cellspacing="1" class="infoTable">
// Address of the page to fetch; passed to curlGet() below.
$url='http://m.xxxxxx.com/info/Standings/2017-2018/116.htm';
/**
 * Download a URL via cURL, returning headers and body as one string.
 *
 * @param string $url Address to request.
 * @return string|bool The response (header block first, because CURLOPT_HEADER
 *                     is enabled), or false when curl_exec() fails.
 */
function curlGet($url)
{
    $session = curl_init();

    curl_setopt($session, CURLOPT_URL, $url);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($session, CURLOPT_HEADER, true);

    try {
        return curl_exec($session);
    } finally {
        // Runs after the return value has been computed, so the handle is
        // always released before control goes back to the caller.
        curl_close($session);
    }
}
echo curlGet($url), "\n";
HTTP/1.1 302 Found
Date: Sun, 11 Mar 2018 13:18:29 GMT
Content-Type: text/html; charset=utf-8
Content-Length: 174
Connection: keep-alive
Location: /ErrorInformation.aspx?aspxerrorpath=/info/Standings.aspx
Server: Win007/DX231SV
Win007-Cache-aspx: EXPIRED

<html><head><title>Object moved</title></head><body>
<h2>Object moved to <a href="/ErrorInformation.aspx?aspxerrorpath=/info/Standings.aspx">here</a>.</h2>
</body></html>