CURL抓取疑问 有高人熟悉CURL不,我想抓回www.shenzhenair.com的国内机票信息,这个shenzhenair会在第1次查询的时候生成一个strIdentity值,但我把这个值获取回来后再次查询就说这次查询已过期,有人知道解决方法不,急啊,分不够还可以再加。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 这个strIdentity值应该是在使用过后就销毁了,LZ如果有办法可以在他销毁前得到这个值并且应用就可以了 把上一次查询的cookie也送过去试一下 我在获取strIdentity后就中止这个页面运行了,但还是不行。 一定要用CURL吗。具体要求还是听得有点模糊! 我是说通过curl把旧的cookie送过去,等你接到内容,那边已经把session改变了 LZ抓一下他在http://www.shenzhenair.com/common/flightSearch.do这个时候还传了什么参数 这个用抓包工具 如 httpwatch 看一下传了那些参数然后模拟post过去,要模拟浏览器,要伪造来路每一次都要抓cookie,传cookie 估计是没有携带session cookie,开启cookiejar属性试试看? 没搜索到合适的资料,有哪位大佬有抓取和发送COOKIES的代码呀,万分感谢呀 我在第1次抓取的时候加了curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);对应的路径也生成了COOKIES文件,在第2次抓取的时候curl_setopt($curl, CURLOPT_COOKIE, $cookie_jar);但结果还是显示本页面已失效,有高手能花点时间试试不,急 验证通过,及时结贴给分啊$url = "http://www.shenzhenair.com/common/flightSearch.do"; $referer = 'http://www.shenzhenair.com/common/flightSearch.do?operate=goLoadingPage&originalPage=index&lan=zh';$cookie_jar = dirname(__FILE__)."/cookie.txt"; $timeout = 1; $mudedi1 = urlencode("SZX;S-深圳");$mudedi2 = urlencode("PEK;B-北京");$data = "productId=00000000001&PRODUCT_ID=000000000448&orgDate10=2011-04-10&orgDate20=&allVar=&operate=goLoadingPage&platID=plat_1&originalPage=wshdpGN&flightConditionEntity.changedOrgCity=&flightConditionEntity.hbType=GN&flightConditionEntity.orgCity1[0]=".$mudedi1."&orgCity10=".$mudedi1."&flightConditionEntity.dstCity0=".$mudedi2."&flightConditionEntity.orgDate1[0]=2011-04-10&flightConditionEntity.orgDate2[0]=&flightConditionEntity.hcType=DC";$ch = curl_init(); curl_setopt($ch, CURLOPT_POSTFIELDS, $data);curl_setopt($ch, CURLOPT_POST, 1);curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_REFERER, $referer); curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);curl_setopt($ch, CURLOPT_COOKIEJAR, 
$cookie_jar);$contents = curl_exec($ch);curl_close($ch); if(preg_match('/<input\s+type="hidden"\s*name="strIdentity"\s+value="(.*?)"\/>/i',$contents,$arr)){ $strIdentity = $arr[1];}$ch = curl_init();$timeout = 1; $mudedi1 = "SZX;S-深圳";$mudedi2 = "PEK;B-北京";$mudedi1 = urlencode("SZX;S-深圳");$mudedi2 = urlencode("PEK;B-北京");$data = "operate=flightSearch&originalPage=index&flightConditionEntity.hcType=DC&flightConditionEntity.hbType=GN&flightConditionEntity.productId=&flightConditionEntity.orgDate=&flightConditionEntity.orgCity2=".$mudedi1."&flightConditionEntity.orgCity3=".$mudedi1."&flightConditionEntity.orgCity=&orgCity10=".$mudedi1."&orgDate10=2011-04-10&orgDate20=&transfer=0&flightConditionEntity.dstCity=&flightConditionEntity.dstCity0=".$mudedi2."&flightConditionEntity.dstCity1=&flightConditionEntity.dstCity2=&flightConditionEntity.dstCity3=&flightConditionEntity.isExchangeFreeTicket=false&isChangedDay=null&strIdentity=".$strIdentity;curl_setopt($ch, CURLOPT_POSTFIELDS, $data);curl_setopt($ch, CURLOPT_POST, 1);curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_REFERER, $referer); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);$contents = curl_exec($ch);echo $contents.'<br>';curl_close($ch); http://localhost/ 为什么要连接计算机?用户名和密码指的是什么? php+apache+mySQl php代码一段求教!!急! PHPexcel有人很熟悉吗?能小弟一个忙吧,谢谢 点击message_action的确定按钮后为何可以多次执行? 报告! PHP 文件上传提交后取不到 <input type="file" name="upfile"> 的值是什么原因. 我要php论坛好点的,单位用的。 php为什么安上后不能用 求PHP算法 求大神帮助 怎么抓取shenzhenair查询后的机票信息(二) 如何通过URL判断页面是否为HTML?
然后模拟 POST 提交过去:要模拟浏览器(设置 User-Agent),要伪造来路(设置 Referer),
每一次请求都要抓取返回的 cookie,并在下一次请求时把 cookie 传回去。
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
对应的路径也生成了COOKIES文件,在第2次抓取的时候
curl_setopt($curl, CURLOPT_COOKIE, $cookie_jar);
但结果还是显示“本页面已失效”,有高手能花点时间试试吗?急。
// Two-step scrape of a domestic flight search on shenzhenair.com.
//
// Step 1 POSTs the "goLoadingPage" form. The response contains a hidden
// <input name="strIdentity"> token, and the cookies the server sets are
// written to $cookie_jar when the handle is closed.
// Step 2 POSTs the real "flightSearch" form, sending back both the token
// and the cookies saved in step 1, then echoes the result page.
//
// Any failure (transport error, token not found) stops the script with a
// message instead of silently submitting an empty token.

// Endpoint used by both requests.
$url = "http://www.shenzhenair.com/common/flightSearch.do";
$referer = 'http://www.shenzhenair.com/common/flightSearch.do?operate=goLoadingPage&originalPage=index&lan=zh';
// Cookie file shared by both requests; the directory must be writable.
$cookie_jar = dirname(__FILE__)."/cookie.txt";
// Connect timeout in seconds.
$timeout = 1;
// Departure date, sent in every date field of both forms.
$flight_date = "2011-04-10";
// Origin and destination in the site's "CODE;X-Name" format, URL-encoded
// because the POST bodies below are hand-built urlencoded strings.
$mudedi1 = urlencode("SZX;S-深圳");
$mudedi2 = urlencode("PEK;B-北京");
// $_SERVER['HTTP_USER_AGENT'] is not set under CLI/cron, so fall back to a
// fixed browser-like string instead of sending an empty User-Agent.
$user_agent = isset($_SERVER['HTTP_USER_AGENT'])
    ? $_SERVER['HTTP_USER_AGENT']
    : 'Mozilla/5.0 (compatible; PHP cURL)';

// ---- Step 1: request the loading page to obtain strIdentity + cookies ----
$data = "productId=00000000001&PRODUCT_ID=000000000448"
    ."&orgDate10=".$flight_date."&orgDate20=&allVar=&operate=goLoadingPage"
    ."&platID=plat_1&originalPage=wshdpGN"
    ."&flightConditionEntity.changedOrgCity=&flightConditionEntity.hbType=GN"
    ."&flightConditionEntity.orgCity1[0]=".$mudedi1
    ."&orgCity10=".$mudedi1
    ."&flightConditionEntity.dstCity0=".$mudedi2
    ."&flightConditionEntity.orgDate1[0]=".$flight_date
    ."&flightConditionEntity.orgDate2[0]=&flightConditionEntity.hcType=DC";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Start a fresh cookie session so a stale session is never reused.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
// Received cookies are written to this file when the handle is closed.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
$contents = curl_exec($ch);
if ($contents === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('Step 1 (goLoadingPage) request failed: '.$error);
}
// Closing the handle flushes the cookies to $cookie_jar for step 2.
curl_close($ch);

// Pull the one-time token out of the hidden form field.
if (!preg_match('/<input\s+type="hidden"\s*name="strIdentity"\s+value="(.*?)"\/>/i', $contents, $arr)) {
    exit('strIdentity was not found in the step 1 response.');
}
$strIdentity = $arr[1];

// ---- Step 2: run the actual search with the token and saved cookies ----
$data = "operate=flightSearch&originalPage=index"
    ."&flightConditionEntity.hcType=DC&flightConditionEntity.hbType=GN"
    ."&flightConditionEntity.productId=&flightConditionEntity.orgDate="
    ."&flightConditionEntity.orgCity2=".$mudedi1
    ."&flightConditionEntity.orgCity3=".$mudedi1
    ."&flightConditionEntity.orgCity=&orgCity10=".$mudedi1
    ."&orgDate10=".$flight_date."&orgDate20=&transfer=0"
    ."&flightConditionEntity.dstCity="
    ."&flightConditionEntity.dstCity0=".$mudedi2
    ."&flightConditionEntity.dstCity1=&flightConditionEntity.dstCity2="
    ."&flightConditionEntity.dstCity3="
    ."&flightConditionEntity.isExchangeFreeTicket=false&isChangedDay=null"
    // Encode the token: a raw '+', '&' or '=' would corrupt the form body.
    ."&strIdentity=".urlencode($strIdentity);

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Send the cookies saved in step 1. CURLOPT_COOKIEFILE takes a file path;
// CURLOPT_COOKIE would expect a literal "name=value" string instead.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
$contents = curl_exec($ch);
if ($contents === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('Step 2 (flightSearch) request failed: '.$error);
}
echo $contents.'<br>';
curl_close($ch);