我想抓取http://www.travelzen.com.cn/这个站的国际机票信息,思路如下:
第一步抓取flightLoading.php
// Step 1: POST the flight-search form to flightLoading.php. The response body
// ends up in $contents and the cookies received are written to cookie.txt.
$url = "http://www.travelzen.com.cn/flightLoading.php";
$referer = 'http://www.travelzen.com.cn/index.php?type=flight';
// Cookie file lives in the same directory as this script.
$cookie_jar = dirname(__FILE__)."/cookie.txt";
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body for the search (fromCity=HKG, toCity=BKK, adult=1, datefrom=2011-06-20).
$data = "flightType=Int&selectedDeparture=&selectedReturn=&selectedBookId=&fromCity=HKG&toCity=BKK&adult=1&child=0&datefrom=2011-06-20&searchToken=&type=&dep1=&arr1=&dep2=&arr2=&queryToken=&tripType=2&flightClass=All";
$ch = curl_init();
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Forwards the caller's own User-Agent header.
// NOTE(review): $_SERVER['HTTP_USER_AGENT'] is presumably unset outside a web request - confirm how this runs.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// File that the received cookies are saved to when the handle is closed.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch); 第二步抓取初步显示页面
// Step 2: POST the search details to getFlightSchPreview.php, sending the
// cookies stored in $cookie_jar (set by the preceding step). The response body
// is stored in $contents.
$url = "http://www.travelzen.com.cn/getFlightSchPreview.php";
$referer = "http://www.travelzen.com.cn/flightLoading.php";
$ch = curl_init();
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body (HKG -> BKK, datefrom=2011-06-20, dateto=2011-06-21).
// NOTE(review): the Chinese city names are sent as raw bytes, not URL-encoded.
$data = "movePrevNext_dep=&movePrevNext_ret=&departureCityIATACode=HKG&departureCountryIATACode=HK&destinationCityIATACode=BKK&destinationCountryIATACode=TH&isReturn=0&typ=2&cityfromDomestic=香港&citytoDomestic=曼谷&datefrom=2011-06-20&dateto=2011-06-21&numOfAdult=1&numOfChild=0&flightClass=All&nonstop=&searchToken=";
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
// NOTE(review): together with CURLOPT_COOKIEFILE below this presumably drops
// the session cookie saved earlier - confirm against the site's Set-Cookie headers.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// File that cookies are read from and sent with this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
// Forwards the caller's own User-Agent header.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch); 第三步抓取最后刷新的真实数据
// Step 3: POST to flightResult.php with the same form body as the
// getFlightSchPreview.php request, again sending the cookies in $cookie_jar.
// The response body is stored in $contents.
$url = "http://www.travelzen.com.cn/flightResult.php";
$referer = "http://www.travelzen.com.cn/flightLoading.php";
$ch = curl_init();
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body (HKG -> BKK, datefrom=2011-06-20, dateto=2011-06-21).
// NOTE(review): the Chinese city names are sent as raw bytes, not URL-encoded.
$data = "movePrevNext_dep=&movePrevNext_ret=&departureCityIATACode=HKG&departureCountryIATACode=HK&destinationCityIATACode=BKK&destinationCountryIATACode=TH&isReturn=0&typ=2&cityfromDomestic=香港&citytoDomestic=曼谷&datefrom=2011-06-20&dateto=2011-06-21&numOfAdult=1&numOfChild=0&flightClass=All&nonstop=&searchToken=";
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
// NOTE(review): together with CURLOPT_COOKIEFILE below this presumably drops
// the session cookie saved earlier - confirm against the site's Set-Cookie headers.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// File that cookies are read from and sent with this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
// Forwards the caller's own User-Agent header.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch);整个过程,到第2步都可以正常抓回,但第3步最后只抓回了架子,里面的数据没抓回来,取消第2步,直接抓第3步也是一样的效果,希望life169能帮我看看,万分感谢,有高手有时间帮忙看看
http://topic.csdn.net/u/20110620/15/549ba42b-102d-4244-a7ee-216753f4329a.html
能解决,这个帖子的100分也一并送上。
第一步抓取flightLoading.php
// Step 1: POST the flight-search form to flightLoading.php. The response body
// ends up in $contents and the cookies received are written to cookie.txt.
$url = "http://www.travelzen.com.cn/flightLoading.php";
$referer = 'http://www.travelzen.com.cn/index.php?type=flight';
// Cookie file lives in the same directory as this script.
$cookie_jar = dirname(__FILE__)."/cookie.txt";
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body for the search (fromCity=HKG, toCity=BKK, adult=1, datefrom=2011-06-20).
$data = "flightType=Int&selectedDeparture=&selectedReturn=&selectedBookId=&fromCity=HKG&toCity=BKK&adult=1&child=0&datefrom=2011-06-20&searchToken=&type=&dep1=&arr1=&dep2=&arr2=&queryToken=&tripType=2&flightClass=All";
$ch = curl_init();
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Forwards the caller's own User-Agent header.
// NOTE(review): $_SERVER['HTTP_USER_AGENT'] is presumably unset outside a web request - confirm how this runs.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// File that the received cookies are saved to when the handle is closed.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch); 第二步抓取初步显示页面
// Step 2: POST the search details to getFlightSchPreview.php, sending the
// cookies stored in $cookie_jar (set by the preceding step). The response body
// is stored in $contents.
$url = "http://www.travelzen.com.cn/getFlightSchPreview.php";
$referer = "http://www.travelzen.com.cn/flightLoading.php";
$ch = curl_init();
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body (HKG -> BKK, datefrom=2011-06-20, dateto=2011-06-21).
// NOTE(review): the Chinese city names are sent as raw bytes, not URL-encoded.
$data = "movePrevNext_dep=&movePrevNext_ret=&departureCityIATACode=HKG&departureCountryIATACode=HK&destinationCityIATACode=BKK&destinationCountryIATACode=TH&isReturn=0&typ=2&cityfromDomestic=香港&citytoDomestic=曼谷&datefrom=2011-06-20&dateto=2011-06-21&numOfAdult=1&numOfChild=0&flightClass=All&nonstop=&searchToken=";
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
// NOTE(review): together with CURLOPT_COOKIEFILE below this presumably drops
// the session cookie saved earlier - confirm against the site's Set-Cookie headers.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// File that cookies are read from and sent with this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
// Forwards the caller's own User-Agent header.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch); 第三步抓取最后刷新的真实数据
// Step 3: POST to flightResult.php with the same form body as the
// getFlightSchPreview.php request, again sending the cookies in $cookie_jar.
// The response body is stored in $contents.
$url = "http://www.travelzen.com.cn/flightResult.php";
$referer = "http://www.travelzen.com.cn/flightLoading.php";
$ch = curl_init();
// Connect timeout in seconds (passed to CURLOPT_CONNECTTIMEOUT below).
$timeout = 10;
// Pre-built form body (HKG -> BKK, datefrom=2011-06-20, dateto=2011-06-21).
// NOTE(review): the Chinese city names are sent as raw bytes, not URL-encoded.
$data = "movePrevNext_dep=&movePrevNext_ret=&departureCityIATACode=HKG&departureCountryIATACode=HK&destinationCityIATACode=BKK&destinationCountryIATACode=TH&isReturn=0&typ=2&cityfromDomestic=香港&citytoDomestic=曼谷&datefrom=2011-06-20&dateto=2011-06-21&numOfAdult=1&numOfChild=0&flightClass=All&nonstop=&searchToken=";
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
// Marks a new cookie "session": per the PHP cURL docs, libcurl then ignores
// session cookies it loads from a previous session.
// NOTE(review): together with CURLOPT_COOKIEFILE below this presumably drops
// the session cookie saved earlier - confirm against the site's Set-Cookie headers.
curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// File that cookies are read from and sent with this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
// Forwards the caller's own User-Agent header.
curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
// Response body as a string, or false on failure (not checked here).
$contents = curl_exec($ch);
curl_close($ch);整个过程,到第2步都可以正常抓回,但第3步最后只抓回了架子,里面的数据没抓回来,取消第2步,直接抓第3步也是一样的效果,希望life169能帮我看看,万分感谢,有高手有时间帮忙看看
http://topic.csdn.net/u/20110620/15/549ba42b-102d-4244-a7ee-216753f4329a.html
能解决,这个帖子的100分也一并送上。
CURLOPT_COOKIEJAR 是发送的 cookie
CURLOPT_COOKIEFILE 是接收的 cookie
你刚好弄反了
而且第三步是做什么用的,和第二步有区别?
注释掉CURLOPT_COOKIESESSION可以获取到机票信息列表,这个东西一般都不设置什么true,有什么cookie就直接请求到服务端算了。
<?php
/**
 * Fetches a Travelzen international flight list (sample query: HKG -> BKK).
 *
 * Two POST requests are made:
 *   1. flightLoading.php       - submits the search form; the cookies received
 *                                are saved to cookie.txt next to this script.
 *   2. getFlightSchPreview.php - replays those cookies; its response body is
 *                                echoed.
 *
 * CURLOPT_COOKIESESSION is deliberately not set on either request: it makes
 * libcurl ignore session cookies loaded from the cookie file, which is
 * presumably the very cookie step 1 hands over to step 2.
 *
 * Written without PHP 7+ syntax so it runs on the same runtimes as the
 * original script.
 */

/**
 * Sends a URL-encoded form via HTTP POST and returns the response body.
 *
 * @param string $url            Target URL.
 * @param string $referer        Value for the Referer header.
 * @param array  $fields         Form fields (name => value); encoded with
 *                               http_build_query() so non-ASCII values such as
 *                               the Chinese city names are percent-encoded.
 * @param array  $cookieOptions  Extra CURLOPT_* => value pairs, used here to
 *                               choose between CURLOPT_COOKIEJAR and
 *                               CURLOPT_COOKIEFILE.
 * @param int    $connectTimeout Connect timeout in seconds.
 *
 * @return string Response body.
 *
 * @throws RuntimeException When the handle cannot be created or the transfer fails.
 */
function travelzenPostForm($url, $referer, array $fields, array $cookieOptions = array(), $connectTimeout = 10)
{
    // HTTP_USER_AGENT only exists when a browser requested this script; fall
    // back to a fixed string so CLI/cron runs do not pass an undefined index.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT'])
        ? $_SERVER['HTTP_USER_AGENT']
        : 'Mozilla/5.0 (compatible; PHP cURL)';

    $ch = curl_init();
    if ($ch === false) {
        throw new RuntimeException('Unable to initialise cURL for ' . $url);
    }

    curl_setopt_array($ch, $cookieOptions + array(
        CURLOPT_URL            => $url,
        CURLOPT_POST           => 1,
        // Explicit '&' separator: arg_separator.output may be configured as '&amp;'.
        CURLOPT_POSTFIELDS     => http_build_query($fields, '', '&'),
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_REFERER        => $referer,
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
        CURLOPT_USERAGENT      => $userAgent,
    ));

    $body  = curl_exec($ch);
    $error = curl_error($ch);
    // Closing the handle is what flushes cookies to the CURLOPT_COOKIEJAR file.
    curl_close($ch);

    if ($body === false) {
        throw new RuntimeException('Request to ' . $url . ' failed: ' . $error);
    }

    return $body;
}

$cookie_jar = dirname(__FILE__) . '/cookie.txt';

try {
    // Step 1: submit the search form; only the cookies it sets are needed.
    travelzenPostForm(
        'http://www.travelzen.com.cn/flightLoading.php',
        'http://www.travelzen.com.cn/index.php?type=flight',
        array(
            'flightType'        => 'Int',
            'selectedDeparture' => '',
            'selectedReturn'    => '',
            'selectedBookId'    => '',
            'fromCity'          => 'HKG',
            'toCity'            => 'BKK',
            'adult'             => '1',
            'child'             => '0',
            'datefrom'          => '2011-06-20',
            'searchToken'       => '',
            'type'              => '',
            'dep1'              => '',
            'arr1'              => '',
            'dep2'              => '',
            'arr2'              => '',
            'queryToken'        => '',
            'tripType'          => '2',
            'flightClass'       => 'All',
        ),
        array(CURLOPT_COOKIEJAR => $cookie_jar)
    );

    // Step 2: fetch the flight list, sending the cookies saved by step 1.
    $contents = travelzenPostForm(
        'http://www.travelzen.com.cn/getFlightSchPreview.php',
        'http://www.travelzen.com.cn/flightLoading.php',
        array(
            'movePrevNext_dep'           => '',
            'movePrevNext_ret'           => '',
            'departureCityIATACode'      => 'HKG',
            'departureCountryIATACode'   => 'HK',
            'destinationCityIATACode'    => 'BKK',
            'destinationCountryIATACode' => 'TH',
            'isReturn'                   => '0',
            'typ'                        => '2',
            'cityfromDomestic'           => '香港',
            'citytoDomestic'             => '曼谷',
            'datefrom'                   => '2011-06-20',
            'dateto'                     => '2011-06-21',
            'numOfAdult'                 => '1',
            'numOfChild'                 => '0',
            'flightClass'                => 'All',
            'nonstop'                    => '',
            'searchToken'                => '',
        ),
        array(CURLOPT_COOKIEFILE => $cookie_jar)
    );
} catch (RuntimeException $e) {
    // Report the failure instead of silently echoing an empty page.
    error_log($e->getMessage());
    exit(1);
}

echo $contents;
?>