下面是我写的PHP代码:
<?php
// Fetch a Google "site:" search result page with cURL and keep the raw
// response body in $contents (false when the request fails).
header("Content-Type: text/html;charset=UTF-8");
$url="http://www.google.com.hk/search?hl=zh-CN&q=site%3Awww.cctv.com";
// Cookies received during the request are written to this file on curl_close().
$cookie_file=dirname(__FILE__)."/temp/google.txt";
// HTTP_USER_AGENT is not set when the script runs from the CLI or when the
// client omits the header, so fall back to a generic browser identifier
// instead of reading an undefined index.
$user_agent=isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : 'Mozilla/5.0';
$ch= curl_init();
curl_setopt($ch, CURLOPT_URL,$url);
curl_setopt($ch, CURLOPT_USERAGENT,$user_agent);
// Return the body as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
curl_setopt($ch, CURLOPT_COOKIEJAR,$cookie_file);
$contents= curl_exec($ch);
if ($contents === false) {
    // Record the failure reason before the handle is closed; $contents stays
    // false so later code can still detect the failure.
    error_log('cURL request failed: ' . curl_error($ch));
}
curl_close($ch);
?>
可以获取到数据。Google的页面是UTF-8,我的页面是GBK,但是我用正则怎么都取不到title和页面里的收录数量,求高手帮忙看下。
// Fetch a Google "site:" search result page with cURL and keep the raw
// response body in $contents (false when the request fails).
header("Content-Type: text/html;charset=UTF-8");
$url="http://www.google.com.hk/search?hl=zh-CN&q=site%3Awww.cctv.com";
// Cookies received during the request are written to this file on curl_close().
$cookie_file=dirname(__FILE__)."/temp/google.txt";
// HTTP_USER_AGENT is not set when the script runs from the CLI or when the
// client omits the header, so fall back to a generic browser identifier
// instead of reading an undefined index.
$user_agent=isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : 'Mozilla/5.0';
$ch= curl_init();
curl_setopt($ch, CURLOPT_URL,$url);
curl_setopt($ch, CURLOPT_USERAGENT,$user_agent);
// Return the body as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1);
curl_setopt($ch, CURLOPT_COOKIEJAR,$cookie_file);
$contents= curl_exec($ch);
if ($contents === false) {
    // Record the failure reason before the handle is closed; $contents stays
    // false so later code can still detect the failure.
    error_log('cURL request failed: ' . curl_error($ch));
}
curl_close($ch);
?>
可以获取到数据。Google的页面是UTF-8,我的页面是GBK,但是我用正则怎么都取不到title和页面里的收录数量,求高手帮忙看下。
// Convert the fetched UTF-8 text to GBK. iconv() returns the converted string
// and leaves its argument untouched, so the result has to be assigned back.
// //IGNORE asks iconv to drop characters that GBK cannot represent rather than
// fail on them; if the conversion still fails (false), $string is kept as-is.
$converted = iconv('utf-8', 'GBK//IGNORE', $string);
if ($converted !== false) {
    $string = $converted;
}
至于正则:
//title
// Title: capture the text of the first <title> element into $title[1].
// The lazy quantifier stops at the first closing tag instead of the last one
// on the page, the `s` flag lets `.` cross newlines, and [^>]* tolerates
// attributes on the opening tag.
preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $string, $title);
// Counter: capture the digits-and-commas figure from the result-count sentence
// into $counter[1].
// NOTE: the pattern is matched byte-wise (no `u` flag), so the Chinese literal
// in this source file must be stored in the same encoding as $string.
preg_match('/找到约\s*([\d,]+)\s*条结果/', $string, $counter);
// Strip the thousands separators to get the plain number; kept as a digit
// string (null when nothing matched) so very large counts are not truncated.
$count = isset($counter[1]) ? str_replace(',', '', $counter[1]) : null;