/**
 * Fetch a URL with cURL and return the response body.
 *
 * The earlier version never executed the request (the curl_exec() call was
 * commented out) and therefore returned an undefined variable. It also
 * parsed the host out of the URL and built a browser-like header list
 * without ever using either, so that dead code has been dropped; only the
 * options that were actually active (URL, RETURNTRANSFER, timeouts) are set.
 *
 * @param string $url     Absolute URL to fetch.
 * @param int    $timeout Seconds allowed for connecting and for the whole
 *                        transfer. Raise it for slow long-distance links.
 * @param int    $retries Extra attempts made after a failed transfer.
 *
 * @return string|false Response body, or false if every attempt failed.
 */
function CURL_BAIDU($url, $timeout = 10, $retries = 0){
    $attempts = max(0, (int) $retries) + 1;
    $body = false;

    for ($attempt = 0; $attempt < $attempts; $attempt++) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // A connect timeout of 0 means "use libcurl's default", not "no wait",
        // so bound the connect phase explicitly.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

        $body = curl_exec($ch);

        // curl_close() has no effect from PHP 8.0 on; older versions need it
        // to release the handle.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }

        if ($body !== false) {
            break;
        }
    }

    return $body;
}
用CURL的话10次采集到不到5次 但在本机上测试没问题 因为是放在海外服务器上用下面这个函数的命中率是百分之百 但是经常会卡死 让服务器挂掉 几乎两天就会有一次
/**
 * Fetch an http:// URL over a raw socket and return the raw HTTP response
 * (status line, headers and body, exactly as received).
 *
 * Changes from the earlier version, all aimed at the reported hangs:
 *  - only "Connection: Close" is sent, so the server ends the stream when
 *    the response is complete instead of holding the socket open;
 *  - a read timeout is set, and the read loop stops on timeout or read
 *    failure instead of spinning on feof() indefinitely;
 *  - the port given in the URL is honoured (it used to be parsed and ignored);
 *  - eregi_replace(), removed in PHP 7, is replaced by preg_replace().
 *
 * @param string $url     URL to fetch; a leading "http://" is optional.
 * @param int    $timeout Seconds to wait for each read before giving up.
 *
 * @return string Raw response received so far; may be partial or empty if
 *                the request could not be sent or the read timed out.
 *                The script is terminated with die() if the connection
 *                cannot be opened.
 */
function get_content_by_socket($url, $timeout = 10){
    // Split "host[:port]/path?query" into its parts.
    $target = preg_replace('#^http://#i', '', $url);
    $segments = explode('/', $target);
    $authority = array_shift($segments);
    $path = implode('/', $segments);

    $parts = explode(':', $authority);
    $host = $parts[0];
    $port = (isset($parts[1]) && $parts[1] !== '') ? (int) $parts[1] : 80;

    // Warning suppressed because failure is handled explicitly below.
    $fp = @fsockopen($host, $port, $errno, $errstr, 5);
    if ($fp === false) {
        die("Open ". $path ." failed");
    }

    // Without this, a stalled server blocks fgets() indefinitely.
    stream_set_timeout($fp, $timeout);

    $host_header = ($port === 80) ? $host : $host . ':' . $port;

    $request  = "GET /" . $path . " HTTP/1.1\r\n";
    $request .= "Accept: */*\r\n";
    $request .= "Accept-Language: zh-cn\r\n";
    $request .= "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; InfoPath.1; .NET CLR 2.0.50727)\r\n";
    $request .= "Host: " . $host_header . "\r\n";
    $request .= "Connection: Close\r\n\r\n";

    $contents = '';

    if (fwrite($fp, $request) === false) {
        fclose($fp);
        return $contents;
    }

    while (!feof($fp)) {
        $chunk = fgets($fp, 8192);
        if ($chunk === false) {
            // Read error or timeout: stop rather than loop on feof().
            break;
        }
        $contents .= $chunk;

        $meta = stream_get_meta_data($fp);
        if (!empty($meta['timed_out'])) {
            break;
        }
    }

    fclose($fp);
    return $contents;
}
主要是卡死的问题：用CURL不会卡死，但命中率低得吓人；用fsockopen命中率虽然高，但服务器负荷不起。所以我想通过设置CURL来改善命中率，但不知道如何设置。主要是因为放在国外服务器上，访问国内网站的速度的确会受些影响。
试试 set_time_limit(int $seconds)；也可能是内存不够：
// Set the PHP memory_limit setting to 512 MB at runtime.
ini_set('memory_limit','512M');