Mar 8 12:51:26 10.10.255.5 haproxy[4843]: 124.240.42.88:54351[10.10.255.5:36672]->121.14.0.101:80[121.14.0.101:80] [08/Mar/2012:12:51:26.627] http_check http_image/<NOSRV> 6/0/9/10/26 200 1520 - - ---- 500
/500/9/0/0 0/0 {news.sohu.com|http://wei.sohu.com/s2011/wei/?20120308} "GET /upload/sogou_world_20100618/h_bg_tag74on.gif HTTP/1.1"
/500/9/0/0 0/0 {news.sohu.com|http://wei.sohu.com/s2011/wei/?20120308} "GET /upload/sogou_world_20100618/h_bg_tag74on.gif HTTP/1.1"
2.正则(可能性能不是很好)
3.php 按照行读取然后分割就可以了
Mar 8 12:51:26 10.10.255.5 haproxy[4843]: 124.240.42.88:54351[10.10.255.5:36672]->121.14.0.101:80[121.14.0.101:80] [08/Mar/2012:12:51:26.627] http_check http_image/<NOSRV> 6/0/9/10/26 200 1520 - - ---- 500
/500/9/0/0 0/0 {news.sohu.com|http://wei.sohu.com/s2011/wei/?20120308} "GET /upload/sogou_world_20100618/h_bg_tag74on.gif HTTP/1.1"
LOG;
preg_match('/\|(http[^}]*?)}/s',$log,$matche);
print_r($matche);不知道是不是你要的
awk我看了一点
Mar 8 12:51:26 10.10.255.5 haproxy[4843]: 124.240.42.88:54351[10.10.255.5:36672]->121.14.0.101:80[121.14.0.101:80] [08/Mar/2012:12:51:26.627] http_check http_image/<NOSRV> 6/0/9/10/26 200 1520 - - ---- 500
/500/9/0/0 0/0 {news.sohu.com|http://wei.sohu.com/s2011/wei/?20120308} "GET /upload/sogou_world_20100618/h_bg_tag74on.gif HTTP/1.1"
';preg_match('/http:\/\/(.*?)\s*}/s', $log, $m);
print_r($m[1]);
preg_match('"/[^\.\/]+\.[^\.\/]+$/', $s, $r);
if(empty($r[1])) continue;
@$res[$r[1]]++;
}
=============
这样匹配域名为什么不准确啊?
IP地址 总访问次数 访问主域名1/访问次数 访问主域名2/访问次数 访问主域名3/访问次数 ......
10.10.255.31 1000 Efly.cc/230 Abc.cn/240 Cdf.com/280 .......
$res = array();
for($i=0;$i<100*100*100;$i++){
$line = fgets($fp);
if(preg_match('/\|(http[^}]*?)}/s',$line,$match)){
//preg_match('/\[[\d.:]+\].+\[([\d.:]+)\]/', $s, $r);
@$res[$match[1]]++;
}
if(feof($fp)){
break;
}
}
if(empty($res)){
echo "do_100w error no match Ip\n";
}
arsort($res);
$res = array_slice($res,0,10000);
return $res;
}
/**
 * Merge two hit-count tables and keep only the most frequent entries.
 *
 * Counts from $b are added onto the matching keys of $a (keys missing from
 * $a start at zero), the combined table is sorted by count in descending
 * order, and everything beyond the first $limit entries is discarded.
 * Entries cut here lose their accumulated count, so repeatedly merging
 * batches through this function yields approximate totals for keys that
 * hover around the cut-off.
 *
 * @param array $a     Accumulated counts, key => count.
 * @param array $b     Counts to add, key => count.
 * @param int   $limit Maximum number of entries to keep (default 10000).
 * @return array Merged counts sorted by count descending, at most $limit entries.
 */
function array_union($a, $b, $limit = 10000)
{
    foreach ($b as $key => $value) {
        // Seed missing keys explicitly instead of hiding the notice with "@".
        $a[$key] = (isset($a[$key]) ? $a[$key] : 0) + $value;
    }
    arsort($a);

    // preserve_keys = true: without it array_slice() renumbers integer keys
    // (including numeric-string keys PHP has cast to int), which would
    // detach the counts from the keys they belong to.
    return array_slice($a, 0, $limit, true);
}

$fp = fopen('haproxy.log.2','r');
// Driver: read the log in batches through do_100w(), fold each batch into
// the running totals with array_union(), then write the totals to a file.

// Abort on a bad handle: feof() on a failed fopen() never reports
// end-of-file, so the loop below would otherwise not terminate normally.
if ($fp === false) {
    echo "cannot open haproxy.log.2\n";
    exit(1);
}

$a = array();   // running totals, key => count
$i = 0;         // number of batches processed so far
while (!feof($fp)) {
    $b = do_100w($fp);
    $a = array_union($a, $b);
    $i++;
    // Progress report: current file offset in MB and batch counter.
    echo 'count to fp:'.(int) (ftell($fp)/(1024*1024)).'MB nums:'.($i).'X100W'."\n";
}
fclose($fp);

$rs_fp = fopen('result.txt3', 'w');
if ($rs_fp === false) {
    echo "cannot open result.txt3 for writing\n";
    exit(1);
}
// One "key - count" line per entry, in the order array_union() returned them.
foreach ($a as $key => $value) {
    fwrite($rs_fp, $key.' - '.$value."\n");
}
fclose($rs_fp);?>