<?php
// Demonstrates removing a known, single-line HTTP response header block from
// a string so that only the surrounding text is left.

// Sample input: a leading newline, the word "hello", the flattened header
// block, and a trailing newline.
$str = '
hello HTTP/1.1 200 OK Date: Tue, 23 Mar 2004 19:29:43 GMT Server: Apache/1.3.22 Set-Cookie: BAIDUID=6E67AA72C34CD67E19FA4F0C8A58C9CB; expires=Tue, 23-Mar-34 19:29:43 GMT; path=/; domain=.baidu.com Cache-Control: max-age=86400 Expires: Wed, 24 Mar 2004 19:29:43 GMT Last-Modified: Wed, 17 Mar 2004 18:05:00 GMT ETag: "3979-11ae-4058934c" Accept-Ranges: bytes Content-Length: 4526 Connection: close Content-Type: text/html
';

// The exact header text to remove. It is a fixed literal, so it is matched
// with str_replace() instead of being used as a regular expression: as a
// pattern, every "." in it would match any character rather than a literal
// dot.
$header = 'HTTP/1.1 200 OK Date: Tue, 23 Mar 2004 19:29:43 GMT Server: Apache/1.3.22 Set-Cookie: BAIDUID=6E67AA72C34CD67E19FA4F0C8A58C9CB; expires=Tue, 23-Mar-34 19:29:43 GMT; path=/; domain=.baidu.com Cache-Control: max-age=86400 Expires: Wed, 24 Mar 2004 19:29:43 GMT Last-Modified: Wed, 17 Mar 2004 18:05:00 GMT ETag: "3979-11ae-4058934c" Accept-Ranges: bytes Content-Length: 4526 Connection: close Content-Type: text/html';

// Drop the header block and print what remains.
$abc = str_replace($header, '', $str);
echo $abc;
// The closing tag is kept on purpose: non-PHP text follows this script.
?>
// Demonstrates removing a known, single-line HTTP response header block from
// a string so that only the surrounding text is left.

// Sample input: a leading newline, the word "hello", the flattened header
// block, and a trailing newline.
$str = '
hello HTTP/1.1 200 OK Date: Tue, 23 Mar 2004 19:29:43 GMT Server: Apache/1.3.22 Set-Cookie: BAIDUID=6E67AA72C34CD67E19FA4F0C8A58C9CB; expires=Tue, 23-Mar-34 19:29:43 GMT; path=/; domain=.baidu.com Cache-Control: max-age=86400 Expires: Wed, 24 Mar 2004 19:29:43 GMT Last-Modified: Wed, 17 Mar 2004 18:05:00 GMT ETag: "3979-11ae-4058934c" Accept-Ranges: bytes Content-Length: 4526 Connection: close Content-Type: text/html
';

// The exact header text to remove. It is a fixed literal, so it is matched
// with str_replace() instead of being used as a regular expression: as a
// pattern, every "." in it would match any character rather than a literal
// dot.
$header = 'HTTP/1.1 200 OK Date: Tue, 23 Mar 2004 19:29:43 GMT Server: Apache/1.3.22 Set-Cookie: BAIDUID=6E67AA72C34CD67E19FA4F0C8A58C9CB; expires=Tue, 23-Mar-34 19:29:43 GMT; path=/; domain=.baidu.com Cache-Control: max-age=86400 Expires: Wed, 24 Mar 2004 19:29:43 GMT Last-Modified: Wed, 17 Mar 2004 18:05:00 GMT ETag: "3979-11ae-4058934c" Accept-Ranges: bytes Content-Length: 4526 Connection: close Content-Type: text/html';

// Drop the header block and print what remains.
$abc = str_replace($header, '', $str);
echo $abc;
?>
则
// Removes everything from the start of $text up to and including
// "Content-Type: text/html". No pattern modifiers are used, so "^" anchors
// only at the very start of the string and "." does not match a newline:
// the match succeeds only when that header text sits on the first line.
$text = preg_replace("|^.+Content-Type: text/html|","",$text);
如果是那样子就确实比较简单了.
继续求救~~~
curl函数虽然是可以,但是不是系统默认支持的函数库,也不好:(
Content-Length 表示数据体的长度,你自己分析一下
分析content length确实可以
不过有些网站可能用了类似 ob_start() ... 之类的函数
它的header返回的信息是 Transfer-Encoding: chunked ,而没有content length的!
get回来的网页内容每一小段中间会自己插入一个 1000 在里面,估计是1k个字节打印一个1000出来(有兴趣的网友可以试试). 实际上这个 1000 是 chunked 编码中每个分块的长度,以十六进制表示,即 0x1000 = 4096 字节.
不过这个东西把原来的网页排版都给搅乱了~~!