如果要抓取POST请求使用 <?php class HTTPRequest { var $mArray=array("GET","POST"); var $_fp; // HTTP socket var $_url; // full URL var $_host; // HTTP host var $_protocol; // protocol (HTTP/HTTPS) var $_uri; // request URI var $_method; // method var $_query; // query var $_port; // port
// scan url function _scan_url() { $req = $this->_url;
<?
/**
 * Extract the text of $file that lies between the markers $from and $end.
 *
 * The result starts right after the first occurrence of $from and stops at
 * the first occurrence of $end that follows it. As in the original
 * explode()-based implementation, the result also stops at a second
 * occurrence of $from if that comes before $end.
 *
 * @param string $file Text to search in.
 * @param string $from Start marker.
 * @param string $end  End marker.
 * @return string The extracted text, or '' when $from is not found or
 *                either marker is empty.
 */
function cut($file, $from, $end)
{
    $file = (string) $file;
    $from = (string) $from;
    $end  = (string) $end;

    // explode() rejects an empty delimiter (ValueError on PHP 8).
    if ($from === '' || $end === '') {
        return '';
    }

    // Only the piece at index 1 is needed, so cap the split at three pieces.
    $pieces = explode($from, $file, 3);
    if (!isset($pieces[1])) {
        // Start marker not present: nothing to cut.
        return '';
    }

    $pieces = explode($end, $pieces[1], 2);
    return $pieces[0];
}
?>
然后用 str_replace() 或正则表达式对截取到的内容做进一步处理就行了。
// Download the weather panel page for the city 石家庄.
// NOTE(review): the bytes sent for the city name depend on the encoding this
// source file is saved in — confirm it matches what the site expects.
$r = file_get_contents('http://weather.tq121.com.cn/mapanel/index1.php?city='.urlencode("石家庄"));

// Markers surrounding the wanted fragment. They are compared literally, so
// the line breaks and spaces inside them are significant.
$start=' <option value="伦敦">伦敦</option>
</select>
<span class="big-cn">';
$end='</span></td>
</tr>
</table>
<table width="166" height="5" border="0" cellpadding="0" cellspacing="0">
<tr>
<td width="160" align="center" valign="middle"><hr width="100%" size="1"></td>';

// file_get_contents() returns false when the download fails; do not feed
// that into cut(), just produce empty output. Otherwise cut the fragment
// out and strip the HTML tags.
$str2 = ($r === false) ? '' : strip_tags(cut($r, $start, $end));

// Remove each line break together with the whitespace that follows it.
echo preg_replace("/([\r\n])[\s]+/", "", $str2);
?>
// Uses the cut() function mentioned above; it is repeated here.

/**
 * Extract the text of $file that lies between the markers $from and $end.
 *
 * The result starts right after the first occurrence of $from and stops at
 * the first occurrence of $end that follows it. As in the original
 * explode()-based implementation, the result also stops at a second
 * occurrence of $from if that comes before $end.
 *
 * @param string $file Text to search in.
 * @param string $from Start marker.
 * @param string $end  End marker.
 * @return string The extracted text, or '' when $from is not found or
 *                either marker is empty.
 */
function cut($file, $from, $end)
{
    $file = (string) $file;
    $from = (string) $from;
    $end  = (string) $end;

    // explode() rejects an empty delimiter (ValueError on PHP 8).
    if ($from === '' || $end === '') {
        return '';
    }

    // Only the piece at index 1 is needed, so cap the split at three pieces.
    $pieces = explode($from, $file, 3);
    if (!isset($pieces[1])) {
        // Start marker not present: nothing to cut.
        return '';
    }

    $pieces = explode($end, $pieces[1], 2);
    return $pieces[0];
}
?>
还有,要注意被抓取网站的编码与程序自身的编码是否一致。
上面的程序保存时用的是默认编码。如果程序用 UTF-8 编码保存,就需要把上面的第一行替换为下面的代码,因为这里被抓取的网站使用的是 GB2312 编码,而不是 UTF-8:
header("Content-type:text/html;charset=utf-8;");
// Convert the UTF-8 city name to GB2312 before URL-encoding it into the
// request, then download the page.
$r = file_get_contents(
    'http://weather.tq121.com.cn/mapanel/index1.php?city='
    . urlencode(iconv("UTF-8", "GB2312", "石家庄"))
);

// Convert the downloaded page from GB2312 to UTF-8 for further processing.
$r = iconv(
    "GB2312",
    "UTF-8",
    $r
);
<?php
/**
 * Minimal HTTP/1.0 client built on fsockopen().
 *
 * Supports GET and POST (form-encoded body), plain HTTP and HTTPS (through
 * the ssl:// transport), and follows redirects up to a limit.
 */
class HTTPRequest
{
    /** HTTP methods this client can send; any other method falls back to GET. */
    public $mArray = array("GET", "POST");
    /** Socket handle of the current request (null once it has been closed). */
    public $_fp;
    /** Full URL as passed to the constructor. */
    public $_url;
    /** Host part of the URL. */
    public $_host;
    /** Lower-cased protocol ("http" or "https"). */
    public $_protocol;
    /** Request URI: path plus optional "?query". */
    public $_uri;
    /** Request method, one of $mArray. */
    public $_method;
    /** Already URL-encoded body sent with POST requests. */
    public $_query;
    /** TCP port (integer). */
    public $_port;

    /**
     * Split $this->_url into protocol, host, port and request URI.
     *
     * A URL without a scheme is treated as plain HTTP. The port defaults to
     * 443 for https and 80 otherwise; the URI defaults to "/".
     */
    public function _scan_url()
    {
        $url = (string) $this->_url;

        // Without a scheme there is nothing to split on; assume http.
        if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
            $url = 'http://' . $url;
        }

        $parts = parse_url($url);
        if ($parts === false) {
            // Seriously malformed URL: fall back to defaults everywhere.
            $parts = array();
        }

        $this->_protocol = isset($parts['scheme']) ? strtolower($parts['scheme']) : 'http';
        $this->_host = isset($parts['host']) ? $parts['host'] : '';
        $this->_port = isset($parts['port']) ? (int) $parts['port'] : $this->_default_port();

        $uri = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        if (isset($parts['query'])) {
            $uri .= '?' . $parts['query'];
        }
        $this->_uri = $uri;
    }

    /**
     * @param string $url    URL to request.
     * @param string $method "GET" or "POST"; anything else is sent as GET.
     * @param string $query  URL-encoded body for POST requests.
     */
    public function __construct($url, $method = "GET", $query = "")
    {
        $this->_method = in_array($method, $this->mArray, true) ? $method : "GET";
        $this->_url = $url;
        $this->_query = $query;
        $this->_scan_url();
    }

    /**
     * Legacy PHP 4-style constructor name, kept as an ordinary method so
     * existing code that calls it explicitly keeps working. PHP 8 no longer
     * treats a method named after the class as the constructor.
     */
    public function HTTPRequest($url, $method = "GET", $query = "")
    {
        $this->__construct($url, $method, $query);
    }

    /**
     * Perform the request and return the response body.
     *
     * @param int $maxRedirects How many redirects may still be followed.
     * @return string|false Response body (the raw response when it contains
     *                      no header/body separator), or false when the
     *                      connection cannot be opened.
     */
    public function DownloadToString($maxRedirects = 5)
    {
        // Older call sites passed unrelated arguments here; ignore them.
        if (!is_int($maxRedirects)) {
            $maxRedirects = 5;
        }

        $crlf = "\r\n";
        $req = $this->_build_request();

        // fetch
        $errno = 0;
        $errstr = '';
        $target = ($this->_protocol == 'https' ? 'ssl://' : '') . $this->_host;
        $this->_fp = fsockopen($target, $this->_port, $errno, $errstr, 30);
        if (!is_resource($this->_fp)) {
            // fsockopen() has already raised a warning describing the failure.
            return false;
        }

        fwrite($this->_fp, $req);
        $response = '';
        while (!feof($this->_fp)) {
            $chunk = fread($this->_fp, 1024);
            $meta = stream_get_meta_data($this->_fp);
            if ($chunk === false || !empty($meta['timed_out'])) {
                break;
            }
            $response .= $chunk;
        }
        fclose($this->_fp);
        $this->_fp = null;

        // split header and body
        $pos = strpos($response, $crlf . $crlf);
        if ($pos === false) {
            return $response;
        }
        $header = substr($response, 0, $pos);
        $body = substr($response, $pos + 2 * strlen($crlf));

        // parse status line and headers
        $lines = explode($crlf, $header);
        $status = 0;
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', $lines[0], $m)) {
            $status = (int) $m[1];
        }
        $headers = array();
        foreach ($lines as $line) {
            $colon = strpos($line, ':');
            if ($colon !== false) {
                $headers[strtolower(trim(substr($line, 0, $colon)))] = trim(substr($line, $colon + 1));
            }
        }

        // redirection? Only 3xx responses redirect; an unreadable status
        // line keeps the old "follow any Location" behaviour.
        $isRedirect = ($status === 0) || ($status >= 300 && $status < 400);
        if (isset($headers['location']) && $isRedirect && $maxRedirects > 0) {
            $http = new HTTPRequest($this->_resolve_location($headers['location']));
            return $http->DownloadToString($maxRedirects - 1);
        }

        return $body;
    }

    /** Default port for the current protocol. */
    private function _default_port()
    {
        return ($this->_protocol == 'https') ? 443 : 80;
    }

    /** "host" or "host:port" when the port is not the protocol default. */
    private function _host_with_port()
    {
        $suffix = ($this->_port != $this->_default_port()) ? ':' . $this->_port : '';
        return $this->_host . $suffix;
    }

    /** Build the raw HTTP/1.0 request text for the current method. */
    private function _build_request()
    {
        $crlf = "\r\n";
        $req = $this->_method . " " . $this->_uri . ' HTTP/1.0' . $crlf
            . 'Host: ' . $this->_host_with_port() . $crlf;

        if ($this->_method == "POST") {
            // The body must be exactly Content-length bytes: nothing is
            // appended after it.
            $req .= "Content-type: application/x-www-form-urlencoded" . $crlf
                . "User-Agent: Mozilla 4.0" . $crlf
                . "Content-length: " . strlen($this->_query) . $crlf
                . "Accept: */*" . $crlf
                . $crlf
                . $this->_query;
        } else {
            $req .= $crlf;
        }

        return $req;
    }

    /** Turn a Location header value into an absolute URL. */
    private function _resolve_location($location)
    {
        // Already absolute.
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) {
            return $location;
        }
        // Protocol-relative: //host/path
        if (strpos($location, '//') === 0) {
            return $this->_protocol . ':' . $location;
        }

        $origin = $this->_protocol . '://' . $this->_host_with_port();

        // Host-relative: /path
        if ($location !== '' && $location[0] === '/') {
            return $origin . $location;
        }

        // Relative to the directory of the current request path.
        $path = $this->_uri;
        $q = strpos($path, '?');
        if ($q !== false) {
            $path = substr($path, 0, $q);
        }
        $slash = strrpos($path, '/');
        $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

        return $origin . $dir . $location;
    }
}
// Usage
// The data to submit is assumed to be as follows.
// Form fields to submit, URL-encoded and joined with "&".
$data = http_build_query(
    array(
        "login"     => "guiyan",
        "password"  => "123456",
        "checkpass" => "Login!",
    ),
    '',
    '&'
);

// For example, 'http://localhost/pass.php' is the URL being requested.
$r = new HTTPRequest('http://localhost/pass.php', "POST", $data);

// Print the response string returned by the server.
echo $r->DownloadToString();
?>
我正则很次