// 把得到的叶面内容保存到指定的文件中 $fp = fopen($destfile,"w+"); fwrite($fp, $result, strlen($result)); fclose($fp); $source= null; *********************************************************************************/ /** * @title Html Source class - api for getting/posting to websites * @author C.Small * @version 1.0 * * - HTTP 1.0 specs - http://www.w3.org/Protocols/rfc1945/rfc1945] * - Easier to read version at http://www.ics.uci.edu/pub/ietf/http/rfc1945.html */ Class HtmlSource { /* All vars are public */
// Common var $host; var $port = 80; var $page; var $request; var $httpversion; var $method = "GET"; var $timeout = 30;
var $striptags; var $showsource; var $strip_responseheader = true;
// Cookie var $cookies = array();
// GET var $getvars = array();
// POST var $postvars = array();
// Request fields var $accept; //format: Accept: */* var $accept_encoding;//format: gzip,deflate var $accept_language;//format: en-gb var $authorization; //format: username:password var $content_length; //format: 40 (for POST) var $content_type; //format: application/x-www-form-urlencoded var $date; //format: Date: Tue, 15 Nov 1994 08:12:31 GMT var $referer; //format: Referer: http://www.domain.com var $useragent; //format: User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)
function addPostVar($name,$value) { if (!empty($name) && !empty($value)) { $this->postvars[] =$name."=".$value; } } function addGetVar($name,$value) { if (!empty($name) && !empty($value)) { $this->getvars[] = $name."=".$value; } } function addCookie($name,$value) { if (!empty($name) && !empty($value)) { $this->cookies[] = $name."=".$value; } } function getSource() { // Error check //if (empty($this->httpversion)) //{ $this->httpversion = "1.0"; //} if (empty($this->method)) { $this->method = "GET"; }
// Make GET variables $vars = ""; $cookiehead = ""; if (sizeof($this->getvars) >0 && $this->method == "GET") { $vars = "?"; $vars .= join($this->getvars,"&"); // Knock last '&' off // Remove this..? if (sizeof($this->getvars) >1) { $vars = substr($vars,0,strlen($vars) -1); } } // Make POST variables if (sizeof($this->postvars) >0 && $this->method == "POST") { $vars = "\r\n"; $strpostvar = join($this->postvars,"&"); $vars .= $strpostvar; $vars .= "\r\n"; }
// Make Cookies if (sizeof($this->cookies) >0) { $cookiehead = "Cookie: "; $cookiehead .= join($this->cookies,"; "); $cookiehead .= "\r\n"; }
// Make up request. Host isn't strictly needed except IIS winges if ($this->method == "POST") { $this->content_length = strlen($strpostvar); $this->content_type = "application/x-www-form-urlencoded";
/*********************************************************************************
func_HtmlSource.inc 获取http协议页面的类。
使用举例:
$hostname = "www.mydomain.com"; // 主域地址
$srcfile = "/index.php"; //
$destfile = "/index.html"; // 准备输出到的文件名。 $source= new HtmlSource();
$source->port = 80;
$source->host = $hostname;
$source->page = $srcfile;
$source->method = "GET"; // 也可以改为POST
$source->httpversion = "1.0"; // 请一定要加双引号,否则在php4.2.1环境下数字的小数点会被自动截掉
$source->timeout = 10; // 连接超时设置
$source->striptags = false; // 是否去除字串中包含的任何 HTML 及 PHP 的标记字串标记,一般来说不需要
$source->showsource = false; // 影响到getSource()的调用结果,设为true则getSource()自动把内容显示出来。
$source->addGetVar("typeid",urlencode($typename)); // 脚本页面的传入参数,如果上面的method='POST',则这里相应的应该用addPostVar()
$result = $source->getSource();
// 把得到的页面内容保存到指定的文件中
$fp = fopen($destfile,"w+");
fwrite($fp, $result, strlen($result));
fclose($fp); $source= null;
*********************************************************************************/
/**
* @title Html Source class - api for getting/posting to websites
* @author C.Small
* @version 1.0
*
* - HTTP 1.0 specs - http://www.w3.org/Protocols/rfc1945/rfc1945
* - Easier to read version at http://www.ics.uci.edu/pub/ietf/http/rfc1945.html
*/
class HtmlSource
{
    /* All vars are public */

    // Common
    var $host;                        // Host name to connect to (also sent as the Host header)
    var $port = 80;                   // TCP port to connect to
    var $page;                        // Request path, e.g. "/index.php"
    var $request;                     // Raw request text built by the last privateBuildRequest()/getSource() call
    var $httpversion;                 // Always forced to "1.0" when a request is built
    var $method = "GET";              // "GET" or "POST" (compared case-sensitively)
    var $timeout = 30;                // Connection timeout in seconds, passed to fsockopen()
    var $striptags;                   // When true, strip_tags() is applied to the response
    var $showsource;                  // When true (and $striptags is false), the response is HTML-escaped
    var $strip_responseheader = true; // When true, the response header block is removed
    var $error = "";                  // "errno: errstr" of the last failed connection, "" otherwise

    // Cookie
    var $cookies = array();
    // GET
    var $getvars = array();
    // POST
    var $postvars = array();

    // Request fields
    var $accept;          //format: Accept: */*
    var $accept_encoding; //format: gzip,deflate
    var $accept_language; //format: en-gb
    var $authorization;   //format: username:password
    var $content_length;  //format: 40 (for POST)
    var $content_type;    //format: application/x-www-form-urlencoded
    var $date;            //format: Date: Tue, 15 Nov 1994 08:12:31 GMT
    var $referer;         //format: Referer: http://www.domain.com
    var $useragent;       //format: User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)

    /**
     * Queue a "name=value" pair for the POST body.
     *
     * The pair is stored verbatim; the caller is responsible for URL-encoding.
     * Pairs with an empty name or an empty value are ignored, but zero values
     * (0, 0.0, "0") are accepted.
     */
    function addPostVar($name, $value)
    {
        if ($this->privateHasValue($name) && $this->privateHasValue($value)) {
            $this->postvars[] = $name . "=" . $value;
        }
    }

    /**
     * Queue a "name=value" pair for the GET query string.
     *
     * The pair is stored verbatim; the caller is responsible for URL-encoding.
     * Pairs with an empty name or an empty value are ignored, but zero values
     * (0, 0.0, "0") are accepted.
     */
    function addGetVar($name, $value)
    {
        if ($this->privateHasValue($name) && $this->privateHasValue($value)) {
            $this->getvars[] = $name . "=" . $value;
        }
    }

    /**
     * Queue a "name=value" pair for the Cookie request header.
     *
     * Pairs with an empty name or an empty value are ignored, but zero values
     * (0, 0.0, "0") are accepted.
     */
    function addCookie($name, $value)
    {
        if ($this->privateHasValue($name) && $this->privateHasValue($value)) {
            $this->cookies[] = $name . "=" . $value;
        }
    }

    /**
     * Send the configured request and return the response.
     *
     * The raw request that was sent is left in $this->request.
     *
     * @return string|false The response (headers stripped, tags stripped or
     *                      HTML-escaped according to the flags), or false when
     *                      the connection could not be opened; in that case
     *                      $this->error holds "errno: errstr".
     */
    function getSource()
    {
        $this->privateBuildRequest();
        $this->error = "";

        // Open socket to URL
        $errno = 0;
        $errstr = "";
        $sHnd = fsockopen($this->host, $this->port, $errno, $errstr, $this->timeout);
        if (!$sHnd) {
            // Without this guard the read loop below would run on an invalid handle.
            $this->error = $errno . ": " . $errstr;
            return false;
        }

        fputs($sHnd, $this->request);

        // Get source
        $result = "";
        while (!feof($sHnd)) {
            $chunk = fgets($sHnd, 4096);
            if ($chunk === false) {
                // Read error or timeout: feof() may never become true, so stop here.
                break;
            }
            $result .= $chunk;
        }
        fclose($sHnd);

        // Strip header
        if ($this->strip_responseheader) {
            $result = $this->privateStripResponseHeader($result);
        }

        // Strip tags
        if ($this->striptags) {
            $result = strip_tags($result);
        }

        // Show the source only
        if ($this->showsource && !$this->striptags) {
            $result = nl2br(htmlentities($result));
        }

        return $result;
    }

    /**
     * Build the raw HTTP request from the current settings without sending it.
     *
     * Stores the text in $this->request and also returns it. For POST requests
     * $this->content_length and $this->content_type are overwritten.
     *
     * @return string The raw request text.
     */
    function privateBuildRequest()
    {
        // The version is always forced to 1.0, overriding any value set by the caller.
        // NOTE(review): presumably deliberate, since getSource() reads until EOF and
        // does not decode chunked bodies - confirm before making this configurable.
        $this->httpversion = "1.0";

        if (empty($this->method)) {
            $this->method = "GET";
        }

        // Make Cookies
        $cookiehead = "";
        if (count($this->cookies) > 0) {
            $cookiehead = "Cookie: " . implode("; ", $this->cookies) . "\r\n";
        }

        // Make up request. Host isn't strictly needed except IIS winges
        if ($this->method == "POST") {
            $body = implode("&", $this->postvars);
            $this->content_length = strlen($body);
            $this->content_type = "application/x-www-form-urlencoded";

            $request  = $this->method . " " . $this->page . " HTTP/" . $this->httpversion . "\r\n";
            $request .= "Host: " . $this->host . "\r\n";
            $request .= $cookiehead;
            $request .= $this->privateMakeRequest();
            if ($body !== "") {
                // Blank line ends the headers; the CRLF after the body is not
                // counted in Content-length (wire format kept from the original).
                $request .= "\r\n" . $body . "\r\n";
            }
            $request .= "\r\n";
        } else {
            // The query string is only appended for an exact "GET" method.
            $query = "";
            if ($this->method == "GET" && count($this->getvars) > 0) {
                $query = "?" . implode("&", $this->getvars);
            }

            $request  = $this->method . " " . $this->page . $query . " HTTP/" . $this->httpversion . "\r\n";
            $request .= "Host: " . $this->host . "\r\n";
            $request .= $cookiehead;
            $request .= $this->privateMakeRequest();
            $request .= "\r\n";
        }

        $this->request = $request;
        return $this->request;
    }

    /**
     * Make up the optional request header lines.
     *
     * @return string One "Name: value\r\n" line per non-empty request field,
     *                or "" when none is set.
     */
    function privateMakeRequest()
    {
        $authorization = "";
        if (!empty($this->authorization)) {
            $authorization = "Basic " . base64_encode($this->authorization);
        }

        // Emitted in this order; empty fields are skipped.
        $fields = array(
            "Accept"          => $this->accept,
            "Accept-Encoding" => $this->accept_encoding,
            "Accept-Language" => $this->accept_language,
            "Authorization"   => $authorization,
            "Content-length"  => $this->content_length,
            "Content-type"    => $this->content_type,
            "Date"            => $this->date,
            "Referer"         => $this->referer,
            "User-Agent"      => $this->useragent,
        );

        $result = "";
        foreach ($fields as $name => $value) {
            if (!empty($value)) {
                $result .= $name . ": " . $value . "\r\n";
            }
        }
        return $result;
    }

    /**
     * Remove the response header block from a raw HTTP response.
     *
     * @param string $source Raw response text.
     * @return string Everything after the first blank line (CRLF CRLF), or
     *                $source unchanged when no blank line is present.
     */
    function privateStripResponseHeader($source)
    {
        $headerend = strpos($source, "\r\n\r\n");
        if ($headerend === false) {
            return $source;
        }
        // Cast: before PHP 8, substr() returns false when the body is empty.
        return (string) substr($source, $headerend + 4);
    }

    /**
     * Tell whether a name or value should be kept by the add*() methods.
     *
     * Same as !empty(), except that the zero values 0, 0.0 and "0" count as present.
     */
    function privateHasValue($value)
    {
        return !empty($value) || $value === 0 || $value === 0.0 || $value === "0";
    }
}
icewolf_li(冰狼) ( ) 信誉:99
了
以新浪的新闻为例 http://news.sina.com.cn内容为动态的; 上海专家周鼎新海南被害案告破
链接为:http://news.sina.com.cn/c/2003-11-05/17581063473s.shtml 这种技术如何实现呢?2003-11-05/17581063473s.shtml这个文件是动态产生的,那什么时候删除它呢?如果不删除那服务器受得了吗?