<?php
/**
 * OpenSource code of CurvePHP framework
 * Create by Curve WorkRoom Group at 2010
 * http://curvephp.xiao688.com/
 * 处理http协议交互内容
 */
class socket_http{

/** Timeout value, presumably in seconds and intended for the socket connection. */
public $timeout=30;

/** URL passed to the most recent request() call. */
protected $url;

/** parse_url() result for $url; request() fills in a default 'port' when the URL has none. */
protected $url_info=array();

/** Request headers as name => value; each entry is sent as "Name: value" by request(). */
protected $http_header=array();

/** Parsed status line of the last response: keys 'protocol', 'code' and 'message'. */
protected $response_status=array();

/**
 * Headers of the last response as name => value.
 * 'Set-Cookie' is the exception: it holds a nested array
 * [domain][path][cookie name] => array(value, expiry timestamp, secure flag).
 */
protected $response_header=array();

/** Body of the last response; readable from outside through the magic property "data". */
protected $response_data;

/** File in which request() keeps the serialized cookie store. */
protected $cookie_file='socket_http_cookie.txt';

/** When true, request() loads the cookies stored in $cookie_file before sending. */
public $save_cookie=true;

/** HTTP method written on the request line. */
public $http_method='GET';

/** Protocol/version token written on the request line. */
public $http_protocol='HTTP/1.1';

/**
 * Set up the default request headers.
 *
 * @param string $userAgent Key of a built-in User-Agent preset ('chrome',
 *                          'firefox' or 'IE'). Any other value leaves the
 *                          User-Agent header unset.
 */
public function __construct($userAgent='firefox'){
    // Built-in User-Agent presets.
    $defaultAgent=array(
        'chrome'=>'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7',
        'firefox'=>'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12',
        'IE'=>'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)'
    );
    if(array_key_exists($userAgent,$defaultAgent)){
        $this->http_header['User-Agent']=$defaultAgent[$userAgent];
    }
    // The former "Accept: */*" assignment was immediately overwritten by this
    // one, so only the effective value is kept.
    $this->http_header['Accept']='text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    $this->http_header['Accept-Language']='zh-cn,zh;q=0.5';
    $this->http_header['Accept-Encoding']='deflate';
    $this->http_header['Accept-Charset']='GB2312,utf-8;q=0.7,*;q=0.7';
    $this->http_header['Keep-Alive']='115';
    // The response is read until EOF, so the server must close the connection.
    $this->http_header['Connection']='close';
}

/**
 * Magic read accessor: exposes the last response body as the virtual
 * property "data". Every other name yields null.
 */
function __get($variable){
    return $variable=='data' ? $this->response_data : null;
}

/**
 * Request a URL over a socket.
 *
 * Public entry point; the work is delegated to request() and its result is
 * handed back unchanged.
 */
function fetch($url){
    $outcome=$this->request($url);
    return $outcome;
}

// (Forum page residue from the original post, not code: "解决方案 »" [Solutions »], item 1.)

    /**
     * Send one HTTP request built from the configured method, protocol and
     * headers, then parse and store the response (status, headers, body).
     *
     * Cookies kept in $cookie_file are added to the request when $save_cookie
     * is true, and every Set-Cookie header of the response is written back to
     * that file. A Location/URI response header is followed as a redirect.
     *
     * @param string $url           Absolute URL to request (http or https).
     * @param int    $max_redirects Number of redirects that may still be followed.
     * @return bool|string true once an HTTP response has been parsed; false when
     *                     the URL has no host, the connection or the write
     *                     fails, or the redirect budget is used up; the raw
     *                     received data when it contains no header/body separator.
     * @throws Exception from set_cookie() when a stored cookie has an illegal name.
     */
    protected function request($url,$max_redirects=10){

        $url_info=parse_url($url);
        if(!is_array($url_info)||empty($url_info['host'])){return false;}

        $scheme=isset($url_info['scheme'])?strtolower($url_info['scheme']):'http';
        $is_https=($scheme==='https');
        if(!isset($url_info['port'])){$url_info['port']=$is_https?'443':'80';}
        // parse_url() omits 'path' for "http://host"; the request line still needs one.
        if(!isset($url_info['path'])||$url_info['path']===''){$url_info['path']='/';}

        $this->url=$url;
        $this->url_info=$url_info;

        // '@' keeps the original contract: a failed connection is reported
        // through the return value only.
        $errno=0;
        $errstr='';
        $socket=@fsockopen(($is_https?'ssl://':'').$url_info['host'],(int)$url_info['port'],$errno,$errstr,(float)$this->timeout);
        if(!is_resource($socket)){return false;}
        stream_set_timeout($socket,max(1,(int)$this->timeout));

        $this->response_header=array();
        $this->response_status=array();
        $this->response_data='';

        if($this->save_cookie){$this->apply_saved_cookies($is_https);}

        $uri=$this->url_info['path'];
        if(isset($this->url_info['query'])){$uri.='?'.$this->url_info['query'];}

        $request=$this->http_method.' '.$uri.' '.$this->http_protocol."\r\n";
        $request.="Host: {$this->url_info['host']}\r\n";
        foreach($this->http_header as $key=>$value){
            $request.=$key.": ".$value."\r\n";
        }
        $request.="\r\n";
        if(@fwrite($socket,$request)===false){
            fclose($socket);
            return false;
        }

        $result='';
        while(!feof($socket)){
            $line=fgets($socket,4096);
            // false means error or read timeout; feof() would stay false forever.
            if($line===false){break;}
            $result.=$line;
        }
        fclose($socket);

        $headerend=strpos($result,"\r\n\r\n");
        if($headerend===false){
            // Not an HTTP-formatted answer: hand the raw data back.
            return $result;
        }

        $header_lines=explode("\r\n",substr($result,0,$headerend));

        // Status line: "<protocol> <code> <message>"; the message is optional.
        $status_parts=explode(' ',(string)array_shift($header_lines),3);
        $this->response_status['protocol']=$status_parts[0];
        $this->response_status['code']=isset($status_parts[1])?$status_parts[1]:'';
        $this->response_status['message']=isset($status_parts[2])?$status_parts[2]:'';

        // Header names are case-insensitive on the wire; the ones this class
        // reads later are stored under one fixed spelling.
        $canonical=array(
            'location'=>'Location',
            'uri'=>'URI',
            'content-type'=>'Content-Type',
            'transfer-encoding'=>'Transfer-Encoding'
        );
        foreach($header_lines as $header_item){
            $colon=strpos($header_item,':');
            if($colon===false){continue;}
            $header_key=trim(substr($header_item,0,$colon));
            $header_key_val=trim((string)substr($header_item,$colon+1));

            if(strcasecmp($header_key,'Set-Cookie')===0){
                $cookie=$this->parse_set_cookie($header_key_val);
                if($cookie!==null){
                    $this->response_header['Set-Cookie'][$cookie['domain']][$cookie['path']][$cookie['name']]=
                        array($cookie['value'],$cookie['expire'],$cookie['secure']);
                }
                continue;
            }

            $lower_key=strtolower($header_key);
            if(isset($canonical[$lower_key])){$header_key=$canonical[$lower_key];}
            $this->response_header[$header_key]=$header_key_val;
        }

        // Save cookies: merge into the stored jar cookie by cookie, so other
        // cookies of the same domain are kept.
        if(!empty($this->response_header['Set-Cookie'])){
            $stored=array();
            if($cookie_file_content=system::get_file_contents($this->cookie_file)){
                // NOTE(review): unserialize() is acceptable only while the cookie
                // file is written exclusively by this class.
                $previous=unserialize($cookie_file_content);
                if(is_array($previous)){$stored=$previous;}
            }
            file_put_contents(
                $this->cookie_file,
                serialize(array_replace_recursive($stored,$this->response_header['Set-Cookie'])),
                LOCK_EX
            );
        }

        if(isset($this->response_header['Location'])||isset($this->response_header['URI'])){
            // Redirect to the new page, within a bounded number of hops.
            if($max_redirects<=0){return false;}

            $redirect=isset($this->response_header['Location'])?$this->response_header['Location']:$this->response_header['URI'];
            $redirect=system::vitrul_to_real_url($this->url,$redirect);

            return $this->request($redirect,$max_redirects-1);
        }

        if($this->response_status['code']=='200'){
            $this->response_data=(string)substr($result,$headerend+4);

            if(isset($this->response_header['Transfer-Encoding'])
                &&strtolower(trim($this->response_header['Transfer-Encoding']))==='chunked'){
                $this->response_data=$this->get_chunked_data($this->response_data);
            }
        }
        return true;
    }

    /**
     * Add every stored cookie that applies to the current URL to the request
     * headers and purge expired entries from the cookie file.
     *
     * @param bool $is_https Whether the current request uses https; cookies
     *                       flagged secure are only sent when it does.
     */
    protected function apply_saved_cookies($is_https){
        $cookie_file_content=system::get_file_contents($this->cookie_file);
        if(!$cookie_file_content){return;}

        // NOTE(review): unserialize() is acceptable only while the cookie file
        // is written exclusively by this class.
        $cookie_file_arr=unserialize($cookie_file_content);
        if(!is_array($cookie_file_arr)||!$cookie_file_arr){return;}

        $host=strtolower($this->url_info['host']);
        $request_path=$this->url_info['path'];
        $now=time();
        $d_cookie=false;

        foreach($cookie_file_arr as $domain_cookie=>$domain_cookie_list){

            // Domain match: the host is the cookie domain or one of its sub-domains.
            $domain=strtolower(ltrim((string)$domain_cookie,'.'));
            if($host!==$domain&&substr($host,-strlen($domain)-1)!=='.'.$domain){continue;}

            foreach($domain_cookie_list as $path_cookie=>$path_cookie_list){

                $path_prefix=(string)$path_cookie;
                if(strncmp($request_path,$path_prefix,strlen($path_prefix))!==0){continue;}

                foreach($path_cookie_list as $cookie_name=>$one_cookie){
                    // An empty expiry marks a session cookie, which never expires here.
                    if($one_cookie[1]&&$one_cookie[1]<$now){
                        unset($cookie_file_arr[$domain_cookie][$path_cookie][$cookie_name]);
                        $d_cookie=true;
                        continue;
                    }
                    if($one_cookie[2]&&!$is_https){continue;}
                    $this->set_cookie($cookie_name,$one_cookie[0]);
                }

                if(!$cookie_file_arr[$domain_cookie][$path_cookie]){
                    unset($cookie_file_arr[$domain_cookie][$path_cookie]);
                    $d_cookie=true;
                }
            }

            if(!$cookie_file_arr[$domain_cookie]){
                unset($cookie_file_arr[$domain_cookie]);
                $d_cookie=true;
            }
        }

        if($d_cookie){
            file_put_contents($this->cookie_file,serialize($cookie_file_arr),LOCK_EX);
        }
    }

    /**
     * Parse the value of one Set-Cookie header.
     *
     * @param string $header_value Header value without the "Set-Cookie:" prefix.
     * @return array|null Keys 'name', 'value', 'domain', 'path', 'expire'
     *                    (timestamp, 0 for a session cookie) and 'secure' (bool);
     *                    null when the header carries no usable name=value pair.
     */
    protected function parse_set_cookie($header_value){
        $parts=explode(';',$header_value);

        // The first segment is always the cookie itself; only the first '='
        // separates name from value.
        $pair=explode('=',(string)array_shift($parts),2);
        $cookie_name=trim($pair[0]);
        if(count($pair)<2||$cookie_name===''||preg_match('|[=,; \t\r\n\013\014]|',$cookie_name)){
            return null;
        }

        // Remaining segments are attributes; names are case-insensitive and
        // flags such as "Secure" carry no value.
        $attr=array();
        foreach($parts as $part){
            $kv=explode('=',$part,2);
            $attr[strtolower(trim($kv[0]))]=isset($kv[1])?trim($kv[1]):true;
        }

        $cookie_domain=$this->url_info['host'];
        if(isset($attr['domain'])&&is_string($attr['domain'])&&$attr['domain']!==''){
            $cookie_domain=$attr['domain'];
        }

        // Default path: directory part of the request path.
        $request_path=isset($this->url_info['path'])?$this->url_info['path']:'/';
        $slash=strrpos($request_path,'/');
        $cookie_path=($slash===false||$slash===0)?'/':substr($request_path,0,$slash);
        if(isset($attr['path'])&&is_string($attr['path'])&&$attr['path']!==''){
            if($attr['path'][0]==='/'){
                $cookie_path=$attr['path'];
            }else{
                // Relative path: resolve against the request directory.
                $cookie_path=rtrim($cookie_path,'/').'/'.$attr['path'];
            }
        }

        $expire=0;
        if(isset($attr['max-age'])&&is_numeric($attr['max-age'])){
            $max_age=(int)$attr['max-age'];
            // A non-positive Max-Age asks for deletion: store a timestamp in the past.
            $expire=$max_age>0?time()+$max_age:1;
        }else{
            foreach(array('expires','expire') as $expire_key){
                if(isset($attr[$expire_key])&&is_string($attr[$expire_key])){
                    $parsed=strtotime($attr[$expire_key]);
                    if($parsed!==false){$expire=$parsed;}
                    break;
                }
            }
        }

        return array(
            'name'=>$cookie_name,
            'value'=>trim($pair[1]),
            'domain'=>$cookie_domain,
            'path'=>$cookie_path,
            'expire'=>$expire,
            'secure'=>isset($attr['secure'])
        );
    }

    /**
     * Determine the character set of the last response when it is text.
     *
     * Sources are tried in this order: the charset parameter of the
     * Content-Type response header, a "charset=" declaration inside the body,
     * the first entry of the Accept-Charset request header, then 'utf-8'.
     *
     * @return string|null Charset name, or null when the response has no
     *                     text/* Content-Type.
     */
    public function get_data_charset(){
        if(!isset($this->response_header['Content-Type'])
            ||!preg_match('|text/.*|is',$this->response_header['Content-Type'])){
            return null;
        }

        // Optional quotes around the value are skipped, so charset="utf-8" and
        // <meta charset="utf-8"> both yield the bare name.
        $pattern='#charset\s*=\s*["\']?([a-z0-9_.:\-]+)#i';

        if(preg_match($pattern,$this->response_header['Content-Type'],$match_charset)){
            return $match_charset[1];
        }
        if(preg_match($pattern,(string)$this->response_data,$match_charset)){
            return $match_charset[1];
        }
        if(!empty($this->http_header['Accept-Charset'])){
            // "GB2312,utf-8;q=0.7,*;q=0.7" -> "GB2312"
            $chs=explode(';',$this->http_header['Accept-Charset']);
            $chs=explode(',',$chs[0]);
            $first=trim($chs[0]);
            if($first!==''){return $first;}
        }
        return 'utf-8';
    }

    /**
     * Fetch a page and rewrite its links so it can be served through this
     * script acting as a proxy.
     *
     * <link href> targets are replaced with the URL computed by
     * system::vitrul_to_real_url(); <a href> targets are routed back through
     * the current script via its "url" query argument.
     *
     * @param string      $url     Page to fetch.
     * @param string|null $charset Target charset for the returned page; null
     *                             returns the body as received.
     * @param string|null $ip      Value for the X_FORWARDED_FOR request header;
     *                             a random address is generated when null.
     * @return string Rewritten page content.
     */
    public function web_proxy($url,$charset=null,$ip=null){

        $this->save_cookie=true;

        if($ip===null){
            $ip=implode('.',array(rand(128,255),rand(1,255),rand(1,255),rand(1,255)));
        }
        $this->http_header['X_FORWARDED_FOR']=$ip;

        $this->fetch($url);

        $page_data=$this->response_data;
        if($charset!==null){
            $converted=iconv($this->get_data_charset(),$charset.'//IGNORE',$this->response_data);
            $page_data=preg_replace('|(charset=)(.*?)([> \'"])|is','$1'.$charset.'$3',$converted);
        }

        // Address of this script without its old "url" argument, ready to take a new one.
        $proxy_url=system::querystring_remove_arg($_SERVER['REQUEST_URI'],'url');
        $proxy_url.=(strpos($proxy_url,'?')===false)?'?url=':'&url=';

        // Stylesheets and other <link> resources: point at the computed URL.
        if(preg_match_all('|(<link .*?href=["\']?)(.*?)([>"\' ])|is',$page_data,$link_match)){
            foreach($link_match[2] as $idx=>$href){
                $head=$link_match[1][$idx];
                $tail=$link_match[3][$idx];
                $target=system::vitrul_to_real_url($url,$href);
                $page_data=str_replace($head.$href.$tail,$head.$target.$tail,$page_data);
            }
        }

        // Anchors: send navigation back through the proxy script.
        if(preg_match_all('|(<a .*?href=["\']?)(.*?)([>"\' ])|is',$page_data,$anchor_match)){
            foreach($anchor_match[2] as $idx=>$href){
                $head=$anchor_match[1][$idx];
                $tail=$anchor_match[3][$idx];
                $target=system::vitrul_to_real_url($url,$href);
                $page_data=str_replace($head.$href.$tail,$head.$proxy_url.urlencode($target).$tail,$page_data);
            }
        }

        return $page_data;
    }
    /**
     * Return the response body, optionally converted to another charset.
     *
     * With a target charset, the body is converted from the charset reported
     * by get_data_charset() (transliterating where needed) and every
     * "charset=..." declaration inside it is rewritten to the target name.
     *
     * @param string|null $charset Target charset; null returns the body untouched.
     */
    public function get_text_data($charset=null){
        if($charset===null){
            return $this->response_data;
        }

        $source_charset=$this->get_data_charset();
        $converted=iconv($source_charset,$charset.'//TRANSLIT',$this->response_data);

        return preg_replace('|(charset=)(.*?)([> \'"])|is','$1'.$charset.'$3',$converted);
    }
    /**
     * Decode a body sent with "Transfer-Encoding: chunked".
     *
     * $data must start at the first chunk-size line, i.e. the HTTP header has
     * already been removed. Chunk extensions (";name=value" after the size)
     * are ignored, and decoding stops at the terminating zero-size chunk, so
     * trailer lines never reach the output.
     *
     * @param string $data Raw chunked body.
     * @return string Decoded payload. Input that does not begin with a valid
     *                chunk-size line is returned unchanged; a body truncated
     *                mid-stream yields what could be decoded.
     */
    public function get_chunked_data($data){
        $data=(string)$data;
        $length=strlen($data);
        $fp=0;
        $outData="";

        while($fp<$length){
            $line_start=$fp;
            $line_end=strpos($data,"\r\n",$fp);
            if($line_end===false){
                // No size line at all: the body was not chunked.
                return $line_start===0?$data:$outData;
            }

            $size_line=substr($data,$fp,$line_end-$fp);
            $fp=$line_end+2;

            // Drop an optional chunk extension.
            $semicolon=strpos($size_line,';');
            if($semicolon!==false){$size_line=substr($size_line,0,$semicolon);}
            $size_line=trim($size_line);

            // Tolerate a stray empty line between chunks.
            if($size_line===''){continue;}

            if(!ctype_xdigit($size_line)){
                return $line_start===0?$data:$outData;
            }

            $num=(int)hexdec($size_line);
            // The zero-size chunk ends the body; anything after it is trailer.
            if($num<=0){break;}

            $outData.=substr($data,$fp,$num);
            $fp+=$num;

            // Every chunk is followed by CRLF.
            if(substr($data,$fp,2)==="\r\n"){$fp+=2;}
        }
        return $outData;
    }

    /**
     * Choose the HTTP method written on the request line of later requests.
     *
     * @param string $method Method token, stored as given (default 'GET').
     */
    public function set_method($method='GET'){
        $this->http_method=$method;
    }

    /**
     * Add a request header, or replace it when the name is already set.
     *
     * @param string $var  Header name.
     * @param string $info Header value.
     */
    public function set_header($var,$info)
    {
        $this->http_header[$var]=$info;
    }

    /**
     * Set a cookie in the Cookie request header.
     *
     * The value is URL-encoded. A cookie that is already present under the
     * same name is replaced in place, so repeated calls do not pile up
     * duplicate entries.
     *
     * @param string $c_name  Cookie name.
     * @param string $c_value Cookie value (unencoded).
     * @throws Exception when the name contains '=', ',', ';' or whitespace.
     */
    public function set_cookie($c_name,$c_value){
        if(preg_match('|[=,; \t\r\n\013\014]|',$c_name)){
            throw new Exception("Cookie names can not contain any of the folllowing '=,; \t\r\n\013\014'");
        }

        $new_pair=$c_name.'='.urlencode($c_value);
        $current=isset($this->http_header['Cookie'])?(string)$this->http_header['Cookie']:'';

        $pairs=array();
        $replaced=false;
        foreach(explode(';',$current) as $pair){
            $pair=trim($pair);
            if($pair===''){continue;}
            $eq=strpos($pair,'=');
            $name=$eq===false?$pair:substr($pair,0,$eq);
            if($name===(string)$c_name){
                // Keep the first occurrence's position, drop later duplicates.
                if(!$replaced){$pairs[]=$new_pair;$replaced=true;}
                continue;
            }
            $pairs[]=$pair;
        }
        if(!$replaced){$pairs[]=$new_pair;}

        $this->http_header['Cookie']=implode('; ',$pairs);
    }
    }
    ?>