php函数 escape,unescape,unicodeToUtf8....请大家测试。

/**
 * Reverses the encoding produced by JavaScript's escape() and returns UTF-8.
 *
 * Recognised escapes:
 *   - %uXXXX  one UTF-16 code unit; a high/low surrogate pair is combined
 *             into a single supplementary code point;
 *   - %XX     a code point in the range 0x00-0xFF.
 * Everything else, including a '%' that does not start a valid escape, is
 * copied to the output unchanged.
 *
 * Example: phpUnescape("%u5927") returns the UTF-8 bytes E5 A4 A7.
 * 2005-12-10
 *
 * @param string $escstr String encoded by JavaScript's escape().
 * @return string The decoded text, UTF-8 encoded.
 */
function phpUnescape($escstr){
    // The surrogate-pair alternative comes first so it wins over a single
    // %uXXXX. The last two alternatives guarantee that every input byte ends
    // up in some token, so no character is silently dropped.
    $pattern = '/%u[dD][89abAB][0-9A-Fa-f]{2}%u[dD][c-fC-F][0-9A-Fa-f]{2}'
             . '|%u[0-9A-Fa-f]{4}|%[0-9A-Fa-f]{2}|[^%]+|%/';
    preg_match_all($pattern, (string)$escstr, $matches);

    $out = "";
    foreach ($matches[0] as $token) {
        $len = strlen($token);
        if ($token[0] != "%" || $len == 1) {
            // Literal text, or a lone '%' that is not part of an escape.
            $out .= $token;
            continue;
        }

        if ($len == 12) {
            // %uHHHH%uLLLL surrogate pair -> one code point above U+FFFF.
            $high = hexdec(substr($token, 2, 4));
            $low  = hexdec(substr($token, 8, 4));
            $code = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
        } elseif ($len == 6) {
            $code = hexdec(substr($token, 2));   // %uXXXX
        } else {
            $code = hexdec(substr($token, 1));   // %XX
        }

        // Encode the code point as UTF-8.
        if ($code < 0x80) {            // 0000-007F
            $out .= chr($code);
        } elseif ($code < 0x800) {     // 0080-07FF
            $out .= chr(0xC0 | ($code >> 6));
            $out .= chr(0x80 | ($code & 0x3F));
        } elseif ($code < 0x10000) {   // 0800-FFFF
            $out .= chr(0xE0 | ($code >> 12));
            $out .= chr(0x80 | (($code >> 6) & 0x3F));
            $out .= chr(0x80 | ($code & 0x3F));
        } else {                       // 10000-10FFFF
            $out .= chr(0xF0 | ($code >> 18));
            $out .= chr(0x80 | (($code >> 12) & 0x3F));
            $out .= chr(0x80 | (($code >> 6) & 0x3F));
            $out .= chr(0x80 | ($code & 0x3F));
        }
    }
    return $out;
}
/*
 * Equivalent of escape.
 * Taken from the web. The other functions in this file borrow the key
 * algorithm from this function.
 */
/**
 * PHP counterpart of JavaScript's escape().
 *
 * The input is converted to UTF-16 (big-endian) and emitted one code unit at
 * a time: ASCII goes through rawurlencode(), everything else becomes %uXXXX
 * with uppercase hex digits. Characters outside the BMP therefore come out
 * as a surrogate pair, as in JavaScript. Note that the set of ASCII
 * characters left literal is rawurlencode()'s, which is not exactly the set
 * escape() leaves literal.
 *
 * @param string $str    Text to encode.
 * @param string $encode Character encoding of $str, as a name iconv()
 *                       accepts; when empty it is detected with
 *                       mb_detect_encoding().
 * @return string|null   The encoded string; null when no encoding is given
 *                       and none can be detected, or when iconv() cannot
 *                       convert the input.
 */
function phpEscape($str,$encode="") {
    if ($encode=="") {
        if (!function_exists("mb_detect_encoding")) {
            echo "error You must enter the string's encoding or extend the php for mb_string";
            return;
        }
        // No progress message is echoed: this function returns a value and
        // must not write into the caller's page output.
        $encode = mb_detect_encoding($str);
        if ($encode === false) {
            return;
        }
    }

    $str = (string)$str;
    if ($str === "") {
        return "";
    }

    // An explicit byte order is required: plain "UCS-2"/"UTF-16" is
    // platform-dependent in iconv and may prepend a BOM.
    $utf16 = iconv($encode, "UTF-16BE", $str);
    if ($utf16 === false) {
        return;
    }

    $out = "";
    $n = strlen($utf16);
    for ($i = 0; $i + 1 < $n; $i += 2) {
        $hi = ord($utf16[$i]);
        $lo = ord($utf16[$i + 1]);
        if ($hi == 0 && $lo < 0x80) {
            $out .= rawurlencode(chr($lo));
        } else {
            $out .= sprintf("%%u%02X%02X", $hi, $lo);
        }
    }
    return $out;
}

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

echo.php<?php
/**
 * Function listDirTree( $dirName = null )
 *
 * Recursively walks a directory and records what it finds in two globals:
 * every non-directory entry is appended to $tree and every sub-directory to
 * $dirTree, each as "$dirName/entry". Neither global is reset here, so
 * repeated calls accumulate.
 *
 * Terminates the script through exit() when $dirName is empty, is not a
 * directory, or cannot be opened.
 *
 * @param string|null $dirName Directory name.
 */
function listDirTree( $dirName = null )
{
    global $tree, $dirTree;

    if( empty( $dirName ) )
        exit( "IBFileSystem: directory is empty." );
    if( !is_dir( $dirName ) )
        exit( "IBFileSystem: $dirName is not a directory.");

    $dh = opendir( $dirName );
    if( !$dh )
        exit( "IBFileSystem: can not open directory $dirName.");

    while( ( $file = readdir( $dh ) ) !== false )
    {
        if( $file == "." || $file == ".." )
            continue;

        $filePath = $dirName . "/" . $file;
        if( is_dir( $filePath ) )
        {
            // Directory: remember it, then recurse into it.
            $dirTree[] = $filePath;
            listDirTree( $filePath );
        }
        else
        {
            // Plain entry: remember its path.
            $tree[] = $filePath;
        }
    }
    closedir( $dh );
} // end func listDirTree

/**
 * Echoes $str inside a <TEXTAREA> of the given pixel size, followed by <br/>.
 * The value is written as-is, without HTML escaping.
 *
 * @param mixed $str    Value to show (taken by reference; not modified).
 * @param int   $width  Width in pixels.
 * @param int   $height Height in pixels.
 */
function echot(&$str,$width=400,$height=100){
    echo "<TEXTAREA  style=\"width:{$width}px ;height:{$height}px;\">$str</TEXTAREA><br/>";
}
/**
 * echothead
 * Echoes the opening <TEXTAREA> tag of the wrapper used by prt().
 */
function echothead()
{
    echo '<TEXTAREA NAME="" ROWS="20" COLS="80">';
} // end func echothead

/**
 * echotfoot
 * Echoes the closing </textarea> tag that pairs with echothead().
 */
function echotfoot()
{
    echo "</textarea>";
} // end func echotfoot

/**
 * Dumps a value with print_r() inside a <pre> element.
 *
 * @param mixed $row Value to dump (taken by reference; not modified).
 */
function pr(&$row){
    echo "<pre>";
    print_r($row);
    // Close the element so the rest of the page is not rendered preformatted.
    echo "</pre>";
}
/**
 * Echoes a SQL string for debugging: every "#_" is replaced by "mos" and the
 * result is followed by " <br/>" and a newline.
 *
 * @param string $q SQL text.
 */
function echosql($q){
    $sql = str_replace("#_", "mos", $q);
    echo $sql . " <br/>\n";
}
/**
 * Same as echosql() but wraps the SQL text in a red <font> element.
 *
 * @param string $q SQL text; every "#_" is replaced by "mos".
 */
function echoredsql($q){
    $sql = str_replace("#_", "mos", $q);
    echo "<font color=red> " . $sql . " </font>\n";
}
/**
 * Echoes $str wrapped in a red <font> element, followed by a newline.
 *
 * @param string $str Text to print (written as-is).
 */
function echored($str){
    echo "<font color=red> " . $str . " </font>\n";
}
/**
 *  prt
 */
/**
 * Dumps a value with print_r() between the output of echothead() and
 * echotfoot(), i.e. inside a textarea.
 *
 * @param mixed $arr Value to dump.
 */
function prt($arr)
{
    echothead();
    print_r($arr);
    echotfoot();
} // end func prt

/**
 * Echoes $js wrapped in a <script> element. The text is written as-is.
 *
 * @param string $js JavaScript source.
 */
function echojs($js){
    echo "<script>$js</script>";
}
/**
 * Shows a result set as an HTML table (class "dxtable").
 * Only suitable for result sets fetched through the database class
 * ($database->loadObjectList()), i.e. a zero-indexed list of objects read as
 * $rows[$i]->id. Nothing is printed when $rows is empty.
 *
 * @param array $rows   List of row objects.
 * @param array $heads  Map of property name => column caption; when empty,
 *                      the properties of the first row are used as captions.
 * @param array $widths Column widths in percent, by column position; a
 *                      column without an entry gets no width attribute.
 * @param array $tkeys  Property names whose cells are rendered through
 *                      echot() (a textarea) instead of as plain text.
 */
function rowstable( &$rows , $heads = array() , $widths = array() , $tkeys = array() )
{
    $n = count($rows);
    if ($n <= 0) {
        return;
    }

    if (count($heads) == 0) {
        foreach (get_object_vars($rows[0]) as $field => $unused) {
            $heads[$field] = $field;
        }
    }
    // Column order for the body rows follows the header map.
    $keys = array_keys($heads);

    echo "<table class=\"dxtable\">\n";

    // Header row.
    if (count($heads) > 0) {
        echo "<tr>\n";
        $j = 0;
        foreach ($heads as $v) {
            if (isset($widths[$j])) {
                echo "<th width=\"".$widths[$j]."%\">$v</th>\n";
            } else {
                echo "<th>$v</th>\n";
            }
            $j++;
        }
        echo "</tr>\n";
    }

    // Body rows.
    for ($i = 0; $i < $n; $i++) {
        $row = $rows[$i];
        echo "<tr>\n";
        foreach ($keys as $key) {
            if (in_array($key, $tkeys)) {
                echo "<td>";
                echot($row->$key);
                echo "</td>\n";
            } else {
                echo "<td>".$row->$key."</td>\n";
            }
        }
        echo "</tr>\n";
    }
    echo "</table>\n";
} // end func rowstable

/**
 * unicode ord, FROM manual: a function taken from the manual that inspects a
 * unicode code value; used as a replacement for ord.
 */
/**
 * Reads the first two bytes of $u as a little-endian 16-bit value.
 *
 * @param string $u Binary string; a missing byte counts as 0.
 * @return int Second byte * 256 + first byte.
 */
function uniord($u) {
    $k1 = ord(substr($u, 0, 1));
    $k2 = ord(substr($u, 1, 1));
    return $k2 * 256 + $k1;
}//end func uniord

/**
 * Debug variant of uniord(): returns the ordinals of the first three bytes
 * of $u joined by "=".
 *
 * @param string $u Binary string; a missing byte counts as 0.
 * @return string For example "97=98=99" for "abc".
 */
function uniord2($u) {
    $k1 = ord(substr($u, 0, 1));
    $k2 = ord(substr($u, 1, 1));
    $k3 = ord(substr($u, 2, 1));
    return "$k1=$k2=$k3";
}//end func uniord2

/**
 * Returns the current Unix timestamp with microseconds as a float.
 *
 * Example:
 *   $time_start = getmicrotime();
 *   // code here
 *   $time_end = getmicrotime();
 *   $time = $time_end - $time_start;
 *   echo "Did nothing in $time seconds <BR>";
 *
 * @return float Seconds since the Unix epoch.
 */
function getmicrotime(){
    return microtime(true);
}//end func getmicrotime

/**
 * mkdirp is used instead of mkdir; mkdirp can create deeply nested
 * directories.
 */
/**
 * Creates a directory together with any missing parent directories.
 *
 * @param string $target Directory path to create.
 * @return bool True when $target exists as a directory afterwards; false
 *              when it exists as a non-directory or cannot be created.
 */
function mkdirp($target) {
    // If the path already exists && is a directory, all is well.
    // If the path is not a directory, we've a problem.
    if (file_exists($target)) {
        return is_dir($target);
    }

    // Attempting to create the directory may clutter up our display.
    if ( @mkdir($target) ) return true;

    // If the above failed, create the parent node, then try once more.
    // The retry is a plain mkdir() rather than a recursive call: recursing
    // would never terminate when the parent exists but mkdir() keeps
    // failing (e.g. permission denied).
    $parent = dirname($target);
    if ($parent === $target || !mkdirp($parent)) {
        return false;
    }
    return @mkdir($target) || is_dir($target);
}//</function mkdirp>

/**
 * function cut is used to cut string between $from and $to
 *
 * $to is searched for after the end of the first $from.
 *
 * @param string $str    Text to search.
 * @param string $from   Opening delimiter.
 * @param string $to     Closing delimiter.
 * @param string $direct 'in' returns only the text between the delimiters;
 *                       any other value (default 'out') includes them.
 * @return string The extracted text, or "" when either delimiter is absent.
 */
function cut2($str,$from,$to,$direct='out')
{
    $frompos = strpos($str,$from);
    if ($frompos === false) {
        return "";
    }
    $topos = strpos($str,$to,$frompos+strlen($from));
    if ($topos === false) {
        return "";
    }

    if($direct == 'in'){
        $start = $frompos+strlen($from);
        $txt = substr($str,$start,$topos-$start);
    }
    else {
        $txt = substr($str,$frompos,$topos+strlen($to)-$frompos);
    }
    return $txt;
} // end func cut
<?
// utf8 - unicode
/**
 * Decodes one UTF-8 encoded character into its code point.
 *
 * @param string $c Exactly one character, 1 to 4 bytes long.
 * @return int|null The code point; null when $c is not 1-4 bytes long.
 */
function utf8_unicode($c) {
    $len = strlen($c);
    if ($len < 1 || $len > 4) {
        return null;
    }
    if ($len === 1) {
        return ord($c);
    }

    // Payload mask of the lead byte, by sequence length.
    $leadMasks = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
    $code = ord($c[0]) & $leadMasks[$len];
    // Each following byte contributes its low six bits.
    for ($i = 1; $i < $len; $i++) {
        $code = ($code << 6) | (ord($c[$i]) & 0x3f);
    }
    return $code;
}
// unicode - utf8
/**
 * Encodes a code point as a UTF-8 byte string.
 *
 * @param int $c Code point.
 * @return string 1 to 4 bytes; "" when $c is 0x200000 or larger.
 */
function u2utf8($c) {
    if ($c < 0x80) {
        // chr() is required here: appending the integer itself would yield
        // its decimal digits ("65") instead of the character ("A").
        return chr($c);
    }
    if ($c < 0x800) {
        return chr(0xC0 | ($c >> 6))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x10000) {
        return chr(0xE0 | ($c >> 12))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    if ($c < 0x200000) {
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }
    return "";
}
?>
<?php
/**
 * PHP version of the JavaScript function of the same name.
 *
 * The input is first passed through rawurldecode(); then %uXXXX escapes and
 * the numeric character references &#xXXXX; / &#NNNN; (the decimal form also
 * without the trailing ';') are converted to GBK. All other text, including
 * line breaks, is passed through unchanged.
 *
 * @param string $str Escaped text.
 * @return string Decoded text; converted characters are GBK encoded.
 */
if(! function_exists("unescape")):
  function unescape($str) {
    $str = rawurldecode($str);
    // /U makes ".+" consume one character at a time so an escape is never
    // swallowed; /s lets "." match line breaks so they are not dropped.
    preg_match_all("/%u[0-9A-Fa-f]{4}|&#x[0-9A-Fa-f]{4};|&#\d+;|&#\d+?|.+/Us",$str,$r);
    $ar = $r[0];
    foreach($ar as $k=>$v) {
        // pack("H4") and pack("n") both produce big-endian bytes, so the
        // source charset must be UCS-2BE; plain "UCS-2" is platform order.
        if(substr($v,0,2) == "%u")
            $ar[$k] = iconv("UCS-2BE","GBK",pack("H4",substr($v,-4)));
        elseif(substr($v,0,3) == "&#x")
            $ar[$k] = iconv("UCS-2BE","GBK",pack("H4",substr($v,3,-1)));
        elseif(substr($v,0,2) == "&#") {
            $ar[$k] = iconv("UCS-2BE","GBK",pack("n",(int)preg_replace("/[^\d]/","",$v)));
        }
    }
    return join("",$ar);
  }
endif;
/**
 * PHP version of the JavaScript function of the same name.
 **/
/**
 * PHP version of JavaScript's escape() for GB2312 input.
 *
 * Runs of ASCII go through rawurlencode(); every two-byte GB2312 character
 * becomes %uXXXX with uppercase hex digits, as JavaScript writes them.
 *
 * @param string $str GB2312 encoded text.
 * @return string The escaped text.
 */
if(! function_exists("escape")):
  function escape($str) {
    preg_match_all("/[\x80-\xff].|[\x01-\x7f]+/",$str,$r);
    $ar = $r[0];
    foreach($ar as $k=>$v) {
        if(ord($v[0]) < 128) {
            $ar[$k] = rawurlencode($v);
            continue;
        }
        // Explicit big-endian target: plain "UCS-2" is platform byte order
        // and would give the hex digits swapped on little-endian hosts.
        $ucs2 = iconv("GB2312","UCS-2BE",$v);
        if ($ucs2 === false) {
            // Not a valid GB2312 character: keep the raw bytes, %-encoded,
            // instead of emitting a bare "%u".
            $ar[$k] = rawurlencode($v);
        } else {
            $ar[$k] = "%u".strtoupper(bin2hex($ucs2));
        }
    }
    return join("",$ar);
  }
endif;?>
多谢。之所以写出这个臃肿的过程，原因1.对unicode转utf8的公式不理解2.看手册不仔细，关于位运算$a << $b Shift left（左移）将 $a 中的位向左移动 $b 次（每一次移动都表示“乘以 2”）。
$a >> $b Shift right（右移）将 $a 中的位向右移动 $b 次（每一次移动都表示“除以 2”）。
总算理解了这个转换公式。
我将唠叨老大的函数化入我的字符串互转函数之中。可能要提高一些效率。
编码互转。
/**
 * Converts a UTF-8 encoded string into a UCS-2 encoded string.
 *
 * For example the code of "大" is 5927: stored little-endian it is the bytes
 * 27 59, stored big-endian the order is unchanged, 59 27.
 * A little-endian file must start with FF FE and a big-endian file with
 * FE FF, otherwise the content is misread. This function converts the
 * characters only and does not add that header.
 * Strings produced by iconv are stored big-endian.
 *
 * Only sequences of 1 to 3 bytes (U+0000-U+FFFF) are converted. Bytes that
 * do not form such a sequence (stray continuation bytes, 4-byte sequences,
 * a truncated tail) are skipped.
 *
 * @param string $str   UTF-8 encoded string.
 * @param string $order Byte order of the result: "little" (default) or any
 *                      other value for big-endian.
 * @return string The converted string, two bytes per character.
 */
function utf8ToUnicode($str,$order="little")
{
    $ucs2string = "";
    $n = strlen($str);
    $i = 0;
    while ($i < $n) {
        $lead = ord($str[$i]);
        if ($lead < 0x80) {                        // 0xxxxxxx
            $code = $lead;
            $len = 1;
        } elseif ($lead >= 0xC0 && $lead < 0xE0) { // 110xxxxx 10xxxxxx
            $code = $lead & 0x1F;
            $len = 2;
        } elseif ($lead >= 0xE0 && $lead < 0xF0) { // 1110xxxx 10xxxxxx 10xxxxxx
            $code = $lead & 0x0F;
            $len = 3;
        } else {
            // Continuation byte without a lead byte, or a lead byte of a
            // sequence UCS-2 cannot hold.
            $i++;
            continue;
        }

        if ($i + $len > $n) {
            break; // truncated sequence at the end of the input
        }

        $valid = true;
        for ($j = 1; $j < $len; $j++) {
            $cont = ord($str[$i + $j]);
            if (($cont & 0xC0) != 0x80) {
                $valid = false;
                break;
            }
            $code = ($code << 6) | ($cont & 0x3F);
        }
        if (!$valid) {
            $i++;
            continue;
        }

        // Split numerically; going through a hex string loses leading zeros.
        $h = $code >> 8;
        $l = $code & 0xFF;
        if ($order=="little") {
            $ucs2string .= chr($l).chr($h);
        } else {
            $ucs2string .= chr($h).chr($l);
        }
        $i += $len;
    }
    return $ucs2string;
} // end func

/*
 * Converts a unicode (UCS-2) encoded string into a UTF-8 encoded string.
 * Parameter str: the unicode encoded string.
 * Parameter order: byte order of the unicode string, big endian or little
 * endian.
 * Returns utf8string, the converted string.
 */
/**
 * Converts a UCS-2 encoded string (two bytes per character) into UTF-8.
 *
 * @param string $str   UCS-2 encoded string; a trailing single byte that
 *                      does not form a complete unit is ignored.
 * @param string $order Byte order of $str: "little" (default) or any other
 *                      value for big-endian.
 * @return string The UTF-8 encoded string.
 */
function unicodeToUtf8($str,$order="little")
{
    $utf8string = "";
    $n = strlen($str);
    // Two bytes make one unicode character.
    for ($i = 0; $i + 1 < $n; $i += 2) {
        $first = ord($str[$i]);
        $second = ord($str[$i + 1]);
        // Combine numerically; concatenating dechex() output drops the
        // leading zero of bytes below 0x10.
        if ($order=="little") {
            $val = ($second << 8) | $first;
        } else {
            $val = ($first << 8) | $second;
        }

        if ($val < 0x80) {          // 0000-007F
            $utf8string .= chr($val);
        } elseif ($val < 0x800) {   // 0080-07FF
            $utf8string .= chr(0xC0 | ($val >> 6));
            $utf8string .= chr(0x80 | ($val & 0x3F));
        } else {                    // 0800-FFFF
            $utf8string .= chr(0xE0 | ($val >> 12));
            $utf8string .= chr(0x80 | (($val >> 6) & 0x3F));
            $utf8string .= chr(0x80 | ($val & 0x3F));
        }
    }
    return $utf8string;
} // end func
另外：我的phpescape函数对字符编码做了判断或者指定，所以从使用的角度来说应该更友好一些。/*
* 用处：此函数用来逆转javascript的escape函数编码后的字符。
* 关键的正则查找我不知道有没有问题.
* 参数：javascript编码过的字符串。
* 如：phpUnescape("%u5927")= 大
* 2005-12-10
*
*/
/**
 * Reverses the encoding produced by JavaScript's escape() and returns UTF-8.
 *
 * Recognised escapes:
 *   - %uXXXX  one UTF-16 code unit; a high/low surrogate pair is combined
 *             into a single supplementary code point;
 *   - %XX     a code point in the range 0x00-0xFF.
 * Everything else, including a '%' that does not start a valid escape, is
 * copied to the output unchanged.
 *
 * Example: phpUnescape("%u5927") returns the UTF-8 bytes E5 A4 A7.
 *
 * @param string $escstr String encoded by JavaScript's escape().
 * @return string The decoded text, UTF-8 encoded.
 */
function phpUnescape($escstr){
    // The surrogate-pair alternative comes first so it wins over a single
    // %uXXXX. The last two alternatives guarantee that every input byte ends
    // up in some token, so no character is silently dropped.
    $pattern = '/%u[dD][89abAB][0-9A-Fa-f]{2}%u[dD][c-fC-F][0-9A-Fa-f]{2}'
             . '|%u[0-9A-Fa-f]{4}|%[0-9A-Fa-f]{2}|[^%]+|%/';
    preg_match_all($pattern, (string)$escstr, $matches);

    $out = "";
    foreach ($matches[0] as $token) {
        $len = strlen($token);
        if ($token[0] != "%" || $len == 1) {
            // Literal text, or a lone '%' that is not part of an escape.
            $out .= $token;
            continue;
        }

        if ($len == 12) {
            // %uHHHH%uLLLL surrogate pair -> one code point above U+FFFF.
            $high = hexdec(substr($token, 2, 4));
            $low  = hexdec(substr($token, 8, 4));
            $code = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
        } elseif ($len == 6) {
            $code = hexdec(substr($token, 2));   // %uXXXX
        } else {
            $code = hexdec(substr($token, 1));   // %XX
        }

        // Encode the code point as UTF-8.
        if ($code < 0x80) {            // 0000-007F
            $out .= chr($code);
        } elseif ($code < 0x800) {     // 0080-07FF
            $out .= chr(0xC0 | ($code >> 6));
            $out .= chr(0x80 | ($code & 0x3F));
        } elseif ($code < 0x10000) {   // 0800-FFFF
            $out .= chr(0xE0 | ($code >> 12));
            $out .= chr(0x80 | (($code >> 6) & 0x3F));
            $out .= chr(0x80 | ($code & 0x3F));
        } else {                       // 10000-10FFFF
            $out .= chr(0xF0 | ($code >> 18));
            $out .= chr(0x80 | (($code >> 12) & 0x3F));
            $out .= chr(0x80 | (($code >> 6) & 0x3F));
            $out .= chr(0x80 | ($code & 0x3F));
        }
    }
    return $out;
}
/*
 * Equivalent of escape.
 * Taken from the web. The other functions in this file borrow the key
 * algorithm from this function.
 */
/**
 * PHP counterpart of JavaScript's escape().
 *
 * The input is converted to UTF-16 (big-endian) and emitted one code unit at
 * a time: ASCII goes through rawurlencode(), everything else becomes %uXXXX
 * with uppercase hex digits. Characters outside the BMP therefore come out
 * as a surrogate pair, as in JavaScript. Note that the set of ASCII
 * characters left literal is rawurlencode()'s, which is not exactly the set
 * escape() leaves literal.
 *
 * @param string $str    Text to encode.
 * @param string $encode Character encoding of $str, as a name iconv()
 *                       accepts; when empty it is detected with
 *                       mb_detect_encoding().
 * @return string|null   The encoded string; null when no encoding is given
 *                       and none can be detected, or when iconv() cannot
 *                       convert the input.
 */
function phpEscape($str,$encode="") {
    if ($encode=="") {
        if (!function_exists("mb_detect_encoding")) {
            echo "error You must enter the string's encoding or extend the php for mb_string";
            return;
        }
        // No progress message is echoed: this function returns a value and
        // must not write into the caller's page output.
        $encode = mb_detect_encoding($str);
        if ($encode === false) {
            return;
        }
    }

    $str = (string)$str;
    if ($str === "") {
        return "";
    }

    // An explicit byte order is required: plain "UCS-2"/"UTF-16" is
    // platform-dependent in iconv and may prepend a BOM.
    $utf16 = iconv($encode, "UTF-16BE", $str);
    if ($utf16 === false) {
        return;
    }

    $out = "";
    $n = strlen($utf16);
    for ($i = 0; $i + 1 < $n; $i += 2) {
        $hi = ord($utf16[$i]);
        $lo = ord($utf16[$i + 1]);
        if ($hi == 0 && $lo < 0x80) {
            $out .= rawurlencode(chr($lo));
        } else {
            $out .= sprintf("%%u%02X%02X", $hi, $lo);
        }
    }
    return $out;
}