$str = "张三"; // Both calls below are meant to print the same text: 张三
// NOTE(review): an empty source charset leaves the input encoding up to the
// iconv implementation / current locale — confirm this is what is wanted.
print_r(iconv('', 'UTF-8', $str));
print_r(unescape($str));

/**
 * Decode a string produced by JavaScript's escape() (and HTML numeric
 * character references) into UTF-8.
 *
 * Handled forms, after ordinary %XX sequences are decoded by rawurldecode():
 *   - %uXXXX   UTF-16 code units; consecutive units are decoded together so
 *              surrogate pairs become a single character
 *   - &#xH..;  hexadecimal character reference (1 to 6 hex digits)
 *   - &#D..;   decimal character reference (1 to 7 digits)
 * Everything else is passed through untouched. A sequence that iconv cannot
 * convert (e.g. a lone surrogate) is left in the output as it was written.
 *
 * @param string $str Escaped input.
 * @return string Decoded text; converted sequences are UTF-8 encoded.
 */
function unescape($str)
{
    $decoded = rawurldecode($str);

    return preg_replace_callback(
        '/((?:%u[0-9a-fA-F]{4})+)|&#x([0-9a-fA-F]{1,6});|&#(\d{1,7});/',
        function ($m) {
            if ($m[1] !== '') {
                // pack('H*') yields big-endian bytes, so the byte order must
                // be stated explicitly instead of relying on iconv's default.
                $bytes = pack('H*', str_replace('%u', '', $m[1]));
                $from = 'UTF-16BE';
            } elseif ($m[2] !== '') {
                $bytes = pack('N', hexdec($m[2]));
                $from = 'UCS-4BE';
            } else {
                // 32-bit packing keeps code points above U+FFFF intact.
                $bytes = pack('N', (int) $m[3]);
                $from = 'UCS-4BE';
            }

            $converted = iconv($from, 'UTF-8', $bytes);

            return $converted === false ? $m[0] : $converted;
        },
        $decoded
    );
}
此代码在浏览器地址栏中运行即可...那个是经过escape转换的代码,
所以用unescape转回来就可以了.
首先把它(16进制)转换成10进制,然后再对照unicode表转成对应的字符就可以了.
过程是这样的: javascript:alert(String.fromCharCode(parseInt("9996", 16)));
$s = "%u9996%u9875";
echo(unescape($s));

/**
 * Decode a string produced by JavaScript's escape() (and HTML numeric
 * character references), emitting the decoded characters in GBK.
 *
 * Handled forms, after ordinary %XX sequences are decoded by rawurldecode():
 *   - %uXXXX   UTF-16 code units; consecutive units are decoded together so
 *              surrogate pairs are read as one character
 *   - &#xH..;  hexadecimal character reference (1 to 6 hex digits)
 *   - &#D..;   decimal character reference (1 to 7 digits)
 * Everything else is passed through byte-for-byte. A sequence that iconv
 * cannot convert to GBK is left in the output exactly as it was written.
 *
 * @param string $str Escaped input.
 * @return string Decoded text; converted sequences are GBK encoded.
 */
function unescape($str)
{
    $decoded = rawurldecode($str);

    return preg_replace_callback(
        '/((?:%u[0-9a-fA-F]{4})+)|&#x([0-9a-fA-F]{1,6});|&#(\d{1,7});/',
        function ($m) {
            if ($m[1] !== '') {
                // pack('H*') yields big-endian bytes, so the byte order must
                // be stated explicitly instead of relying on iconv's default.
                $bytes = pack('H*', str_replace('%u', '', $m[1]));
                $from = 'UTF-16BE';
            } elseif ($m[2] !== '') {
                $bytes = pack('N', hexdec($m[2]));
                $from = 'UCS-4BE';
            } else {
                // 32-bit packing avoids truncating values above 65535.
                $bytes = pack('N', (int) $m[3]);
                $from = 'UCS-4BE';
            }

            $converted = iconv($from, 'GBK', $bytes);

            return $converted === false ? $m[0] : $converted;
        },
        $decoded
    );
}
?>
$str = "张张三%u9996%u9875";
print_r(unescape($str));

/**
 * Decode a string produced by JavaScript's escape() (and HTML numeric
 * character references) into UTF-8.
 *
 * Handled forms, after ordinary %XX sequences are decoded by rawurldecode():
 *   - %uXXXX    UTF-16 code units; consecutive units are decoded together so
 *               surrogate pairs become a single character
 *   - &#xHHHH;  four-digit hexadecimal reference; the trailing semicolon is
 *               optional and is consumed when present
 *   - &#D..;    decimal character reference (1 to 7 digits)
 * Everything else is passed through untouched. A sequence that iconv cannot
 * convert is left in the output as it was written. The function produces no
 * output of its own; it only returns the decoded string.
 *
 * @param string $str Escaped input.
 * @return string Decoded text; converted sequences are UTF-8 encoded.
 */
function unescape($str)
{
    $decoded = rawurldecode($str);

    return preg_replace_callback(
        // No /U modifier: the optional ";" must be matched greedily so it is
        // swallowed together with the entity rather than left behind.
        '/((?:%u[0-9a-fA-F]{4})+)|&#x([0-9a-fA-F]{4});?|&#(\d{1,7});/',
        function ($m) {
            if ($m[1] !== '') {
                // pack('H*') yields big-endian bytes, so the byte order must
                // be stated explicitly instead of relying on iconv's default.
                $bytes = pack('H*', str_replace('%u', '', $m[1]));
                $from = 'UTF-16BE';
            } elseif ($m[2] !== '') {
                $bytes = pack('N', hexdec($m[2]));
                $from = 'UCS-4BE';
            } else {
                // 32-bit packing keeps code points above U+FFFF intact.
                $bytes = pack('N', (int) $m[3]);
                $from = 'UCS-4BE';
            }

            $converted = iconv($from, 'UTF-8', $bytes);

            return $converted === false ? $m[0] : $converted;
        },
        $decoded
    );
}
输出编码:utf-8。
+ (加号) 解析为空格。
%?? 解析为单字节字符。
%u???? 解析为 unicode 字符。
<?php
/**
 * Decode a string produced by JavaScript's escape() into UTF-8.
 *
 * urldecode() first turns "+" into a space and %XX into single bytes; the
 * remaining %uXXXX sequences are then converted. Consecutive %uXXXX units
 * are handed to the callback as one run so that surrogate pairs decode to a
 * single character.
 *
 * @param string $s Escaped input.
 * @return string Decoded UTF-8 text.
 */
function unescape($s)
{
    return preg_replace_callback('/(?:%u[\da-fA-F]{4})+/', 'unesacpe_unicode_match', urldecode($s));
}

/**
 * preg_replace_callback() handler: convert a run of %uXXXX units to UTF-8.
 *
 * Only $matches[0] (the full matched text) is read, so the handler works for
 * a single "%uXXXX" match as well as for a run of them. If iconv rejects the
 * sequence (e.g. a lone surrogate) the matched text is returned unchanged.
 *
 * @param array $matches Match array supplied by preg_replace_callback().
 * @return string UTF-8 text, or the original match when conversion fails.
 */
function unesacpe_unicode_match($matches)
{
    // pack('H*') yields big-endian bytes, so name the byte order explicitly;
    // a bare "utf-16" lets iconv pick one when no BOM is present.
    $utf16 = pack('H*', str_replace('%u', '', $matches[0]));
    $utf8 = iconv('UTF-16BE', 'UTF-8', $utf16);

    return $utf8 === false ? $matches[0] : $utf8;
}

echo unescape('this+is+%u9996%u9875.%20'); // prints "this is 首页. "
?>