<?php
// Usage example: load jpgraph_gb2312.php, create a GB2312toUTF8 object
// (presumably defined in that include - confirm against the jpgraph package)
// and print the result of its gb2utf8() method for a sample string.
// The full "<?php" tag is used because the short "<?" form is only parsed
// when the short_open_tag ini setting is enabled.
include "jpgraph_gb2312.php";
$bm = new GB2312toUTF8;
echo $bm->gb2utf8("中国");
?>
jpgraph_gb2312.php(120097字节)在jpgraph-1.16包中,低版本的没有。jpgraph是一个很专业的统计图生成软件包,不过7625035字节的尺寸实在有些偏大,对于普通的应用有点“牛刀”了。

解决方案 »

  1.   

    许多人都已经谈过此问题了!
    <?php
    // Program by Donald,Milddragon Studio.
    // Email: wilddragon#sina.com
    // gb2312.txt is not bundled with this script; search the web to download it.
    //
    // Builds the GB2312 <-> Unicode lookup tables once, as globals, so that the
    // conversion functions do not have to re-read the mapping file on every call.
    //
    //   $____global_codetable  : int (hex digits at offset 2..5 of a line)
    //                            => 4-character hex string (offset 9..12)
    //   $____global_codetable2 : int (hex digits at offset 9..12)
    //                            => int (hex digits at offset 2..5)
    //
    // The mapping file is expected next to the running script.
    $____global_codetable = array();
    $____global_codetable2 = array();
    $____global_filename = pathinfo($_SERVER["SCRIPT_FILENAME"]);
    $____global_filename = $____global_filename["dirname"] . "/gb2312.txt";

    // Read the file only when it is actually readable; otherwise report the
    // problem and continue with empty tables instead of failing later on.
    $____global_tmp = is_readable($____global_filename) ? file($____global_filename) : false;
    if ($____global_tmp === false) {
        trigger_error("Cannot read GB2312 mapping file: " . $____global_filename, E_USER_WARNING);
        $____global_tmp = array();
    }

    // Single pass over the file fills both directions of the mapping.
    foreach ($____global_tmp as $____global_line) {
        // Lines starting with '#' are comments in the mapping file.
        if (strncmp($____global_line, '#', 1) === 0) {
            continue;
        }
        $____global_gbhex = substr($____global_line, 2, 4);
        $____global_unihex = substr($____global_line, 9, 4);
        // Skip blank or malformed lines: both fields must be exactly 4 hex digits.
        if (strspn((string) $____global_gbhex, '0123456789abcdefABCDEF') !== 4
            || strspn((string) $____global_unihex, '0123456789abcdefABCDEF') !== 4) {
            continue;
        }
        $____global_codetable[hexdec($____global_gbhex)] = $____global_unihex;
        $____global_codetable2[hexdec($____global_unihex)] = hexdec($____global_gbhex);
    }

    // Drop the temporaries so only the two tables remain in the global scope.
    unset($____global_filename);
    unset($____global_tmp);
    unset($____global_line);
    unset($____global_gbhex);
    unset($____global_unihex);
    /**
     * Converts text containing numeric character references such as
     * &#x3F8F;&#x5354; (other ASCII text may be mixed in) into GB2312 text.
     *
     * Can be used for XML encoding conversion; note that the function does not
     * change any encoding declaration inside the XML itself.
     *
     * Only well-formed references (&#NNN; or &#xHHH;) whose code point is present
     * in the global $____global_codetable2 table are replaced by the two GB2312
     * bytes. Everything else - malformed references, and references with no
     * table entry such as &#60; - is copied through unchanged, so XML escaping
     * is preserved and no garbage bytes are produced.
     *
     * @param string $un Text with numeric character references.
     * @return string    The converted text; empty/blank input is returned as is.
     */
    function unicode2gb($un)
    {
        if (!trim($un))
            return $un;
        global $____global_codetable2;

        // Split into literal text (even indexes) and the captured reference
        // bodies (odd indexes). The digit limits cover the full Unicode range
        // (0x10FFFF / 1114111) and keep hexdec()/intval() within integer range.
        $parts = preg_split(
            '/&#([xX][0-9a-fA-F]{1,6}|[0-9]{1,7});/',
            (string) $un,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            // Regex engine failure: return the input untouched rather than lose data.
            return $un;
        }

        $gb = "";
        foreach ($parts as $index => $part) {
            if ($index % 2 == 0) {
                // Plain text between references.
                $gb .= $part;
                continue;
            }
            // A leading x/X marks hexadecimal notation, otherwise decimal.
            if ($part[0] === 'x' || $part[0] === 'X') {
                $codepoint = hexdec(substr($part, 1));
            } else {
                $codepoint = intval($part);
            }
            if (isset($____global_codetable2[$codepoint])) {
                // The table stores the 7-bit code; setting the high bit of both
                // bytes yields the two-byte GB2312 sequence.
                $code = 0x8080 | $____global_codetable2[$codepoint];
                $gb .= chr(($code >> 8) & 0xFF);
                $gb .= chr($code & 0xFF);
            } else {
                // No GB2312 equivalent: keep the original reference text.
                $gb .= "&#" . $part . ";";
            }
        }
        return $gb;
    }

    /*
    Converts GB2312 text (other ASCII characters may be mixed in) into Unicode
    text written as numeric references in the &#x3F8F;&#x5354; format;
    can be used for XML encoding conversion.
    Note that the function does not change the encoding declaration in the XML.
    */
    /**
     * Converts GB2312 text (ASCII may be mixed in) into text where every
     * two-byte character is written as a hexadecimal numeric reference, e.g.
     * &#x4E2D;. Bytes up to 0x7F are copied through unchanged.
     *
     * The hex digits come from the global $____global_codetable table, keyed by
     * the two bytes with their high bits cleared. A byte pair with no table
     * entry, or a high byte that is not followed by a second high byte, is
     * written as &#xFFFD; (the Unicode replacement character); in the latter
     * case only the stray byte is consumed, so following ASCII is preserved.
     *
     * @param string $gb GB2312 text.
     * @return string    Text with numeric references; blank input is returned as is.
     */
    function gb2unicode($gb)
    {
        if (!trim($gb))
            return $gb;
        global $____global_codetable;

        $gb = (string) $gb;
        $length = strlen($gb);
        $utf = "";
        for ($i = 0; $i < $length; $i++) {
            $high = ord($gb[$i]);
            if ($high <= 127) {
                // Single-byte (ASCII) character.
                $utf .= $gb[$i];
                continue;
            }
            $low = ($i + 1 < $length) ? ord($gb[$i + 1]) : 0;
            if ($low > 127) {
                // Complete two-byte character: consume the second byte too.
                $i++;
                $key = (($high << 8) | $low) & 0x7F7F;
                $hex = isset($____global_codetable[$key]) ? (string) $____global_codetable[$key] : '';
                if ($hex !== '') {
                    $utf .= "&#x" . $hex . ";";
                    continue;
                }
            }
            // Unmapped pair or stray/truncated high byte.
            $utf .= "&#xFFFD;";
        }
        return $utf;
    }

    /*
    Converts UTF-8 text into GB2312 text; unlike unicode2gb above, this is a
    binary (byte-level) conversion.
    */
    /**
     * Converts UTF-8 text into GB2312 text (byte-level conversion).
     *
     * ASCII bytes are copied through. Well-formed 2-, 3- and 4-byte UTF-8
     * sequences are decoded to a code point and looked up in the global
     * $____global_codetable2 table; a hit is written as two GB2312 bytes, a miss
     * as "?". Bytes that do not form a well-formed sequence (stray continuation
     * bytes, invalid lead bytes, truncated sequences) are skipped one at a time.
     *
     * @param string $utf8 UTF-8 text.
     * @return string      GB2312 text; blank input is returned as is.
     */
    function utf82gb($utf8)
    {
        if (!trim($utf8))
            return $utf8;
        global $____global_codetable2;

        $utf8 = (string) $utf8;
        $length = strlen($utf8);
        $gb = "";
        $i = 0;
        while ($i < $length) {
            $lead = ord($utf8[$i]);
            if (($lead & 0x80) == 0) {
                // 1 byte: plain ASCII.
                $gb .= $utf8[$i];
                $i++;
                continue;
            }
            // Work out how many continuation bytes follow and the payload bits
            // carried by the lead byte.
            if (($lead & 0xE0) == 0xC0) {
                $extra = 1;
                $codepoint = $lead & 0x1F;
            } elseif (($lead & 0xF0) == 0xE0) {
                $extra = 2;
                $codepoint = $lead & 0x0F;
            } elseif (($lead & 0xF8) == 0xF0) {
                $extra = 3;
                $codepoint = $lead & 0x07;
            } else {
                // Stray continuation byte or invalid lead byte: skip it.
                $i++;
                continue;
            }
            // A sequence cut off by the end of the string is skipped byte by byte.
            if ($i + $extra >= $length) {
                $i++;
                continue;
            }
            $wellFormed = true;
            for ($k = 1; $k <= $extra; $k++) {
                $byte = ord($utf8[$i + $k]);
                if (($byte & 0xC0) != 0x80) {
                    $wellFormed = false;
                    break;
                }
                $codepoint = ($codepoint << 6) | ($byte & 0x3F);
            }
            if (!$wellFormed) {
                // Only the lead byte is dropped; the following bytes are
                // re-examined on their own so no valid text is lost.
                $i++;
                continue;
            }
            $i += $extra + 1;
            if (isset($____global_codetable2[$codepoint])) {
                // The table stores the 7-bit code; setting the high bit of both
                // bytes yields the two-byte GB2312 sequence.
                $code = 0x8080 | $____global_codetable2[$codepoint];
                $gb .= chr(($code >> 8) & 0xFF);
                $gb .= chr($code & 0xFF);
            } else {
                // Character has no GB2312 equivalent.
                $gb .= "?";
            }
        }
        return $gb;
    }

    /*
    Converts GB2312 text into UTF-8 text; unlike gb2unicode above, this is a
    binary (byte-level) conversion.
    */
    /**
     * Converts GB2312 text into UTF-8 text (byte-level conversion).
     *
     * Bytes up to 0x7F are copied through. A pair of high bytes is looked up in
     * the global $____global_codetable table (keyed by the two bytes with their
     * high bits cleared, value = Unicode code point as a hex string) and the
     * code point is written as a 1- to 4-byte UTF-8 sequence. A pair with no
     * table entry, or a high byte that is not followed by a second high byte,
     * is written as U+FFFD (EF BF BD); in the latter case only the stray byte
     * is consumed, so following ASCII is preserved.
     *
     * @param string $gb GB2312 text.
     * @return string    UTF-8 text; blank input is returned as is.
     */
    function gb2utf8($gb)
    {
        if (!trim($gb))
            return $gb;
        global $____global_codetable;

        $gb = (string) $gb;
        $length = strlen($gb);
        $utf8 = "";
        for ($i = 0; $i < $length; $i++) {
            $high = ord($gb[$i]);
            if ($high <= 127) {
                // Single-byte (ASCII) character.
                $utf8 .= $gb[$i];
                continue;
            }
            $hex = '';
            $low = ($i + 1 < $length) ? ord($gb[$i + 1]) : 0;
            if ($low > 127) {
                // Complete two-byte character: consume the second byte too.
                $i++;
                $key = (($high << 8) | $low) & 0x7F7F;
                if (isset($____global_codetable[$key])) {
                    $hex = (string) $____global_codetable[$key];
                }
            }
            if ($hex === '') {
                // Unmapped pair or stray/truncated high byte: U+FFFD.
                $utf8 .= "\xEF\xBF\xBD";
                continue;
            }
            $code = hexdec($hex);
            if ($code < 0x80) {
                // 7 bits: one byte.
                $utf8 .= chr($code);
            } elseif ($code < 0x800) {
                // 11 bits: 5 + 6.
                $utf8 .= chr(0xC0 | (($code >> 6) & 0x1F));
                $utf8 .= chr(0x80 | ($code & 0x3F));
            } elseif ($code < 0x10000) {
                // 16 bits: 4 + 6 + 6.
                $utf8 .= chr(0xE0 | (($code >> 12) & 0x0F));
                $utf8 .= chr(0x80 | (($code >> 6) & 0x3F));
                $utf8 .= chr(0x80 | ($code & 0x3F));
            } else {
                // 21 bits: 3 + 6 + 6 + 6.
                $utf8 .= chr(0xF0 | (($code >> 18) & 0x07));
                $utf8 .= chr(0x80 | (($code >> 12) & 0x3F));
                $utf8 .= chr(0x80 | (($code >> 6) & 0x3F));
                $utf8 .= chr(0x80 | ($code & 0x3F));
            }
        }
        return $utf8;
    }
    ?>
      

  2.   

    网上有很多gbk对应的unicode库,不到100K,还包括解析。我这里就有,需要的话,留下email。
      

  3.   

    偶想学习,[email protected]
    先谢了.
      

  4.   

    [email protected]
    学习
    3ks