Encoding.UTF8 Property Gets an encoding for the UTF-8 format. [Visual Basic] Public Shared ReadOnly Property UTF8 As Encoding [C#] public static Encoding UTF8 {get;} [C++] public: __property static Encoding* get_UTF8(); [JScript] public static function get UTF8() : Encoding; The UTF8Encoding class encodes Unicode characters using the UTF-8 encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This encoding supports all Unicode character values, and can also be accessed as code page 65001. When the data to be converted is only available in sequential blocks (such as data read from a stream), an application may choose to use a Decoder or an Encoder to perform the conversion.
一个不错的汉字代码转换软件: MView Convert 可以把转换 UTF-8 编码的 文件转换为 GB 或其他编码的文件。GB->UTF8 (php) function gb2utf8($gb) { if(!trim($gb)) return $gb; $filename="gb2312.txt"; $tmp=file($filename); $codetable=array(); while(list($key,$value)=each($tmp)) $codetable[hexdec(substr($value,0,6))]=substr($value,7,6); $utf8=""; while($gb) { if (ord(substr($gb,0,1))>127) { $this=substr($gb,0,2); $gb=substr($gb,2,strlen($gb)); $utf8.=u2utf8(hexdec($codetable[hexdec(bin2hex($this))-0x8080])); } else { $gb=substr($gb,1,strlen($gb)); $utf8.=u2utf8(substr($gb,0,1)); } } $ret=""; for($i=0;$i<strlen($utf8);$i+=3) $ret.=chr(substr($utf8,$i,3)); return $ret; } function u2utf8($c) { for($i=0;$i<count($c);$i++) $str=""; if ($c < 0x80) { $str.=$c; } else if ($c < 0x800) { $str.=(0xC0 | $c>>6); $str.=(0x80 | $c & 0x3F); } else if ($c < 0x10000) { $str.=(0xE0 | $c>>12); $str.=(0x80 | $c>>6 & 0x3F); $str.=(0x80 | $c & 0x3F); } else if ($c < 0x200000) { $str.=(0xF0 | $c>>18); $str.=(0x80 | $c>>12 & 0x3F); $str.=(0x80 | $c>>6 & 0x3F); $str.=(0x80 | $c & 0x3F); } return $str; } ?> //将UTF-8字符串转为代码页为CodePage的AnsiString。 function UTF8ToAnsiString(utf8str:string; CodePage:integer):AnsiString; var i:integer; buffer:widestring; ch,c1,c2:byte;begin result:=''; i:=1; while i<=Length(utf8str) do begin ch:=byte(utf8str[i]); setlength(buffer,length(buffer)+1); if (ch and $80)=0 then //1-byte buffer[length(buffer)]:=widechar(ch) else begin if (ch AND $E0) = $C0 then begin // 2-byte inc(i); c1 := byte(utf8str[i]); buffer[length(buffer)]:=widechar((word(ch AND $1F) SHL 6) OR (c1 AND $3F)); end else begin // 3-byte inc(i); c1 := byte(utf8str[i]); inc(i); c2 := byte(utf8str[i]); buffer[length(buffer)]:=widechar( (word(ch AND $0F) SHL 12) OR (word(c1 AND $3F) SHL 6) OR (c2 AND $3F)); end; end; inc(i); end; //while i := WideCharToMultiByte(codePage, WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR, @buffer[1], -1, nil, 0, nil, nil); if i>1 then begin SetLength(Result, i-1); WideCharToMultiByte(codePage, 
WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR, @buffer[1], -1, @Result[1], i-1, nil, nil); end; end;
Gets an encoding for the UTF-8 format.
[Visual Basic]
Public Shared ReadOnly Property UTF8 As Encoding
[C#]
public static Encoding UTF8 {get;}
[C++]
public: __property static Encoding* get_UTF8();
[JScript]
public static function get UTF8() : Encoding;
The UTF8Encoding class encodes Unicode characters using the UTF-8 encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This encoding supports all Unicode character values, and can also be accessed as code page 65001. When the data to be converted is only available in sequential blocks (such as data read from a stream), an application may choose to use a Decoder or an Encoder to perform the conversion.
一个不错的汉字代码转换软件:MView Convert,可以把 UTF-8 编码的文件转换为 GB 或其他编码的文件。
GB->UTF8 (php)
/**
 * Converts a GB2312-encoded byte string to UTF-8.
 *
 * Bytes up to 127 are copied through unchanged. A byte above 127 is taken
 * together with the following byte as one double-byte character; the pair,
 * read as a big-endian number minus 0x8080, is looked up in the mapping
 * file "gb2312.txt" (relative to the current working directory).
 *
 * Each line of the mapping file is read as: characters 0-5 = GB code in
 * hexadecimal, characters 7-12 = Unicode code point in hexadecimal.
 * NOTE(review): presumably lines look like "0x3021 0x554A" - confirm
 * against the table that ships with the application.
 *
 * @param string $gb GB2312-encoded input.
 * @return string UTF-8 output. Input that is empty or whitespace-only is
 *                returned unchanged. Double-byte characters with no table
 *                entry (including a dangling lead byte at the end of the
 *                input) are replaced by "?".
 */
function gb2utf8($gb)
{
    if (!trim($gb)) {
        return $gb;
    }

    // Build the GB -> Unicode lookup table. file() already emits a warning
    // when the table cannot be read; fall back to an empty table so the
    // function still returns a string.
    $codetable = array();
    $lines = file("gb2312.txt");
    if ($lines === false) {
        $lines = array();
    }
    foreach ($lines as $line) {
        // hexdec() historically ignored non-hex characters such as the "x"
        // of a "0x" prefix; strip them explicitly so newer PHP versions do
        // not raise a deprecation notice for every line.
        $gbHex  = preg_replace('/[^0-9A-Fa-f]/', '', (string) substr($line, 0, 6));
        $ucsHex = preg_replace('/[^0-9A-Fa-f]/', '', (string) substr($line, 7, 6));
        if ($gbHex === '' || $ucsHex === '') {
            continue; // blank or malformed line
        }
        $codetable[hexdec($gbHex)] = hexdec($ucsHex);
    }

    // Walk the input by index rather than repeatedly chopping the string:
    // a "while ($gb)" loop would stop early on a remainder of "0".
    $length = strlen($gb);
    $utf8   = "";
    $pos    = 0;
    while ($pos < $length) {
        if (ord($gb[$pos]) <= 127) {
            // Single-byte (ASCII) character: identical in UTF-8.
            $utf8 .= $gb[$pos];
            $pos++;
            continue;
        }

        // Double-byte character.
        $pair = substr($gb, $pos, 2);
        $pos += 2;
        $key  = hexdec(bin2hex($pair)) - 0x8080;
        if (!isset($codetable[$key])) {
            $utf8 .= "?";
            continue;
        }

        // u2utf8() returns the UTF-8 bytes as concatenated decimal numbers
        // (three digits per byte for multi-byte sequences); turn each
        // number back into a raw byte.
        $digits = u2utf8($codetable[$key]);
        if ($digits === "") {
            $utf8 .= "?"; // code point outside the range u2utf8() encodes
            continue;
        }
        foreach (str_split($digits, 3) as $decimalByte) {
            $utf8 .= chr((int) $decimalByte);
        }
    }

    return $utf8;
}

/**
 * Encodes one Unicode code point as UTF-8, returning the resulting byte
 * values written in decimal and concatenated into one string.
 *
 * Example: u2utf8(0x554A) returns "229149138" (bytes 0xE5 0x95 0x8A).
 * Every byte of a multi-byte sequence is >= 128 and therefore exactly three
 * digits wide; a code point below 0x80 is returned as its plain decimal
 * value without zero padding (e.g. 0x41 gives "65").
 *
 * @param int $c Unicode code point.
 * @return string Decimal byte values, or "" when $c is 0x200000 or larger.
 */
function u2utf8($c)
{
    // The original initialised $str inside "for (...; $i < count($c); ...)",
    // which throws a TypeError for an integer argument on PHP 8.
    $str = "";
    if ($c < 0x80) {
        $str .= $c;
    } else if ($c < 0x800) {
        $str .= (0xC0 | $c >> 6);
        $str .= (0x80 | $c & 0x3F);
    } else if ($c < 0x10000) {
        $str .= (0xE0 | $c >> 12);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    } else if ($c < 0x200000) {
        $str .= (0xF0 | $c >> 18);
        $str .= (0x80 | $c >> 12 & 0x3F);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    }
    return $str;
}
?> //将UTF-8字符串转为代码页为CodePage的AnsiString。
// Converts a UTF-8 encoded string to an AnsiString in the given code page.
//
// The input is first decoded to UTF-16 in a WideString, then handed to
// WideCharToMultiByte for the final conversion.
//
// Parameters:
//   utf8str  - UTF-8 encoded bytes, one byte per string element.
//              NOTE(review): byte(utf8str[i]) assumes a single-byte string
//              type (pre-Unicode Delphi) - confirm the compiler version.
//   CodePage - target Windows code page passed to WideCharToMultiByte.
//
// Returns the converted text, or '' when the input is empty or
// WideCharToMultiByte reports no output.
//
// Malformed input (stray continuation byte, invalid lead byte, truncated
// or broken multi-byte sequence, code point above U+10FFFF) is decoded as
// U+FFFD and consumes one input byte, instead of reading past the end of
// the string. 4-byte sequences are decoded to a UTF-16 surrogate pair.
//
// NOTE(review): Microsoft's WideCharToMultiByte documentation lists code
// pages (e.g. 65001) for which these dwFlags are rejected with
// ERROR_INVALID_FLAGS; the flags are kept as in the original - confirm
// which code pages callers pass.
function UTF8ToAnsiString(utf8str:string; CodePage:integer):AnsiString;
const
  ConvFlags = WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR;
  ReplacementChar = $FFFD;
var
  i, k, n, need, outLen: integer;
  buffer: widestring;
  ch, c: byte;
  cp: longword;
  ok: boolean;

  // Appends one UTF-16 code unit to the pre-sized buffer.
  procedure Emit(w: word);
  begin
    inc(outLen);
    buffer[outLen] := widechar(w);
  end;

begin
  result := '';
  n := Length(utf8str);
  if n = 0 then
    exit; // nothing to convert; avoids touching an empty buffer

  // Every emitted UTF-16 unit consumes at least one input byte (a 4-byte
  // sequence yields two units), so n elements are always sufficient.
  // Allocating once replaces the original per-character SetLength.
  SetLength(buffer, n);
  outLen := 0;

  i := 1;
  while i <= n do begin
    ch := byte(utf8str[i]);

    // Classify the lead byte: number of continuation bytes and payload bits.
    if (ch and $80) = 0 then begin
      need := 0;
      cp := ch;
    end
    else if (ch and $E0) = $C0 then begin
      need := 1;
      cp := ch and $1F;
    end
    else if (ch and $F0) = $E0 then begin
      need := 2;
      cp := ch and $0F;
    end
    else if (ch and $F8) = $F0 then begin
      need := 3;
      cp := ch and $07;
    end
    else begin
      need := -1; // stray continuation byte or invalid lead byte
      cp := 0;
    end;

    // The whole sequence must lie inside the string and consist of
    // well-formed continuation bytes (10xxxxxx).
    ok := (need >= 0) and (i + need <= n);
    if ok then
      for k := 1 to need do begin
        c := byte(utf8str[i + k]);
        if (c and $C0) <> $80 then begin
          ok := false;
          break;
        end;
        cp := (cp shl 6) or (c and $3F);
      end;

    if not ok then begin
      Emit(ReplacementChar);
      inc(i);
    end
    else begin
      if cp > $10FFFF then
        Emit(ReplacementChar)
      else if cp >= $10000 then begin
        // Outside the BMP: encode as a UTF-16 surrogate pair.
        cp := cp - $10000;
        Emit(word($D800 or (cp shr 10)));
        Emit(word($DC00 or (cp and $3FF)));
      end
      else
        Emit(word(cp));
      inc(i, need + 1);
    end;
  end; //while

  SetLength(buffer, outLen);

  // First call sizes the output (the count includes the terminating NUL
  // because the source length is given as -1); second call converts.
  i := WideCharToMultiByte(CodePage, ConvFlags,
    PWideChar(buffer), -1, nil, 0, nil, nil);
  if i > 1 then begin
    SetLength(Result, i - 1);
    WideCharToMultiByte(CodePage, ConvFlags,
      PWideChar(buffer), -1, @Result[1], i - 1, nil, nil);
  end;
end;
http://www.csdn.net/filebbs/read_topic.asp?id=981