怎样将ascii的英文字符转成unicode的开这种格式? 搜出了的都是vc的。都用到了某个winapi。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 ? C# 默认 即是 Unicode 编码 , 可以如下转换ASCII to unicode/////////////////////////////////System.IO.FileStream fs = new FileStream("F:\\test.txt",FileMode.Open );byte[] bt = new Byte[fs.Length];fs.Read(bt,0,bt.Length);Encoding CnEnconding = Encoding.GetEncoding("GB18030");string str = CnEnconding.GetString(bt);unicode to ascii///////////////////////////////////char[] chs = CnEnconding.GetChars("斯大林开发加速度"); class encode{ function utf82utf16($utf8) { // oh please oh please oh please oh please oh please if(function_exists('mb_convert_encoding')) { return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8'); } switch(strlen($utf8)) { case 1: // this case should never be reached, because we are in ASCII range // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return $utf8; case 2: // return a UTF-16 character from a 2-byte UTF-8 char // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr(0x07 & (ord($utf8{0}) >> 2)) . chr((0xC0 & (ord($utf8{0}) << 6)) | (0x3F & ord($utf8{1}))); case 3: // return a UTF-16 character from a 3-byte UTF-8 char // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 return chr((0xF0 & (ord($utf8{0}) << 4)) | (0x0F & (ord($utf8{1}) >> 2))) . chr((0xC0 & (ord($utf8{1}) << 6)) | (0x7F & ord($utf8{2}))); } // ignoring UTF-32 for now, sorry return ''; } function encode($var,$iconv='1') { if($iconv == 1) { $var = iconv("gbk","utf-8",$var); //先将GBK的编码都转移为UTF-8,这样就统一了编码,解决了GBK转移为UTF-16或者GB2312时候的乱码情况! } // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT $ascii = ''; $strlen_var = strlen($var); /* * Iterate over every character in the string, * escaping with a slash or encoding to UTF-8 where necessary */ for ($c = 0; $c < $strlen_var; ++$c) { $ord_var_c = ord($var{$c}); switch (true) { case $ord_var_c == 0x08: $ascii .= '\b'; break; case $ord_var_c == 0x09: $ascii .= '\t'; break; case $ord_var_c == 0x0A: $ascii .= '\n'; break; case $ord_var_c == 0x0C: $ascii .= '\f'; break; case $ord_var_c == 0x0D: $ascii .= '\r'; break; case $ord_var_c == 0x22: case $ord_var_c == 0x2F: case $ord_var_c == 0x5C: // double quote, slash, slosh $ascii .= '\\'.$var{$c}; break; case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)): // characters U-00000000 - U-0000007F (same as ASCII) $ascii .= $var{$c}; break; case (($ord_var_c & 0xE0) == 0xC0): // characters U-00000080 - U-000007FF, mask 110XXXXX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1})); $c += 1; $utf16 = $this->utf82utf16($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; case (($ord_var_c & 0xF0) == 0xE0): // characters U-00000800 - U-0000FFFF, mask 1110XXXX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2})); $c += 2; $utf16 = $this->utf82utf16($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; case (($ord_var_c & 0xF8) == 0xF0): // characters U-00010000 - U-001FFFFF, mask 11110XXX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3})); $c += 3; $utf16 = $this->utf82utf16($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; case (($ord_var_c & 0xFC) == 0xF8): // characters U-00200000 - U-03FFFFFF, mask 111110XX // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3}), ord($var{$c + 4})); $c += 4; $utf16 = $this->utf82utf16($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; case (($ord_var_c & 0xFE) == 0xFC): // characters U-04000000 - U-7FFFFFFF, mask 1111110X // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 $char = pack('C*', $ord_var_c, ord($var{$c + 1}), ord($var{$c + 2}), ord($var{$c + 3}), ord($var{$c + 4}), ord($var{$c + 5})); $c += 5; $utf16 = $this->utf82utf16($char); $ascii .= sprintf('\u%04s', bin2hex($utf16)); break; } } return '"'.$ascii.'"'; }} PHP Error: Could not connect to database. Please try again later. 基础求助 递归 一个php网址采集的问题 求助一个简单的分类显示的代码 php与xml怎么结合运用 关于 php连接sybase的问题,帮忙解决。 帮我看看这两个告警是怎么回事 字符串截取的问题,请教高手! PHP的FTP函数怎么用??? 正则置换问题 请帮忙看一下 这个PHP中的函数的静态变量为什么在两次调用中值变了?
?
C# 默认 即是 Unicode 编码 ,
可以如下转换ASCII to unicode
/////////////////////////////////
System.IO.FileStream fs = new FileStream("F:\\test.txt",FileMode.Open );
byte[] bt = new Byte[fs.Length];
fs.Read(bt,0,bt.Length);
Encoding CnEnconding = Encoding.GetEncoding("GB18030");
string str = CnEnconding.GetString(bt);unicode to ascii
///////////////////////////////////
char[] chs = CnEnconding.GetChars("斯大林开发加速度");
function utf82utf16($utf8)
{
// oh please oh please oh please oh please oh please
if(function_exists('mb_convert_encoding')) {
return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
} switch(strlen($utf8)) {
case 1:
// this case should never be reached, because we are in ASCII range
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return $utf8; case 2:
// return a UTF-16 character from a 2-byte UTF-8 char
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr(0x07 & (ord($utf8{0}) >> 2))
. chr((0xC0 & (ord($utf8{0}) << 6))
| (0x3F & ord($utf8{1}))); case 3:
// return a UTF-16 character from a 3-byte UTF-8 char
// see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
return chr((0xF0 & (ord($utf8{0}) << 4))
| (0x0F & (ord($utf8{1}) >> 2)))
. chr((0xC0 & (ord($utf8{1}) << 6))
| (0x7F & ord($utf8{2})));
} // ignoring UTF-32 for now, sorry
return '';
} function encode($var,$iconv='1')
{
if($iconv == 1)
{
$var = iconv("gbk","utf-8",$var); //先将GBK的编码都转移为UTF-8,这样就统一了编码,解决了GBK转移为UTF-16或者GB2312时候的乱码情况!
}
// STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
$ascii = '';
$strlen_var = strlen($var); /*
* Iterate over every character in the string,
* escaping with a slash or encoding to UTF-8 where necessary
*/
for ($c = 0; $c < $strlen_var; ++$c) { $ord_var_c = ord($var{$c}); switch (true) {
case $ord_var_c == 0x08:
$ascii .= '\b';
break;
case $ord_var_c == 0x09:
$ascii .= '\t';
break;
case $ord_var_c == 0x0A:
$ascii .= '\n';
break;
case $ord_var_c == 0x0C:
$ascii .= '\f';
break;
case $ord_var_c == 0x0D:
$ascii .= '\r';
break; case $ord_var_c == 0x22:
case $ord_var_c == 0x2F:
case $ord_var_c == 0x5C:
// double quote, slash, slosh
$ascii .= '\\'.$var{$c};
break; case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
// characters U-00000000 - U-0000007F (same as ASCII)
$ascii .= $var{$c};
break; case (($ord_var_c & 0xE0) == 0xC0):
// characters U-00000080 - U-000007FF, mask 110XXXXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c, ord($var{$c + 1}));
$c += 1;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break; case (($ord_var_c & 0xF0) == 0xE0):
// characters U-00000800 - U-0000FFFF, mask 1110XXXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}));
$c += 2;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break; case (($ord_var_c & 0xF8) == 0xF0):
// characters U-00010000 - U-001FFFFF, mask 11110XXX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}));
$c += 3;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break; case (($ord_var_c & 0xFC) == 0xF8):
// characters U-00200000 - U-03FFFFFF, mask 111110XX
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}),
ord($var{$c + 4}));
$c += 4;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break; case (($ord_var_c & 0xFE) == 0xFC):
// characters U-04000000 - U-7FFFFFFF, mask 1111110X
// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
$char = pack('C*', $ord_var_c,
ord($var{$c + 1}),
ord($var{$c + 2}),
ord($var{$c + 3}),
ord($var{$c + 4}),
ord($var{$c + 5}));
$c += 5;
$utf16 = $this->utf82utf16($char);
$ascii .= sprintf('\u%04s', bin2hex($utf16));
break;
}
} return '"'.$ascii.'"'; }}