这个是 HTML 转义编码(数字字符引用)吧。
var s0 = '中华人民共和国';
// Build s1 by writing every UTF-16 code unit of s0 as a decimal HTML
// numeric character reference of the form "&#NNNN;".
var s1 = '';
for (var i = 0; i < s0.length; i++) {
  s1 += '&#' + s0.charCodeAt(i) + ';';
}
// The newline escape is '\n'; the previous '/n' was displayed literally
// instead of separating the two lines of the message.
alert('原串:' + s0 + '\n\n\n编码:' + s1);
// Re-encode s0: each UTF-16 code unit becomes "&#<decimal value>;".
var s1 = '';
for (var i = 0; i < s0.length; i++) {
  var unitValue = s0.charCodeAt(i);
  s1 += '&#' + unitValue + ';';
}
// Use the real newline escape ('\n'); '/n' would be shown as plain text.
alert('原串:' + s0 + '\n\n\n编码:' + s1);
我试了一下,您这个编码应该是 Unicode。
我用 C++ 写了类似的代码,并将每个字拆成两个字节分别编码,但得到的结果还是不对:
tryocr#139;#198;#82;#43;#119;#11;#119;#11;tryocr
bool Str2Utf8Dec( const TCHAR* src, int sLen, TCHAR* dst, int &dLen)
{
wstring unic;
ustring str, tmp;
TCHAR chr[10];
int utf8Len, i;
unsigned short code; str.assign( src, sLen );
USES_CONVERSION;
unic = T2W(str.c_str()); ustring utf8 = CMarkup::AToUTF8(str); utf8Len = unic.length(); for( i = 0; i < utf8Len; i++ )
{
code = (unsigned short)unic[i];
if( code & 0xff00 )
{
_stprintf( chr, _T("#%u;"), (code>>8)&0x00ff );
tmp += chr;
_stprintf( chr, _T("#%u;"), code&0x00ff );
}
else
{
chr[0] = (unsigned char)unic[i];
chr[1] = _T('\0');
}
tmp += chr;
}//
// ustring utf8 = CMarkup::AToUTF8(str);
//
// utf8Len = utf8.length();
//
// tmp = STR_EMPTY;
// for( i = 0; i < utf8Len; i++ )
// {
// if( (unsigned char)utf8[i] >= 0xC2 )
// {
// _stprintf( chr, _T("#%u;"), (unsigned char)utf8[i] );
// tmp += chr;
// i++;
// _stprintf( chr, _T("#%u;"), (unsigned char)utf8[i] );
// if( (unsigned char)utf8[i-1] >= 0xE0 )
// {
// tmp += chr;
// i++;
// _stprintf( chr, _T("#%u;"), (unsigned char)utf8[i] );
// }
// }
// else
// {
// chr[0] = utf8[i];
// chr[1] = _T('\0');
// }
// tmp += chr;
// }
if( dLen < tmp.length() )
return false;
dLen = tmp.length();
_tcscpy( dst, tmp.c_str() );
return true;
}
有名字的实体需要查表(如 &lt;);像上面这样数字形式的(&#NNNN;),数字就是该字符的 Unicode 码点值。