setlocale(LC_ALL, ""); //calc block size to be returned int len = mbstowcs(0, szUtf8, 0); //malloc and fill the returned block wchar_t* szUnicode = new wchar_t[len + 1]; mbstowcs(szUnicode, szUtf8, len); szUnicode[len] = 0; std::wstring rs = szUnicode; delete[] szUnicode;
return rs; } //return value is autorelease, besure do not need to invoke NSString's release static NSString* Unicode2Ns(std::wstring szUnicode) { setlocale(LC_CTYPE, ""); //calc block size to be returned int len = wcstombs(0, szUnicode.c_str(), 0); //malloc and fill the returned block char* szUtf8 = new char[len + 1]; wcstombs(szUtf8, szUnicode.c_str(), len); szUtf8[len] = 0; NSString* rs = [NSString stringWithUTF8String:szUtf8]; delete[] szUtf8;
#import <Foundation/Foundation.h>
#include <string>
#include <stdlib.h>int main(int argc, const char * argv[]) {
std::wstring sz = L"abc123你我他";
wprintf(L"%s\n", sz.c_str());
NSLog(@"%s\n", sz.c_str());
return 0;
}-------------------
以上代码输出:
a
2012-11-14 09:13:22.932 test[572:303] a
--------------------
另外无论64位,还是32位,在xcode下sizeof(wchar_t)为4,而windows下为2.
是不是mac系统的UNICODE是UTF32,而不是UTF16?
另外不能用c_str()表示unicode,遇到0就结束了
wprintf,wcout之类的c函数似乎对unicode支持有限,不太了解
可以转成NSString输出:
std::wstring wstr = L"abc123你我他";
NSData *data = [NSData dataWithBytes:wstr.data() length:sizeof(wchar_t) * wstr.size()];
NSString *str = [[NSString alloc] initWithData:data encoding:NSUTF32LittleEndianStringEncoding];
NSLog(@"str %@", str);
#include <string>
#import <Cocoa/Cocoa.h>class CStringConverter {
public:
static std::wstring Ansi2Unicode(std::string szAnsi) {
NSString* szNs = [NSString stringWithCString:szAnsi.c_str()];
return Ns2Unicode(szNs);
}
static std::string Unicode2Ansi(std::wstring szUnicode) {
NSString* szNs = Unicode2Ns(szUnicode);
const char* rs = [szNs cString];
return rs;
}
static std::wstring Utf82Unicode(std::string szUtf8) {
NSString* szNs = [NSString stringWithUTF8String:szUtf8.c_str()];
return Ns2Unicode(szNs);
}
static std::string Unicode2Utf8(std::wstring szUnicode) {
NSString* szNs = Unicode2Ns(szUnicode);
return [szNs UTF8String];
}
static std::string Ansi2Utf8(std::string szAnsi) {
return Unicode2Utf8(Ansi2Unicode(szAnsi));
}
static std::string Utf82Ansi(std::string szUtf8) {
return Unicode2Ansi(Utf82Unicode(szUtf8));
} static std::wstring Ns2Unicode(NSString* szNs) {
if(!szNs) return L""; //note: wchar_t is utf32 under mac
const char* szUtf8 = [szNs UTF8String];
setlocale(LC_ALL, "");
//calc block size to be returned
int len = mbstowcs(0, szUtf8, 0);
//malloc and fill the returned block
wchar_t* szUnicode = new wchar_t[len + 1];
mbstowcs(szUnicode, szUtf8, len);
szUnicode[len] = 0;
std::wstring rs = szUnicode;
delete[] szUnicode;
return rs;
}
//return value is autorelease, besure do not need to invoke NSString's release
static NSString* Unicode2Ns(std::wstring szUnicode) {
setlocale(LC_CTYPE, "");
//calc block size to be returned
int len = wcstombs(0, szUnicode.c_str(), 0);
//malloc and fill the returned block
char* szUtf8 = new char[len + 1];
wcstombs(szUtf8, szUnicode.c_str(), len);
szUtf8[len] = 0;
NSString* rs = [NSString stringWithUTF8String:szUtf8];
delete[] szUtf8;
return rs;
}
}; std::string szUtf8 = "abc123你我他";
std::wstring szUnicode = CStringConverter::Utf82Unicode(szUtf8);
wprintf(szUnicode.c_str());
printf("\n");
std::wstring sz = L"abc123你我他";
wprintf(sz.c_str());
printf("\n");=================
第一个打印出来了,第二个打印不出来,为什么?
<61000000 62000000 63000000 31000000 32000000 33000000 e4000000 bd000000 a0000000 e6000000 88000000 91000000 e4000000 bb000000 96000000>
abc123都没有问题,但"你我他"这三个字的编码却是UTF8编码把每个字节扩展成32位,所以szUnicode里面的东西其实是"abc123你我他"对应宽字符的UTF8编码,而不是UTF32
由此看wprintf与printf一样,只能处理UTF8编码的汉字,不能处理UTF32