UTF8格式的网页如何解析? 请问,UTF8编码的网页,怎么样才能转换成ASCII进行处理?谢谢了 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 http://young.xh.blog.163.com/blog/static/95625320071154108561/从网上找的,用着挺好的:),实际用的话还要整理一下char *Convert(char *str, int sourceCodepage, int targetCodepage) { int unicodeLen, targetLen; wchar_t *pUnicode; BYTE *pTargetData; char rt[256]; unicodeLen=MultiByteToWideChar(sourceCodepage,0,str,-1,NULL,0); pUnicode = (wchar_t *)malloc((unicodeLen + 1) * sizeof(wchar_t)); memset(pUnicode,0,(unicodeLen+1)*sizeof(wchar_t)); MultiByteToWideChar(sourceCodepage, 0, str, -1, (LPWSTR)pUnicode, unicodeLen); targetLen = WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char *)pTargetData, 0, NULL, NULL); pTargetData = (BYTE *)malloc((targetLen + 1) * sizeof(BYTE)); memset(pTargetData, 0, targetLen + 1); WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char *)pTargetData, targetLen, NULL, NULL); memcpy(rt, (char *)pTargetData, targetLen); free(pUnicode); free(pTargetData); return rt; } 使用:编码 Convert(NewName, 936, CP_UTF8);解码 Convert(NewName, CP_UTF8, 936); 谢谢楼上的,这个我也有不过用在UNICODE字符的工程里行不能哇:) unicode工程下直接转化成unicode就可以了MultiByteToWideChar(CP_UTF8, ... );转化一次就可以了. UTF8转Unicode: CA2T(szUtf8, CP_UTF8)Unicode转UTF8: AtlUnicodeToUtf8(...) 
UTF8toUnicode(char *s){ int len = 0; WCHAR* r = new WCHAR[strlen(s) * 2]; while(s[0]) { int bytes = 1; if(s[0] & 0x80) while(s[0] & (0x80 >> bytes)) { bytes++; } if(bytes == 1) { r[len] = s[0]; } else { r[len] = 0; for(char*p = s + (bytes - 1); p > s; p--) r[len] |= ((*p) & 0x3F) << ((bytes - (p - s) - 1) * 6); r[len] |= (s[0] & ((1 << (7 - bytes)) - 1)) << ((bytes - 1) * 6); } len++; s += bytes; } r[len] = 0; char*buffer = new char[len * 2 + 1]; ZeroMemory(buffer, len * 2 + 1); ::WideCharToMultiByte(CP_ACP, NULL, r, len, buffer, 1+ 2 * len, NULL, NULL); CString str = buffer; delete[] r; delete[] buffer; return str; } headerSz = 2; bUnicode = TRUE; char* pUTF8 = (char*)malloc((l=l*2)+1); ::MultiByteToWideChar( CP_UTF8, NULL, buf, l/2+1, reinterpret_cast<LPWSTR>(pUTF8), l+1); free(buf); buf = pUTF8;if (bUnicode) { char* pGB2312 = (char*)malloc(l+1); memset(pGB2312, NULL, l+1); ::WideCharToMultiByte( CP_ACP, NULL, reinterpret_cast<LPWSTR>(buf+headerSz), (l+1-headerSz)/2, pGB2312, l+1, NULL, NULL); free(buf); buf = pGB2312; headerSz = 0; }即:UTF8->Unicode->GB2312; XMLNode XMLNode::parseFile(const char *filename, XMLCSTR tag, XMLResults *pResults){ if (pResults) { pResults->nLine=0; pResults->nColumn=0; } FILE *f=fopen(filename,"rb"); if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } fseek(f,0,SEEK_END); int l=ftell(f),headerSz=0; if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; return emptyXMLNode; } fseek(f,0,SEEK_SET); char *buf=(char*)malloc(l+1); fread(buf,l,1,f); fclose(f); buf[l]=0; BOOL bUnicode = FALSE; BYTE btUTF8[3] = {0xEF, 0xBB, 0xBF}; BYTE btUnicode[2] = {0xFF, 0xFE}; BYTE btUnicodeBE[2] = {0xFE, 0xFF}; // UTF8 code if (memcmp(buf, btUTF8, 3) == 0) { headerSz = 2; bUnicode = TRUE; char* pUTF8 = (char*)malloc((l=l*2)+1); ::MultiByteToWideChar( CP_UTF8, NULL, buf, l/2+1, reinterpret_cast<LPWSTR>(pUTF8), l+1); free(buf); buf = pUTF8; } // Unicode big endian code else if (memcmp(buf, btUnicodeBE, 2) == 0) { 
headerSz = 2; bUnicode = TRUE; for (int i=0; i<l; i+=headerSz) { BYTE bt = buf[i]; buf[i] = buf[i+1]; buf[i+1] = bt; } } // Unicode code else if (memcmp(buf, btUnicode, 2) == 0) { headerSz = 2; bUnicode = TRUE; } // GB2312 code else { headerSz = 0; bUnicode = FALSE; } if (bUnicode) { char* pGB2312 = (char*)malloc(l+1); memset(pGB2312, NULL, l+1); ::WideCharToMultiByte( CP_ACP, NULL, reinterpret_cast<LPWSTR>(buf+headerSz), (l+1-headerSz)/2, pGB2312, l+1, NULL, NULL); free(buf); buf = pGB2312; headerSz = 0; } if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); free(buf); return x;} 本来挺简单的问题,就获取UTF8编码的网页,再转换解析搞的我头都大了,测试了网上的楼上的代码,乱码越解越乱了汗~~~~~~接着试,先谢谢楼上各位 问个create和bind关系的简单问题,接分! 请问下在Win32 Console 程序中使用MFC类 求拦截网络数据包的方法~ 一段创建客户端端口的代码,各位帮我看看有什么问题 哪位大虾告诉小弟网上关于数据包截取,数据提取,和用之编程的文章或书籍的地址 推荐近期所作的一个小游戏,希望大家喜欢!来者有分! 请教caj nlc kdh 是什么文件! 新手问题:如何使一个UI组件在Visible和Invisible之间切换? matlab 我想问一下!! 如何在OnMouseMove事件中出现一个Tooltips,就像金山词霸取词效果 C#中,两个一样的按钮,一个有反应,一个没有。
http://young.xh.blog.163.com/blog/static/95625320071154108561/
从网上找的,用着挺好的:),实际用的话还要整理一下
/**
 * Converts a NUL-terminated multi-byte string from one Windows code page to
 * another, going through UTF-16 as the intermediate form.
 *
 * @param str             NUL-terminated input text encoded in sourceCodepage.
 * @param sourceCodepage  Code page of str (for example 936 or CP_UTF8).
 * @param targetCodepage  Code page wanted for the result.
 * @return A malloc()-allocated, NUL-terminated string in targetCodepage that
 *         the caller must release with free(); NULL when str is NULL, a
 *         conversion fails, or memory cannot be allocated.
 */
char *Convert(char *str, int sourceCodepage, int targetCodepage)
{
    if (str == NULL)
        return NULL;

    // Sizing pass: with a source length of -1 the returned count already
    // includes the terminating NUL.
    const int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, str, -1, NULL, 0);
    if (unicodeLen <= 0)
        return NULL;

    wchar_t *pUnicode = (wchar_t *)calloc((size_t)unicodeLen + 1, sizeof(wchar_t));
    if (pUnicode == NULL)
        return NULL;

    char *pTargetData = NULL;
    if (MultiByteToWideChar(sourceCodepage, 0, str, -1, pUnicode, unicodeLen) > 0)
    {
        // Sizing pass for the target encoding: no destination buffer is
        // supplied, so nothing uninitialised is handed to the API.
        const int targetLen = WideCharToMultiByte(targetCodepage, 0, pUnicode, -1,
                                                  NULL, 0, NULL, NULL);
        if (targetLen > 0)
        {
            pTargetData = (char *)calloc((size_t)targetLen + 1, 1);
            if (pTargetData != NULL &&
                WideCharToMultiByte(targetCodepage, 0, pUnicode, -1,
                                    pTargetData, targetLen, NULL, NULL) <= 0)
            {
                free(pTargetData);
                pTargetData = NULL;
            }
        }
    }

    free(pUnicode);

    // The result lives on the heap: a local array would be dead as soon as
    // this function returns, and a fixed size would overflow on long input.
    return pTargetData;
}
使用:编码 Convert(NewName, 936, CP_UTF8);解码 Convert(NewName, CP_UTF8, 936);
不过用在UNICODE字符的工程里行不能哇:)
MultiByteToWideChar(CP_UTF8, ... );
转化一次就可以了.
Unicode转UTF8: AtlUnicodeToUtf8(...)
{
    // Decodes the NUL-terminated UTF-8 string s into UTF-16 code units,
    // converts those to the ANSI code page (CP_ACP) and returns the text as a
    // CString. Malformed, truncated or out-of-range sequences are replaced by
    // U+FFFD instead of being decoded.

    // Every code unit written below consumes at least one input byte, so
    // strlen(s) + 1 units always hold the decoded text plus its terminator
    // (this also covers the empty string, which needs one unit).
    WCHAR* r = new WCHAR[strlen(s) + 1];
    int len = 0;

    while(s[0])
    {
        const unsigned char lead = static_cast<unsigned char>(s[0]);

        // The number of leading 1 bits in the first byte gives the sequence
        // length: 0 = ASCII, 2..4 = multi-byte, anything else is invalid.
        int ones = 0;
        while(ones < 8 && (lead & (0x80 >> ones)))
            ones++;

        if(ones == 0)
        {
            r[len++] = lead;
            s++;
            continue;
        }

        unsigned long cp = 0;
        bool valid = (ones >= 2 && ones <= 4);
        if(valid)
        {
            cp = lead & ((1 << (7 - ones)) - 1);
            for(int i = 1; i < ones; i++)
            {
                const unsigned char c = static_cast<unsigned char>(s[i]);
                // Stops at the first byte that is not 10xxxxxx; this includes
                // the terminator, so the scan never runs past the string.
                if((c & 0xC0) != 0x80)
                {
                    valid = false;
                    break;
                }
                cp = (cp << 6) | (c & 0x3F);
            }
        }

        if(!valid)
        {
            // Skip only the offending byte so decoding resynchronises.
            r[len++] = 0xFFFD;
            s++;
            continue;
        }

        if(cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        {
            r[len++] = 0xFFFD;
        }
        else if(cp >= 0x10000)
        {
            // Outside the 16-bit range: emit a surrogate pair (two units for
            // four input bytes, so the buffer bound still holds).
            cp -= 0x10000;
            r[len++] = static_cast<WCHAR>(0xD800 + (cp >> 10));
            r[len++] = static_cast<WCHAR>(0xDC00 + (cp & 0x3FF));
        }
        else
        {
            r[len++] = static_cast<WCHAR>(cp);
        }
        s += ones;
    }
    r[len] = 0;

    // Ask the API how many bytes the ANSI text needs rather than assuming
    // two bytes per code unit.
    int ansiLen = 0;
    if(len > 0)
        ansiLen = ::WideCharToMultiByte(CP_ACP, 0, r, len, NULL, 0, NULL, NULL);
    if(ansiLen < 0)
        ansiLen = 0;

    char* buffer = new char[ansiLen + 1];
    ZeroMemory(buffer, ansiLen + 1);
    if(ansiLen > 0)
        ::WideCharToMultiByte(CP_ACP, 0, r, len, buffer, ansiLen, NULL, NULL);

    CString str = buffer;
    delete[] r;
    delete[] buffer;
    return str;
}
// Excerpt of two steps: convert the UTF-8 bytes in buf to wide characters,
// then convert those wide characters to the CP_ACP code page.
// l is doubled here, and l+1 bytes are allocated as the wide-character
// destination.
bUnicode = TRUE; char* pUTF8 = (char*)malloc((l=l*2)+1);
// The source length l/2+1 is the pre-doubling length plus one byte.
// NOTE(review): the last argument is documented as a capacity in wide
// characters, but l+1 is the size of pUTF8 in bytes - confirm. The return
// value (number of characters written) is discarded.
::MultiByteToWideChar(
CP_UTF8,
NULL,
buf,
l/2+1,
reinterpret_cast<LPWSTR>(pUTF8),
l+1);
free(buf);
// From here on buf refers to the wide-character data.
buf = pUTF8;if (bUnicode)
{
// Zero-filled destination of l+1 bytes for the converted text.
char* pGB2312 = (char*)malloc(l+1);
memset(pGB2312, NULL, l+1);
// Converts (l+1-headerSz)/2 wide characters, starting headerSz bytes into
// buf, to CP_ACP.
// NOTE(review): that count comes from the buffer size, not from how many
// characters the previous call produced - confirm whether the uninitialised
// tail of the buffer is converted as well.
::WideCharToMultiByte(
CP_ACP,
NULL,
reinterpret_cast<LPWSTR>(buf+headerSz),
(l+1-headerSz)/2,
pGB2312,
l+1,
NULL,
NULL);
free(buf);
// buf now holds the converted text; there is no header left to skip.
buf = pGB2312;
headerSz = 0;
}即:UTF8->Unicode->GB2312;
{
    // Reads the whole file, detects its encoding from a leading byte-order
    // mark (UTF-8, UTF-16 big endian or UTF-16 little endian; anything else
    // is passed through unchanged), converts Unicode input to the ANSI code
    // page (CP_ACP) and hands the text to parseString().
    // On failure pResults->error is set (when pResults is given) and
    // emptyXMLNode is returned.
    if (pResults) { pResults->nLine=0; pResults->nColumn=0; }

    FILE *f=fopen(filename,"rb");
    if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; }

    fseek(f,0,SEEK_END);
    const long fileSize=ftell(f);
    fseek(f,0,SEEK_SET);

    char *buf=NULL;
    int l=0;
    if (fileSize>0)
    {
        // Two spare bytes so the data can be terminated either as a byte
        // string or as a UTF-16 string.
        buf=(char*)malloc((size_t)fileSize+2);
        // Use the number of bytes actually read, not the size reported above.
        if (buf) l=(int)fread(buf,1,(size_t)fileSize,f);
    }
    // The file is closed on every path, including the early returns below.
    fclose(f);

    if (fileSize>0 && !buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; }
    if (l<=0) { free(buf); if (pResults) pResults->error=eXMLErrorEmpty; return emptyXMLNode; }
    buf[l]=0;
    buf[l+1]=0;

    const BYTE btUTF8[3] = {0xEF, 0xBB, 0xBF};
    const BYTE btUnicode[2] = {0xFF, 0xFE};
    const BYTE btUnicodeBE[2] = {0xFE, 0xFF};

    BOOL bUnicode = FALSE;
    BOOL bOk = TRUE;
    WCHAR *wide = NULL;       // UTF-16 text without its BOM; may point into buf
    WCHAR *wideOwned = NULL;  // separately allocated UTF-16 text, if any
    int wideLen = 0;          // number of UTF-16 code units at wide

    // UTF8 code
    if (l>=3 && memcmp(buf, btUTF8, 3) == 0)
    {
        bUnicode = TRUE;
        const int srcLen = l-3;   // payload after the 3-byte BOM
        if (srcLen>0)
        {
            // Size the destination in wide characters, then convert exactly
            // the payload bytes.
            wideLen = ::MultiByteToWideChar(CP_UTF8, 0, buf+3, srcLen, NULL, 0);
            if (wideLen>0) wideOwned = (WCHAR*)malloc((size_t)wideLen*sizeof(WCHAR));
            bOk = wideOwned &&
                  ::MultiByteToWideChar(CP_UTF8, 0, buf+3, srcLen, wideOwned, wideLen) == wideLen;
            wide = wideOwned;
        }
    }
    // Unicode big endian code
    else if (l>=2 && memcmp(buf, btUnicodeBE, 2) == 0)
    {
        bUnicode = TRUE;
        // Swap each byte pair after the BOM to get little-endian code units;
        // a dangling odd byte at the end is ignored.
        for (int i=2; i+1<l; i+=2)
        {
            const char bt = buf[i];
            buf[i] = buf[i+1];
            buf[i+1] = bt;
        }
        wide = reinterpret_cast<LPWSTR>(buf+2);
        wideLen = (l-2)/2;
    }
    // Unicode code
    else if (l>=2 && memcmp(buf, btUnicode, 2) == 0)
    {
        bUnicode = TRUE;
        wide = reinterpret_cast<LPWSTR>(buf+2);
        wideLen = (l-2)/2;
    }
    // GB2312 code: no BOM, the bytes are handed to the parser as they are.

    if (bUnicode && bOk)
    {
        // Convert exactly wideLen code units, never uninitialised buffer tail.
        int ansiLen = 0;
        if (wideLen>0)
        {
            ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wide, wideLen, NULL, 0, NULL, NULL);
            bOk = ansiLen>0;
        }
        char *pAnsi = bOk ? (char*)malloc((size_t)ansiLen+1) : NULL;
        if (pAnsi)
        {
            if (wideLen>0 &&
                ::WideCharToMultiByte(CP_ACP, 0, wide, wideLen, pAnsi, ansiLen, NULL, NULL) != ansiLen)
            {
                free(pAnsi);
                pAnsi = NULL;
            }
            else
            {
                pAnsi[ansiLen] = 0;
            }
        }
        // wide may point into buf, so buf is released only after converting.
        free(buf);
        buf = pAnsi;   // NULL is reported as a conversion error below
    }
    free(wideOwned);

    if (!bOk || !buf)
    {
        free(buf);
        if (pResults) pResults->error=eXMLErrorCharConversionError;
        return emptyXMLNode;
    }

    XMLNode x=parseString((XMLSTR)buf,tag,pResults);
    free(buf);
    return x;
}
搞的我头都大了,测试了网上的楼上的代码,乱码越解越乱了
汗~~~~~~接着试,先谢谢楼上各位