怎么判断char数组里是不是汉字?2字节一汉字
我看汉字存在char数组里全是负数,难道是根据这个判断吗?
我是想数组里有汉字有字母有数字,怎么分别提取出来?汉字有什么判断方法?
我看汉字存在char数组里全是负数,难道是根据这个判断吗?
我是想数组里有汉字有字母有数字,怎么分别提取出来?汉字有什么判断方法?
解决方案 »
- HTML解析 C/C++ (散分)
- 如何修改通过HttpOpenRequest创建出来的Handle的Verb模式?急、急、急!!!
- USB / RS232 这2种通信方式有何区别?使用RS232通信的程序改成usb的大概都需要做些什么事。哪儿有相关的资料。本人没接触过这些。越详细越好。
- CArchive写入读取文件的问题
- 用CRecordset类做数据查询时碰到的问题
- 初学者关于显示的问题?
- 关于MSCOMM控件串口通讯的问题
- 给IE去掉弹出窗口的BHO实现问题?
- VC 局域网中如何利用已知服务器端口号获得服务器IP地址?
- 怎样改变vc文件对话框及其内部控件的前景色和背景色?
- 朋友们有空的话帮我看两个小问题。我现在在镇上的网吧,时间不多。是关于内联函数与内存释放的
- 试问想做个网络服务器,但碰到了大大的问题,欢迎做过服务器的来探讨.
整个汉字的ASCII码是有一定范围的,楼主去搜搜
如果是gbk,第一个字节还是负的,第二字节就不一定了
以宽字符形式存储
然后一个个取出来,右移七位看是不是0,是的话,不是汉字,否则是汉字
// Demo fragment: keep the text as wide characters and classify each element.
wchar_t arr[10] = L"中国woaini";
int i;i=0;
// Walk the array until the terminating zero element is reached.
while(arr[i])
{
// A non-zero value after shifting right by 7 bits means the element is above 0x7F,
// i.e. it is not a plain 7-bit ASCII character.
// NOTE(review): this flags every non-ASCII code unit, not only Chinese characters.
if(arr[i]>>7)
printf("YES\n");
else
printf("NO\n");
i++;
}
另外,不知哪位高人可以指教下,如何打印宽字符?比如一个个汉字打印?搞了半天愣是没弄出来
// Steps through Text one logical character at a time. An element whose value
// is negative is taken as the first byte of a two-byte Chinese character and
// is skipped together with the following byte; anything else is treated as a
// single-byte (English) character. The function only advances the cursor; it
// produces no output.
// NOTE(review): the sign test assumes TCHAR is a signed char (MBCS build) - confirm.
void GetCEFont( CString Text ) // CString can be thought of as a char[] here
{
    const int total = Text.GetLength();
    int pos = 0;
    while (pos < total)
    {
        // Fetch the element at the cursor.
        const TCHAR cur = Text.GetAt(pos);
        // Negative: Chinese character, advance by two; otherwise advance by one.
        pos += (cur < 0) ? 2 : 1;
    }
}
2 如果本着研究学习的角度,必须存储在char数组中........
两字节内码:区号+0xA0 , 位号+0xa0
如:第一个汉字“啊”的内码为 0xb0,0xa1
GBK范围:
1st byte 2nd byte
0x81~0xfe 0x40~0x7e and 0x80~0xfe
BIG5范围:
1st byte 2nd byte
0x81~0xfe 0x40~0x7e and 0xa1~0xfe
第二个字符范围为:(ch>=0X40 && ch<=0XFE && ch!=0X7F)
表示中文。封装了一个类你可以参考一下:
头文件:#pragma oncestruct WIDECHAR
{
union
{
unsigned char szChar[4];
char szText[4];
}; WIDECHAR();
__forceinline BOOL IsNULL() const;
__forceinline BOOL IsWide() const;
__forceinline void Confirm();
operator UINT() const;
};class CEditorText : public CString
{
public:
CEditorText(void);
virtual ~CEditorText(void);
public:
CEditorText & operator=(LPCTSTR szText);public:
BOOL GetChar(int index, WIDECHAR & ch) const; BOOL MoveNextChar(int & index) const;
BOOL MovePrevChar(int & index) const; BOOL MoveNextWord(int & index) const;
BOOL MovePrevWord(int & index) const; void WordBegin(int & index) const;
void WordEnd(int & index) const;
};#define CHARCLASS1(ch) ((ch>='0' && ch<='9') || (ch>='A' && ch<='Z') || (ch>='a' && ch<='z') || ch=='_' || ch=='$' || ch=='#')
#define CHARCLASS2(ch) (ch==' ' || ch=='\t' || ch=='\r' || ch=='\n')
#define CHARCLASS3(ch) (ch>=1 && ch<=6)
#define CHARCLASS4(ch) ((ch>=7 && ch<=31) || ch=='@')
#define CHARCLASS5(ch) (!CHARCLASS1(ch) && !CHARCLASS2(ch) && !CHARCLASS3(ch) && !CHARCLASS4(ch))#define ISSPLITCHAR(ch) (ch==' ' || ch=='\t' || ch=='\r' || ch=='\n')
#define ISEMPTYCHAR(ch) ((ch>=0&&ch<=32)||ch==127)#define ISWIDECHARFIRST(ch) (ch>=0x81 && ch<=0XFE)
#define ISWIDECHARNEXT(ch) (ch>=0X40 && ch<=0XFE && ch!=0X7F)#define CONFIRMWIDECHAR(ch) \
if (ISWIDECHARFIRST(ch.szChar[0])) \
{ \
if (!ISWIDECHARNEXT(ch.szChar[1])) \
{ \
ch.szChar[0] ='?'; \
ch.szChar[1] = 0; \
} \
} else \
{ \
ch.szChar[1] = 0; \
if (ch.szChar[0]==0X80 || ch.szChar[0]==0X7F || ch.szChar[0]==0XFF) \
ch.szChar[0]='?'; \
} \
实现文件:
// Returns TRUE when ch1 and ch2 both belong to the same CHARCLASSn category
// (the classes are tried in order 1..5), FALSE otherwise.
BOOL IsCharSameClass(char ch1, char ch2)
{
    if (CHARCLASS1(ch1) && CHARCLASS1(ch2))
        return TRUE;
    if (CHARCLASS2(ch1) && CHARCLASS2(ch2))
        return TRUE;
    if (CHARCLASS3(ch1) && CHARCLASS3(ch2))
        return TRUE;
    if (CHARCLASS4(ch1) && CHARCLASS4(ch2))
        return TRUE;
    if (CHARCLASS5(ch1) && CHARCLASS5(ch2))
        return TRUE;

    return FALSE;
}

// Returns TRUE when ch1 and ch2 can be part of the same word: both are
// decimal digits, or both are ASCII letters. Any other pairing is FALSE.
BOOL IsCharSameWord(unsigned char ch1,unsigned char ch2)
{
    // Both digits. The lower bound must be tested on ch2 as well; testing
    // ch1 twice let any byte <= '9' (space, punctuation, control codes)
    // pass as a digit partner.
    if ((ch1>='0' && ch1<='9')
        && (ch2>='0' && ch2<='9'))
        return TRUE;

    // Both letters, case-insensitive.
    if (((ch1>='A' && ch1<='Z') || (ch1>='a' && ch1<='z'))
        &&((ch2>='A' && ch2<='Z') || (ch2>='a' && ch2<='z')))
        return TRUE;

    return FALSE;
}
// Starts as the empty character: all four bytes zero.
WIDECHAR::WIDECHAR()
{
    szChar[0]= 0;
    szChar[1]= 0;
    szChar[2]= 0;
    szChar[3]= 0;
}

// TRUE when the first byte is zero.
BOOL WIDECHAR::IsNULL() const
{
    return (szChar[0]==0);
}

// Non-zero when a second byte is present, i.e. the character is two bytes wide.
BOOL WIDECHAR::IsWide() const
{
    return (szChar[1]);
}

// Normalizes this character in place using CONFIRMWIDECHAR.
void WIDECHAR::Confirm()
{
    CONFIRMWIDECHAR((*this));
}

// Numeric code of the character: the single byte itself, or
// (first byte << 8) | second byte for a two-byte character.
// Computed with shifts instead of writing one union member and reading
// another, so the result does not depend on the machine's byte order. On a
// little-endian target the value is the same as before.
WIDECHAR::operator UINT() const
{
    if (szChar[1]==0)
        return szChar[0];

    return (static_cast<UINT>(szChar[0]) << 8) | szChar[1];
}

CEditorText::CEditorText(void)
{
}

CEditorText::~CEditorText(void)
{
}

// Reads the logical character starting at index into ch.
// Returns FALSE (ch untouched) when index is outside [0, GetLength()).
BOOL CEditorText::GetChar(int index, WIDECHAR & ch) const
{
    if (index>=GetLength() || index<0)
        return FALSE;

    ch.szChar[0] = (*this)[index];
    ch.szChar[1] = 0;
    // Only pick up a second byte when one exists inside the string.
    if (index<GetLength()-1)
        ch.szChar[1] = (*this)[index+1];

    ch.Confirm();
    return TRUE;
}

// Advances index past the logical character it points at (one or two
// bytes). Returns FALSE and leaves index alone when index is out of range.
BOOL CEditorText::MoveNextChar(int & index) const
{
    // GetChar performs the same bounds check, two-byte fetch and
    // normalization this function needs, so it is reused here.
    WIDECHAR ch;
    if (!GetChar(index, ch))
        return FALSE;

    index += ch.IsWide() ? 2 : 1;
    return TRUE;
}

// Moves index back to the start of the previous logical character.
// Returns FALSE when index is outside (0, GetLength()].
BOOL CEditorText::MovePrevChar(int & index) const
{
    if (index>GetLength() || index<=0)
        return FALSE;

    // Only one byte precedes the cursor, so it must be a single-byte step.
    if (index==1)
    {
        index = 0;
        return TRUE;
    }

    unsigned char * pStart=(unsigned char *)((LPCTSTR)(*this));
    unsigned char * ptr=pStart+index-1;

    // The byte before the cursor cannot be a trail byte: single-byte character.
    if (!ISWIDECHARNEXT(*ptr))
    {
        index--;
        return TRUE;
    }

    // Scan backwards over the run of bytes that could be either a lead or a
    // trail byte, stopping at the first byte that is not, or at the start of
    // the string.
    ptr--;
    while (ISWIDECHARFIRST(*ptr) && ISWIDECHARNEXT(*ptr))
    {
        if (ptr==pStart) break;
        ptr --;
    }

    // The parity of the distance between the cursor and the stop position
    // decides whether the byte before the cursor closes a two-byte pair.
    if (ISWIDECHARFIRST(*ptr))
    {
        // The scan stopped on a byte that can itself be a lead byte, so the
        // pairing is counted from ptr.
        if ((index+pStart-ptr)%2==0)
        {
            index-=2;
            return TRUE;
        }
        index--;
        return TRUE;
    }

    // The scan stopped on a byte that is not a lead byte, so the pairing is
    // counted from the byte after ptr.
    if ((index+pStart-ptr)%2==0)
    {
        index--;
        return TRUE;
    }
    index-=2;
    return TRUE;
}

// Advances index to the end of the word that starts at index: a two-byte
// character counts as a word by itself; otherwise following single-byte
// characters are consumed while IsCharSameWord matches the first one.
// Returns FALSE when index is outside [0, GetLength()).
BOOL CEditorText::MoveNextWord(int & index) const
{
    if (index>=GetLength() || index<0)
        return FALSE;

    WIDECHAR ch;
    LPCTSTR szText = (*this);
    int textLen = GetLength();

    // index < textLen here, so szText[index+1] is at most the terminator.
    ch.szChar[0] = szText[index];
    ch.szChar[1] = szText[index+1];
    CONFIRMWIDECHAR(ch);

    if (ch.szChar[1])
    {
        index+=2;
        return TRUE;
    }

    unsigned char chBase = ch.szChar[0];
    index++;

    while (index<textLen)
    {
        ch.szChar[0] = szText[index];
        ch.szChar[1] = szText[index+1];
        CONFIRMWIDECHAR(ch);

        // A two-byte character ends the word.
        if (ch.szChar[1]) break;
        if (!IsCharSameWord(ch.szChar[0],chBase))
            break;
        index++;
    }

    return TRUE;
}
BOOL CEditorText::MovePrevWord(int & index) const
{
if (index>GetLength() || index<=0) return FALSE; WIDECHAR ch;
LPCTSTR szText = (*this); MovePrevChar(index); ch.szChar[0] = szText[index];
ch.szChar[1] = szText[index+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1]) return TRUE; unsigned char chBase = ch.szChar[0];
int temp = index; while (MovePrevChar(temp))
{
ch.szChar[0] = szText[temp];
ch.szChar[1] = szText[temp+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1]) break; if (!IsCharSameWord(ch.szChar[0],chBase))
break; index = temp;
} return TRUE;
}void CEditorText::WordBegin(int & index) const
{
if (index>GetLength() || index<=0) return; WIDECHAR ch;
LPCTSTR szText = (*this); ch.szChar[0] = szText[index];
ch.szChar[1] = szText[index+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1]) return; unsigned char chBase = ch.szChar[0];
int temp = index; while (MovePrevChar(temp))
{
ch.szChar[0] = szText[temp];
ch.szChar[1] = szText[temp+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1]) break; if (!IsCharSameWord(ch.szChar[0],chBase))
break; index = temp;
}
}void CEditorText::WordEnd(int & index) const
{
if (index>=GetLength() || index<0) return; WIDECHAR ch;
LPCTSTR szText = (*this);
int textLen = GetLength(); ch.szChar[0] = szText[index];
ch.szChar[1] = szText[index+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1])
{
index+=2;
return;
} unsigned char chBase = ch.szChar[0]; index++; while (index<textLen)
{
ch.szChar[0] = szText[index];
ch.szChar[1] = szText[index+1];
CONFIRMWIDECHAR(ch); if (ch.szChar[1]) break;
if (!IsCharSameWord(ch.szChar[0],chBase))
break;
index++;
}
}CEditorText & CEditorText::operator=(LPCTSTR szText)
{
int len = (int)strlen(szText);
CString strText; WIDECHAR ch;
int index = 0; strText = "";
while (index<len)
{
ch.szChar[0] = szText[index];
ch.szChar[1] = szText[index+1]; CONFIRMWIDECHAR(ch); if (ch.szChar[0]!='\r')
strText += ch.szText; index++;if (ch.szChar[1]) index++;
} CString::operator =(strText); return (*this);
}