下面是一大段代码,呵呵 :)。我本来不想把代码全部复制出来,可是担心问题说不清楚。这是一个用来将 .txt 文件转换为 .dic 字典文件的小程序,程序只由一个源文件 sim2lib.c 构成,来自自由拼音的源码。.txt 文件我放在了附件里面。下面是 sim2lib.c 源文件的内容,用它建立一个 Win32 控制台程序,编译后可直接运行。我的问题是:当我把 .txt 文件保存为 Unicode 编码的时候,再运行这个程序就会出问题,请问改动哪个地方可以让它也能处理 Unicode 编码的 .txt 文件?谢谢!万分感谢!!
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <io.h>
#include <windows.h>
#include <tchar.h>

#define MAX_PHRASE_LEN 8
#define MAX_PY_NUM 420
#define MAX_EACH_PY_NUM 41
/* Capacity, in TCHARs, of every line buffer handed to GetStr(). */
#define LINE_BUF_LEN 1000

/* One pinyin syllable together with its number in freepy.tab. */
typedef struct {
    WORD wKey;      /* 1-based line number inside [DICTIONARY]; 0 = unused slot */
    TCHAR szPY[8];  /* NUL-terminated spelling */
} PINYIN, FAR *LPPINYIN;

/*
 * Syllable table, one row per initial letter 'a'..'z'.  Lookups walk a row
 * until they meet an entry whose wKey is 0, so the last slot of every row is
 * kept empty as a terminator.
 */
PINYIN aPYTab[26][MAX_EACH_PY_NUM] = {0};

/*
 * Registers one line of the [DICTIONARY] section of freepy.tab.
 *
 * lpStr: the line; its first whitespace-delimited word is the syllable.
 *
 * Every call consumes one key number, exactly as the original counter did,
 * so keys keep matching the line positions in the table file.  Lines whose
 * first word does not start with 'a'..'z', does not fit PINYIN.szPY, or
 * whose row is already full are numbered but not stored (the original wrote
 * out of bounds for them).
 */
void LoadHZDictionary(LPTSTR lpStr)
{
    static int nKeysSeen = 0;
    const size_t cchMaxPY = sizeof aPYTab[0][0].szPY / sizeof aPYTab[0][0].szPY[0];
    TCHAR szPY[20];
    int nHead;
    int nSlot;
    int nKey;

    nKey = ++nKeysSeen;

    if (lpStr == NULL || _stscanf(lpStr, _T("%19s"), szPY) != 1)
        return;

    nHead = (int)szPY[0] - (int)_T('a');
    if (nHead < 0 || nHead > 25 || _tcslen(szPY) >= cchMaxPY)
        return;

    /* First free slot of the row; the final slot stays free as terminator. */
    for (nSlot = 0; nSlot < MAX_EACH_PY_NUM - 1; nSlot++) {
        if (aPYTab[nHead][nSlot].wKey == 0)
            break;
    }
    if (nSlot >= MAX_EACH_PY_NUM - 1)
        return;

    _tcscpy(aPYTab[nHead][nSlot].szPY, szPY);
    aPYTab[nHead][nSlot].wKey = (WORD)nKey;
}

/*
 * Classifies a line of freepy.tab.
 * Returns 1 for an empty line (end of section), 2 for a '#' comment,
 * 3 if the line contains "[PUNCTUATION]", 4 if it contains "[DICTIONARY]",
 * and 0 for anything else.
 */
WORD GetSegment(LPTSTR buf)
{
    if (*buf == _T('\0'))
        return 1;                                   /* END_SEGMENT */
    if (*buf == _T('#'))
        return 2;                                   /* COMMENT */
    if (_tcsstr(buf, _T("[PUNCTUATION]")) != NULL)
        return 3;
    if (_tcsstr(buf, _T("[DICTIONARY]")) != NULL)
        return 4;
    return 0;
}

/*
 * Reads one line (without its '\n') into pbuf, which holds cchBuf TCHARs.
 * Characters that do not fit are dropped.  Returns FALSE only when the
 * stream was already at end-of-file, i.e. nothing at all was read.
 */
static BOOL ReadLine(FILE *pf, LPTSTR pbuf, size_t cchBuf)
{
    size_t cch = 0;
    BOOL fGotAny = FALSE;
    _TINT ch;

    if (cchBuf == 0)
        return FALSE;

    /* Test the value returned by _fgettc itself: the original stored the
       EOF marker into the buffer as if it were a character. */
    while ((ch = _fgettc(pf)) != _TEOF) {
        fGotAny = TRUE;
        if (ch == _T('\n'))
            break;
        if (cch + 1 < cchBuf)
            pbuf[cch++] = (TCHAR)ch;
    }
    pbuf[cch] = _T('\0');
    return fGotAny;
}

/*
 * Reads the next line of pf into pbuf, dropping the newline.
 * pbuf must have room for LINE_BUF_LEN TCHARs.
 */
void GetStr(FILE *pf, LPTSTR pbuf)
{
    ReadLine(pf, pbuf, LINE_BUF_LEN);
}

/*
 * Loads the pinyin table from <system directory>\freepy.tab into aPYTab.
 * Shows a message box and exits with status 1 when the file cannot be
 * located or opened.
 */
void LoadTable()
{
    enum { SEC_NONE, SEC_PUNCT, SEC_DICT } section = SEC_NONE;
    FILE *stream;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szTabFileName[MAX_PATH + 16];
    UINT cchDir;

    /* 0 means failure; a value >= the buffer size means "buffer too small". */
    cchDir = GetSystemDirectory(szTabFileName, MAX_PATH);
    if (cchDir == 0 || cchDir >= MAX_PATH) {
        MessageBox(NULL, _T("system directory can not found"), _T("init"), MB_OK);
        exit(1);
    }
    if (szTabFileName[cchDir - 1] != _T('\\'))
        szTabFileName[cchDir++] = _T('\\');
    _tcscpy(szTabFileName + cchDir, _T("freepy.tab"));

    if ((stream = _tfopen(szTabFileName, _T("r"))) == NULL) {
        _sntprintf(szStr, LINE_BUF_LEN - 1, _T("%s can not found"), szTabFileName);
        szStr[LINE_BUF_LEN - 1] = _T('\0');
        MessageBox(NULL, szStr, _T("init"), MB_OK);
        exit(1);
    }

    /* A section starts at its header line and runs to the next empty line. */
    while (ReadLine(stream, szStr, LINE_BUF_LEN)) {
        WORD wSeg = GetSegment(szStr);

        if (section == SEC_NONE) {
            if (wSeg == 3)
                section = SEC_PUNCT;
            else if (wSeg == 4)
                section = SEC_DICT;
        } else if (wSeg == 1) {
            section = SEC_NONE;
        } else if (wSeg != 2 && section == SEC_DICT) {
            LoadHZDictionary(szStr);
        }
        /* [PUNCTUATION] entries are parsed past but not used by this tool. */
    }
    fclose(stream);
}

/*
 * Splits lpBuf on spaces and tabs into the fixed-stride array lpStrArr
 * (wMaxArrSize TCHARs per row).  At most wMaxCount rows are written; a field
 * longer than wMaxArrSize-1 is truncated.  *pfOverflow (optional) is set to
 * TRUE if a field was truncated or fields were left over.
 * Returns the number of rows written.
 */
static WORD SplitFields(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize,
                        WORD wMaxCount, BOOL *pfOverflow)
{
    WORD wCount = 0;
    LPTSTR p = lpBuf;

    if (pfOverflow != NULL)
        *pfOverflow = FALSE;
    if (lpBuf == NULL || lpStrArr == NULL || wMaxArrSize == 0)
        return 0;

    for (;;) {
        LPTSTR pStart;
        LPTSTR pRow;
        size_t cch;

        while (*p == _T(' ') || *p == _T('\t'))
            p++;
        if (*p == _T('\0'))
            break;

        pStart = p;
        while (*p != _T('\0') && *p != _T(' ') && *p != _T('\t'))
            p++;
        cch = (size_t)(p - pStart);

        if (wCount >= wMaxCount) {
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
            break;
        }
        if (cch >= wMaxArrSize) {
            cch = (size_t)wMaxArrSize - 1;
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
        }

        pRow = lpStrArr + (size_t)wCount * wMaxArrSize;
        memcpy(pRow, pStart, cch * sizeof(TCHAR));
        pRow[cch] = _T('\0');
        wCount++;
    }
    return wCount;
}

/*
 * Splits lpBuf on runs of spaces/tabs into lpStrArr, a flat array of rows of
 * wMaxArrSize TCHARs each, and returns the number of fields found.  Fields
 * longer than a row are truncated.  The caller must supply enough rows for
 * every field on the line.
 */
WORD String2Array(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize)
{
    return SplitFields(lpBuf, lpStrArr, wMaxArrSize, (WORD)0xFFFF, NULL);
}

/*
 * Converts the phrase list lpInName into the binary dictionary lpOutName.
 *
 * Each input line is "<phrase> <pinyin> <pinyin> ...", with one syllable per
 * phrase character.  The phrase is treated as double-byte (2 bytes per
 * character), so this version is meant for an ANSI/MBCS build and ANSI input.
 *
 * Record layout written per accepted line:
 *   1 byte   phrase length in characters
 *   1 byte   bit i = bit 8 of the i-th syllable key
 *   n bytes  low 8 bits of each syllable key
 *   2n bytes the phrase
 *   2 bytes  attribute word (always 0)
 *
 * Rejected lines are echoed to stdout; blank lines are ignored.
 * Returns 0 on success and 1 if writing the output failed; exits with
 * status 1 if either file cannot be opened.
 */
int sim2lib(LPTSTR lpInName, LPTSTR lpOutName)
{
    FILE *stream;
    FILE *out;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szStrArr[MAX_PHRASE_LEN + 4][2 * MAX_PHRASE_LEN + 2];
    WORD awKey[MAX_PHRASE_LEN];
    BYTE abKey[MAX_PHRASE_LEN + 2];
    WORD wAttrib = 0;
    int rc = 0;

    if ((stream = _tfopen(lpInName, _T("r"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpInName);
        exit(1);
    }
    if ((out = _tfopen(lpOutName, _T("wb"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpOutName);
        fclose(stream);
        exit(1);
    }

    while (_fgetts(szStr, LINE_BUF_LEN, stream) != NULL) {
        size_t cch = _tcslen(szStr);
        size_t cbPhrase;
        BOOL fOverflow = FALSE;
        BOOL fFound = TRUE;
        WORD wCount;
        WORD wLen;
        BYTE bLen;
        int i;
        int j;

        /* Remove the line ending only when one is really there; the last
           line of a file often has none. */
        while (cch > 0 && (szStr[cch - 1] == _T('\n') || szStr[cch - 1] == _T('\r')))
            szStr[--cch] = _T('\0');

        wCount = SplitFields(szStr, &szStrArr[0][0], 2 * MAX_PHRASE_LEN + 2,
                             MAX_PHRASE_LEN + 4, &fOverflow);
        if (wCount == 0)
            continue;

        wLen = (WORD)(_tcslen(szStrArr[0]) / 2);
        if (fOverflow || wLen == 0 || wLen > MAX_PHRASE_LEN || wLen != wCount - 1) {
            _tprintf(_T("%s\n"), szStr);
            continue;
        }

        /* Translate every syllable into its key. */
        for (i = 1; i < wCount && fFound; i++) {
            int nHead = (int)szStrArr[i][0] - (int)_T('a');

            fFound = FALSE;
            if (nHead < 0 || nHead > 25)
                break;
            for (j = 0; j < MAX_EACH_PY_NUM && aPYTab[nHead][j].wKey != 0; j++) {
                if (_tcscmp(aPYTab[nHead][j].szPY, szStrArr[i]) == 0) {
                    awKey[i - 1] = aPYTab[nHead][j].wKey;
                    fFound = TRUE;
                    break;
                }
            }
        }
        if (!fFound) {
            _tprintf(_T("%s\n"), szStr);
            continue;
        }

        /* Keys are 9 bits wide: low byte per syllable, ninth bits collected
           in abKey[0]. */
        abKey[0] = 0;
        for (i = 0; i < wLen; i++) {
            abKey[i + 1] = (BYTE)(awKey[i] & 0xff);
            abKey[0] |= (BYTE)((awKey[i] & 0x0100) >> (8 - i));
        }

        bLen = (BYTE)wLen;
        cbPhrase = (size_t)wLen * 2;
        if (fwrite(&bLen, 1, 1, out) != 1 ||
            fwrite(abKey, 1, (size_t)wLen + 1, out) != (size_t)wLen + 1 ||
            fwrite(szStrArr[0], 1, cbPhrase, out) != cbPhrase ||
            fwrite(&wAttrib, 2, 1, out) != 1) {
            _ftprintf(stderr, _T("%s write failed.\n"), lpOutName);
            rc = 1;
            break;
        }
    }

    fclose(stream);
    if (fclose(out) != 0)
        rc = 1;
    return rc;
}

/*
 * Entry point: sim2lib <input_name> <output_name>
 * Returns 0 on success, 1 on a usage or write error.
 */
int _tmain(int argc, TCHAR **argv)
{
    if (argc != 3) {
        _ftprintf(stderr, _T("usage: %s <input_name> <output_name>\n"), argv[0]);
        return 1;
    }
    LoadTable();
    return sim2lib(argv[1], argv[2]);
}
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <io.h>
#include <windows.h>
#include <tchar.h>

#define MAX_PHRASE_LEN 8
#define MAX_PY_NUM 420
#define MAX_EACH_PY_NUM 41
/* Capacity, in TCHARs, of every line buffer handed to GetStr(). */
#define LINE_BUF_LEN 1000

/* One pinyin syllable together with its number in freepy.tab. */
typedef struct {
    WORD wKey;      /* 1-based line number inside [DICTIONARY]; 0 = unused slot */
    TCHAR szPY[8];  /* NUL-terminated spelling */
} PINYIN, FAR *LPPINYIN;

/*
 * Syllable table, one row per initial letter 'a'..'z'.  Lookups walk a row
 * until they meet an entry whose wKey is 0, so the last slot of every row is
 * kept empty as a terminator.
 */
PINYIN aPYTab[26][MAX_EACH_PY_NUM] = {0};

/*
 * Registers one line of the [DICTIONARY] section of freepy.tab.
 *
 * lpStr: the line; its first whitespace-delimited word is the syllable.
 *
 * Every call consumes one key number, exactly as the original counter did,
 * so keys keep matching the line positions in the table file.  Lines whose
 * first word does not start with 'a'..'z', does not fit PINYIN.szPY, or
 * whose row is already full are numbered but not stored (the original wrote
 * out of bounds for them).
 */
void LoadHZDictionary(LPTSTR lpStr)
{
    static int nKeysSeen = 0;
    const size_t cchMaxPY = sizeof aPYTab[0][0].szPY / sizeof aPYTab[0][0].szPY[0];
    TCHAR szPY[20];
    int nHead;
    int nSlot;
    int nKey;

    nKey = ++nKeysSeen;

    if (lpStr == NULL || _stscanf(lpStr, _T("%19s"), szPY) != 1)
        return;

    nHead = (int)szPY[0] - (int)_T('a');
    if (nHead < 0 || nHead > 25 || _tcslen(szPY) >= cchMaxPY)
        return;

    /* First free slot of the row; the final slot stays free as terminator. */
    for (nSlot = 0; nSlot < MAX_EACH_PY_NUM - 1; nSlot++) {
        if (aPYTab[nHead][nSlot].wKey == 0)
            break;
    }
    if (nSlot >= MAX_EACH_PY_NUM - 1)
        return;

    _tcscpy(aPYTab[nHead][nSlot].szPY, szPY);
    aPYTab[nHead][nSlot].wKey = (WORD)nKey;
}

/*
 * Classifies a line of freepy.tab.
 * Returns 1 for an empty line (end of section), 2 for a '#' comment,
 * 3 if the line contains "[PUNCTUATION]", 4 if it contains "[DICTIONARY]",
 * and 0 for anything else.
 */
WORD GetSegment(LPTSTR buf)
{
    if (*buf == _T('\0'))
        return 1;                                   /* END_SEGMENT */
    if (*buf == _T('#'))
        return 2;                                   /* COMMENT */
    if (_tcsstr(buf, _T("[PUNCTUATION]")) != NULL)
        return 3;
    if (_tcsstr(buf, _T("[DICTIONARY]")) != NULL)
        return 4;
    return 0;
}

/*
 * Reads one line (without its '\n') into pbuf, which holds cchBuf TCHARs.
 * Characters that do not fit are dropped.  Returns FALSE only when the
 * stream was already at end-of-file, i.e. nothing at all was read.
 */
static BOOL ReadLine(FILE *pf, LPTSTR pbuf, size_t cchBuf)
{
    size_t cch = 0;
    BOOL fGotAny = FALSE;
    _TINT ch;

    if (cchBuf == 0)
        return FALSE;

    /* Test the value returned by _fgettc itself: the original stored the
       EOF marker into the buffer as if it were a character. */
    while ((ch = _fgettc(pf)) != _TEOF) {
        fGotAny = TRUE;
        if (ch == _T('\n'))
            break;
        if (cch + 1 < cchBuf)
            pbuf[cch++] = (TCHAR)ch;
    }
    pbuf[cch] = _T('\0');
    return fGotAny;
}

/*
 * Reads the next line of pf into pbuf, dropping the newline.
 * pbuf must have room for LINE_BUF_LEN TCHARs.
 */
void GetStr(FILE *pf, LPTSTR pbuf)
{
    ReadLine(pf, pbuf, LINE_BUF_LEN);
}

/*
 * Loads the pinyin table from <system directory>\freepy.tab into aPYTab.
 * Shows a message box and exits with status 1 when the file cannot be
 * located or opened.
 */
void LoadTable()
{
    enum { SEC_NONE, SEC_PUNCT, SEC_DICT } section = SEC_NONE;
    FILE *stream;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szTabFileName[MAX_PATH + 16];
    UINT cchDir;

    /* 0 means failure; a value >= the buffer size means "buffer too small". */
    cchDir = GetSystemDirectory(szTabFileName, MAX_PATH);
    if (cchDir == 0 || cchDir >= MAX_PATH) {
        MessageBox(NULL, _T("system directory can not found"), _T("init"), MB_OK);
        exit(1);
    }
    if (szTabFileName[cchDir - 1] != _T('\\'))
        szTabFileName[cchDir++] = _T('\\');
    _tcscpy(szTabFileName + cchDir, _T("freepy.tab"));

    if ((stream = _tfopen(szTabFileName, _T("r"))) == NULL) {
        _sntprintf(szStr, LINE_BUF_LEN - 1, _T("%s can not found"), szTabFileName);
        szStr[LINE_BUF_LEN - 1] = _T('\0');
        MessageBox(NULL, szStr, _T("init"), MB_OK);
        exit(1);
    }

    /* A section starts at its header line and runs to the next empty line. */
    while (ReadLine(stream, szStr, LINE_BUF_LEN)) {
        WORD wSeg = GetSegment(szStr);

        if (section == SEC_NONE) {
            if (wSeg == 3)
                section = SEC_PUNCT;
            else if (wSeg == 4)
                section = SEC_DICT;
        } else if (wSeg == 1) {
            section = SEC_NONE;
        } else if (wSeg != 2 && section == SEC_DICT) {
            LoadHZDictionary(szStr);
        }
        /* [PUNCTUATION] entries are parsed past but not used by this tool. */
    }
    fclose(stream);
}

/*
 * Splits lpBuf on spaces and tabs into the fixed-stride array lpStrArr
 * (wMaxArrSize TCHARs per row).  At most wMaxCount rows are written; a field
 * longer than wMaxArrSize-1 is truncated.  *pfOverflow (optional) is set to
 * TRUE if a field was truncated or fields were left over.
 * Returns the number of rows written.
 */
static WORD SplitFields(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize,
                        WORD wMaxCount, BOOL *pfOverflow)
{
    WORD wCount = 0;
    LPTSTR p = lpBuf;

    if (pfOverflow != NULL)
        *pfOverflow = FALSE;
    if (lpBuf == NULL || lpStrArr == NULL || wMaxArrSize == 0)
        return 0;

    for (;;) {
        LPTSTR pStart;
        LPTSTR pRow;
        size_t cch;

        while (*p == _T(' ') || *p == _T('\t'))
            p++;
        if (*p == _T('\0'))
            break;

        pStart = p;
        while (*p != _T('\0') && *p != _T(' ') && *p != _T('\t'))
            p++;
        cch = (size_t)(p - pStart);

        if (wCount >= wMaxCount) {
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
            break;
        }
        if (cch >= wMaxArrSize) {
            cch = (size_t)wMaxArrSize - 1;
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
        }

        pRow = lpStrArr + (size_t)wCount * wMaxArrSize;
        memcpy(pRow, pStart, cch * sizeof(TCHAR));
        pRow[cch] = _T('\0');
        wCount++;
    }
    return wCount;
}

/*
 * Splits lpBuf on runs of spaces/tabs into lpStrArr, a flat array of rows of
 * wMaxArrSize TCHARs each, and returns the number of fields found.  Fields
 * longer than a row are truncated.  The caller must supply enough rows for
 * every field on the line.
 */
WORD String2Array(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize)
{
    return SplitFields(lpBuf, lpStrArr, wMaxArrSize, (WORD)0xFFFF, NULL);
}

/*
 * Converts the phrase list lpInName into the binary dictionary lpOutName.
 *
 * Each input line is "<phrase> <pinyin> <pinyin> ...", with one syllable per
 * phrase character.  The phrase is treated as double-byte (2 bytes per
 * character), so this version is meant for an ANSI/MBCS build and ANSI input.
 *
 * Record layout written per accepted line:
 *   1 byte   phrase length in characters
 *   1 byte   bit i = bit 8 of the i-th syllable key
 *   n bytes  low 8 bits of each syllable key
 *   2n bytes the phrase
 *   2 bytes  attribute word (always 0)
 *
 * Rejected lines are echoed to stdout; blank lines are ignored.
 * Returns 0 on success and 1 if writing the output failed; exits with
 * status 1 if either file cannot be opened.
 */
int sim2lib(LPTSTR lpInName, LPTSTR lpOutName)
{
    FILE *stream;
    FILE *out;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szStrArr[MAX_PHRASE_LEN + 4][2 * MAX_PHRASE_LEN + 2];
    WORD awKey[MAX_PHRASE_LEN];
    BYTE abKey[MAX_PHRASE_LEN + 2];
    WORD wAttrib = 0;
    int rc = 0;

    if ((stream = _tfopen(lpInName, _T("r"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpInName);
        exit(1);
    }
    if ((out = _tfopen(lpOutName, _T("wb"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpOutName);
        fclose(stream);
        exit(1);
    }

    while (_fgetts(szStr, LINE_BUF_LEN, stream) != NULL) {
        size_t cch = _tcslen(szStr);
        size_t cbPhrase;
        BOOL fOverflow = FALSE;
        BOOL fFound = TRUE;
        WORD wCount;
        WORD wLen;
        BYTE bLen;
        int i;
        int j;

        /* Remove the line ending only when one is really there; the last
           line of a file often has none. */
        while (cch > 0 && (szStr[cch - 1] == _T('\n') || szStr[cch - 1] == _T('\r')))
            szStr[--cch] = _T('\0');

        wCount = SplitFields(szStr, &szStrArr[0][0], 2 * MAX_PHRASE_LEN + 2,
                             MAX_PHRASE_LEN + 4, &fOverflow);
        if (wCount == 0)
            continue;

        wLen = (WORD)(_tcslen(szStrArr[0]) / 2);
        if (fOverflow || wLen == 0 || wLen > MAX_PHRASE_LEN || wLen != wCount - 1) {
            _tprintf(_T("%s\n"), szStr);
            continue;
        }

        /* Translate every syllable into its key. */
        for (i = 1; i < wCount && fFound; i++) {
            int nHead = (int)szStrArr[i][0] - (int)_T('a');

            fFound = FALSE;
            if (nHead < 0 || nHead > 25)
                break;
            for (j = 0; j < MAX_EACH_PY_NUM && aPYTab[nHead][j].wKey != 0; j++) {
                if (_tcscmp(aPYTab[nHead][j].szPY, szStrArr[i]) == 0) {
                    awKey[i - 1] = aPYTab[nHead][j].wKey;
                    fFound = TRUE;
                    break;
                }
            }
        }
        if (!fFound) {
            _tprintf(_T("%s\n"), szStr);
            continue;
        }

        /* Keys are 9 bits wide: low byte per syllable, ninth bits collected
           in abKey[0]. */
        abKey[0] = 0;
        for (i = 0; i < wLen; i++) {
            abKey[i + 1] = (BYTE)(awKey[i] & 0xff);
            abKey[0] |= (BYTE)((awKey[i] & 0x0100) >> (8 - i));
        }

        bLen = (BYTE)wLen;
        cbPhrase = (size_t)wLen * 2;
        if (fwrite(&bLen, 1, 1, out) != 1 ||
            fwrite(abKey, 1, (size_t)wLen + 1, out) != (size_t)wLen + 1 ||
            fwrite(szStrArr[0], 1, cbPhrase, out) != cbPhrase ||
            fwrite(&wAttrib, 2, 1, out) != 1) {
            _ftprintf(stderr, _T("%s write failed.\n"), lpOutName);
            rc = 1;
            break;
        }
    }

    fclose(stream);
    if (fclose(out) != 0)
        rc = 1;
    return rc;
}

/*
 * Entry point: sim2lib <input_name> <output_name>
 * Returns 0 on success, 1 on a usage or write error.
 */
int _tmain(int argc, TCHAR **argv)
{
    if (argc != 3) {
        _ftprintf(stderr, _T("usage: %s <input_name> <output_name>\n"), argv[0]);
        return 1;
    }
    LoadTable();
    return sim2lib(argv[1], argv[2]);
}
但以前我是这样处理的,存的时候,调用WideCharToMultiByte,读的时候,调用MultiByteToWideChar
fprintf(stderr, "%s cant open.\n ",lpInName);
exit(1);
} bool bUnicode = false;
_fgetts(szStr,2,stream);
if( *((WORD*)szStr) == 0xFEFF )
{
bUnicode = true;
}
_fseek(0,0,FILE_BEGIN);if( (out = _tfopen( lpOutName, _T( "wb ") )) == NULL ){
fprintf(stderr, "%s cant open.\n ",lpOutName);
exit(1);
} while( !feof( stream )) {
if( _fgetts(szStr,1000,stream) != NULL){
*(szStr+_tcslen(szStr)-1)=_T( '\0 ');
if( bUnicode )
{
// 这里先把UNICODE转换为ANSI,结果也放在szStr里:(方法就是ouyh12345 提供的函数)
}
wCount=String2Array(szStr,(LPTSTR)szStrArr,2*MAX_PHRASE_LEN+2);
wLen=_tcslen(szStrArr[0])/2;
if(wLen != wCount-1 ¦ ¦ wLen > MAX_PHRASE_LEN){
printf( "%s\n ",szStr);
continue;
}
……
/*新增变量*/
DWORD dwNum;
LPSTR pText;
HGLOBAL hGlobal;if( (stream = _tfopen( lpInName, _T( "r ") )) == NULL ){
fprintf(stderr, "%s cant open.\n ",lpInName);
exit(1);
} bool bUnicode = false;
_fgetts(szStr,2,stream);
if( *((WORD*)szStr) == 0xFEFF )
{
bUnicode = true;
}
fseek(stream,2,FILE_BEGIN);//我没有用过_fseek,不知道这样用对不对;
if( (out = _tfopen( lpOutName, _T( "wb ") )) == NULL ){
fprintf(stderr, "%s cant open.\n ",lpOutName);
exit(1);
} while( !feof( stream )) {
if( _fgetts(szStr,1000,stream) != NULL){
*(szStr+_tcslen(szStr)-1)=_T( '\0 ');
if( bUnicode )
{ //加入下面代码
dwNum=WideCharToMultiByte(CP_OEMCP,NULL,(LPCWSTR)szStr,-1,NULL,0,NULL,FALSE);
hGlobal=GlobalAlloc(GHND,dwNum);
pText=(LPSTR)GlobalLock(hGlobal); if (!pText) {
GlobalUnlock(hGlobal);
GlobalFree(hGlobal);
} WideCharToMultiByte (CP_OEMCP,NULL,(LPCWSTR)szStr,-1,pText,dwNum,NULL,FALSE);
}
wCount=String2Array(pText,(LPTSTR)szStrArr,2*MAX_PHRASE_LEN+2);
wLen=_tcslen(szStrArr[0])/2;
if(wLen != wCount-1 ¦ ¦ wLen > MAX_PHRASE_LEN){
printf( "%s\n ",szStr);
continue;
}
……
…………
GlobalUnlock(hGlobal);
GlobalFree(hGlobal);
但是编译时会有警告
D:\vcProject\sim2lib_24\sim2lib.c(192) : warning C4047: 'function' : 'unsigned long ' differs in levels of indirection from 'void *'
D:\vcProject\sim2lib_24\sim2lib.c(192) : warning C4024: 'WideCharToMultiByte' : different types for formal and actual parameter 2
D:\vcProject\sim2lib_24\sim2lib.c(201) : warning C4047: 'function' : 'unsigned long ' differs in levels of indirection from 'void *'
D:\vcProject\sim2lib_24\sim2lib.c(201) : warning C4024: 'WideCharToMultiByte' : different types for formal and actual parameter 2
这是不是说类型不匹配啊?我改了好几个地方都没解决。另外,是否应该把 Settings -> C/C++ 中的预处理器定义设为 _UNICODE,UNICODE?
/*
 * Detect a UTF-16LE byte-order mark (bytes FF FE) at the start of the file.
 * The two bytes are read raw with fread: _fgetts(szStr,2,stream) stores at
 * most ONE character, so in an ANSI build the 16-bit comparison against
 * 0xFEFF could never succeed.
 * NOTE(review): the rest of a UTF-16 file also has to be read as wide
 * characters from a stream opened in binary mode ("rb") - confirm at the
 * _tfopen call.
 */
if( fread(szStr,1,2,stream) == 2
    && ((unsigned char*)szStr)[0] == 0xFF
    && ((unsigned char*)szStr)[1] == 0xFE )
{
// Unicode file: leave the file position alone, the text starts right after the 2-byte BOM
bUnicode = true;
}
else
{
// Not a Unicode file: rewind to the beginning of the file
fseek(stream,0, SEEK_SET);
}
另外:给你一个UNICODE转ANSI字符的函数吧:
/* 功能: Unicode 转 ANSI
*
* 参数: pText--->输入: Unicode字符; 输出: Ansi字符
*
* 返回值: 如果成功则返回真,否则返回假.
*/
bool CGlobalFunc::Unicode2Ansi(unsigned short *pText)
{
if( pText == NULL )
{
return false;
} int nUniLen = lstrlenW(pText);
if( nUniLen == 0 )
{
return false;
} int nAnsiLen = WideCharToMultiByte(CP_ACP, 0, pText, nUniLen, NULL, 0, NULL, NULL);
if( nAnsiLen == 0 )
{
return false;
} char *pAnsi = new char[nAnsiLen + 1];
nAnsiLen = WideCharToMultiByte(CP_ACP, 0, pText, nUniLen, pAnsi, nAnsiLen, NULL, NULL);
if( nAnsiLen == 0 )
{
delete[] pAnsi;
return false;
} pAnsi[nAnsiLen] = 0; memset(pText, 0, nUniLen * sizeof(short));
memcpy(pText, pAnsi, nAnsiLen); delete[] pAnsi;
pAnsi = NULL;
return true;
}
BOOL Unicode2Ansi(unsigned short *pText)
{
HGLOBAL hGlobal;
char *pAnsi;
int nUniLen,nAnsiLen; if( pText == NULL )
{
return FALSE;
} nUniLen = lstrlenW(pText);
if( nUniLen == 0 )
{
return FALSE;
} nAnsiLen = WideCharToMultiByte(CP_ACP, 0, pText, nUniLen, NULL, 0, NULL, NULL);
if( nAnsiLen == 0 )
{
return FALSE;
} //char *pAnsi = new char[nAnsiLen + 1];
hGlobal=GlobalAlloc(GHND,nAnsiLen+1);
pAnsi=(char*)GlobalLock(hGlobal);
if (!pAnsi)
{
GlobalUnlock(hGlobal);
GlobalFree(hGlobal);
return FALSE;
}
nAnsiLen = WideCharToMultiByte(CP_ACP, 0, pText, nUniLen, pAnsi, nAnsiLen, NULL, NULL);
if( nAnsiLen == 0 )
{
//delete[] pAnsi;
GlobalUnlock(hGlobal);
GlobalFree(hGlobal);
return FALSE;
} pAnsi[nAnsiLen] = 0; memset(pText, 0, nUniLen * sizeof(short));
memcpy(pText, pAnsi, nAnsiLen); //delete[] pAnsi;
GlobalUnlock(hGlobal);
GlobalFree(hGlobal);
pAnsi = NULL;
return TRUE;
}然后更改sim2lib函数,在相应位置加上:
/*
 * Detect a UTF-16LE byte-order mark (bytes FF FE) at the start of the file.
 * The two bytes are read raw with fread: _fgetts(szStr,2,stream) stores at
 * most ONE character, so in an ANSI build the 16-bit comparison against
 * 0xFEFF could never succeed and bUnicode always stayed FALSE.
 * NOTE(review): the rest of a UTF-16 file also has to be read as wide
 * characters from a stream opened in binary mode ("rb") - confirm at the
 * _tfopen call.
 */
if( fread(szStr,1,2,stream) == 2
    && ((unsigned char*)szStr)[0] == 0xFF
    && ((unsigned char*)szStr)[1] == 0xFE )
{
// Unicode file: leave the file position alone, the text starts right after the 2-byte BOM
bUnicode = TRUE;
}
else
{
// Not a Unicode file: rewind to the beginning of the file
fseek(stream,0, SEEK_SET);
}
…………
if( bUnicode )
{
Unicode2Ansi((unsigned short*)szStr);
} 现在程序处理ANSI文件没有问题,但是处理UNICODE文件时生成的.dic文件是空的,郁闷!我不知道是哪个环节
出了问题?
我在程序里面需不需要预定义UNICODE,如果定义的话,可这程序只不过是将宽字符转换为多字节,
然后仍然按ANSI的方式处理。可如果不定义的话,比如像_fgetts(szStr, nBuf - 2, stream)
这个函数,系统会使用ANSI的char *fgets( char *string, int n, FILE *stream ),即它
需要一个char*参数,但从unicode类型simple.txt文件里读到的却是wchar_t数据,这不矛盾了吗?
呵呵 ,我不知道上面的理解是不是正确?
另:我已经给您的msn发送了邀请。
我需要实现的功能就是,这个小程序能根据一个.txt记事本文件生成一个.dic文件
当.txt文件为ANSI编码时,主贴里面的程序就已经可以实现了。
当.txt文件为UNICODE编码时,主贴里面的程序无法实现。
现在要解决的就是后面这个问题。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <io.h>
#include <windows.h>
#include <tchar.h>

#define MAX_PHRASE_LEN 8
#define MAX_PY_NUM 420
#define MAX_EACH_PY_NUM 41
/* Capacity, in TCHARs, of every line buffer handed to GetStr(). */
#define LINE_BUF_LEN 1000

/* One pinyin syllable together with its number in freepy.tab. */
typedef struct {
    WORD wKey;      /* 1-based line number inside [DICTIONARY]; 0 = unused slot */
    TCHAR szPY[8];  /* NUL-terminated spelling */
} PINYIN, FAR *LPPINYIN;

/*
 * Syllable table, one row per initial letter 'a'..'z'.  Lookups walk a row
 * until they meet an entry whose wKey is 0, so the last slot of every row is
 * kept empty as a terminator.
 */
PINYIN aPYTab[26][MAX_EACH_PY_NUM] = {0};

/*
 * Registers one line of the [DICTIONARY] section of freepy.tab.
 *
 * lpStr: the line; its first whitespace-delimited word is the syllable.
 *
 * Every call consumes one key number, exactly as the original counter did,
 * so keys keep matching the line positions in the table file.  Lines whose
 * first word does not start with 'a'..'z', does not fit PINYIN.szPY, or
 * whose row is already full are numbered but not stored (the original wrote
 * out of bounds for them).
 */
void LoadHZDictionary(LPTSTR lpStr)
{
    static int nKeysSeen = 0;
    const size_t cchMaxPY = sizeof aPYTab[0][0].szPY / sizeof aPYTab[0][0].szPY[0];
    TCHAR szPY[20];
    int nHead;
    int nSlot;
    int nKey;

    nKey = ++nKeysSeen;

    if (lpStr == NULL || _stscanf(lpStr, _T("%19s"), szPY) != 1)
        return;

    nHead = (int)szPY[0] - (int)_T('a');
    if (nHead < 0 || nHead > 25 || _tcslen(szPY) >= cchMaxPY)
        return;

    /* First free slot of the row; the final slot stays free as terminator. */
    for (nSlot = 0; nSlot < MAX_EACH_PY_NUM - 1; nSlot++) {
        if (aPYTab[nHead][nSlot].wKey == 0)
            break;
    }
    if (nSlot >= MAX_EACH_PY_NUM - 1)
        return;

    _tcscpy(aPYTab[nHead][nSlot].szPY, szPY);
    aPYTab[nHead][nSlot].wKey = (WORD)nKey;
}

/*
 * Classifies a line of freepy.tab.
 * Returns 1 for an empty line (end of section), 2 for a '#' comment,
 * 3 if the line contains "[PUNCTUATION]", 4 if it contains "[DICTIONARY]",
 * and 0 for anything else.
 */
WORD GetSegment(LPTSTR buf)
{
    if (*buf == _T('\0'))
        return 1;                                   /* END_SEGMENT */
    if (*buf == _T('#'))
        return 2;                                   /* COMMENT */
    if (_tcsstr(buf, _T("[PUNCTUATION]")) != NULL)
        return 3;
    if (_tcsstr(buf, _T("[DICTIONARY]")) != NULL)
        return 4;
    return 0;
}

/*
 * Reads one line (without its '\n') into pbuf, which holds cchBuf TCHARs.
 * Characters that do not fit are dropped.  Returns FALSE only when the
 * stream was already at end-of-file, i.e. nothing at all was read.
 */
static BOOL ReadLine(FILE *pf, LPTSTR pbuf, size_t cchBuf)
{
    size_t cch = 0;
    BOOL fGotAny = FALSE;
    _TINT ch;

    if (cchBuf == 0)
        return FALSE;

    /* Test the value returned by _fgettc itself: the original stored the
       EOF marker into the buffer as if it were a character. */
    while ((ch = _fgettc(pf)) != _TEOF) {
        fGotAny = TRUE;
        if (ch == _T('\n'))
            break;
        if (cch + 1 < cchBuf)
            pbuf[cch++] = (TCHAR)ch;
    }
    pbuf[cch] = _T('\0');
    return fGotAny;
}

/*
 * Reads the next line of pf into pbuf, dropping the newline.
 * pbuf must have room for LINE_BUF_LEN TCHARs.
 */
void GetStr(FILE *pf, LPTSTR pbuf)
{
    ReadLine(pf, pbuf, LINE_BUF_LEN);
}

/*
 * Loads the pinyin table from <system directory>\freepy.tab into aPYTab.
 * The table is opened in text mode, so in a Unicode build the CRT converts
 * its multibyte contents to wide characters while reading.
 * Shows a message box and exits with status 1 when the file cannot be
 * located or opened.
 */
void LoadTable()
{
    enum { SEC_NONE, SEC_PUNCT, SEC_DICT } section = SEC_NONE;
    FILE *stream;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szTabFileName[MAX_PATH + 16];
    UINT cchDir;

    /* 0 means failure; a value >= the buffer size means "buffer too small". */
    cchDir = GetSystemDirectory(szTabFileName, MAX_PATH);
    if (cchDir == 0 || cchDir >= MAX_PATH) {
        MessageBox(NULL, _T("system directory can not found"), _T("init"), MB_OK);
        exit(1);
    }
    if (szTabFileName[cchDir - 1] != _T('\\'))
        szTabFileName[cchDir++] = _T('\\');
    _tcscpy(szTabFileName + cchDir, _T("freepy.tab"));

    if ((stream = _tfopen(szTabFileName, _T("r"))) == NULL) {
        _sntprintf(szStr, LINE_BUF_LEN - 1, _T("%s can not found"), szTabFileName);
        szStr[LINE_BUF_LEN - 1] = _T('\0');
        MessageBox(NULL, szStr, _T("init"), MB_OK);
        exit(1);
    }

    /* A section starts at its header line and runs to the next empty line. */
    while (ReadLine(stream, szStr, LINE_BUF_LEN)) {
        WORD wSeg = GetSegment(szStr);

        if (section == SEC_NONE) {
            if (wSeg == 3)
                section = SEC_PUNCT;
            else if (wSeg == 4)
                section = SEC_DICT;
        } else if (wSeg == 1) {
            section = SEC_NONE;
        } else if (wSeg != 2 && section == SEC_DICT) {
            LoadHZDictionary(szStr);
        }
        /* [PUNCTUATION] entries are parsed past but not used by this tool. */
    }
    fclose(stream);
}

/*
 * Splits lpBuf on spaces and tabs into the fixed-stride array lpStrArr
 * (wMaxArrSize TCHARs per row).  At most wMaxCount rows are written; a field
 * longer than wMaxArrSize-1 is truncated.  *pfOverflow (optional) is set to
 * TRUE if a field was truncated or fields were left over.
 * Returns the number of rows written.
 */
static WORD SplitFields(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize,
                        WORD wMaxCount, BOOL *pfOverflow)
{
    WORD wCount = 0;
    LPTSTR p = lpBuf;

    if (pfOverflow != NULL)
        *pfOverflow = FALSE;
    if (lpBuf == NULL || lpStrArr == NULL || wMaxArrSize == 0)
        return 0;

    for (;;) {
        LPTSTR pStart;
        LPTSTR pRow;
        size_t cch;

        while (*p == _T(' ') || *p == _T('\t'))
            p++;
        if (*p == _T('\0'))
            break;

        pStart = p;
        while (*p != _T('\0') && *p != _T(' ') && *p != _T('\t'))
            p++;
        cch = (size_t)(p - pStart);

        if (wCount >= wMaxCount) {
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
            break;
        }
        if (cch >= wMaxArrSize) {
            cch = (size_t)wMaxArrSize - 1;
            if (pfOverflow != NULL)
                *pfOverflow = TRUE;
        }

        pRow = lpStrArr + (size_t)wCount * wMaxArrSize;
        memcpy(pRow, pStart, cch * sizeof(TCHAR));
        pRow[cch] = _T('\0');
        wCount++;
    }
    return wCount;
}

/*
 * Splits lpBuf on runs of spaces/tabs into lpStrArr, a flat array of rows of
 * wMaxArrSize TCHARs each, and returns the number of fields found.  Fields
 * longer than a row are truncated.  The caller must supply enough rows for
 * every field on the line.
 */
WORD String2Array(LPTSTR lpBuf, LPTSTR lpStrArr, WORD wMaxArrSize)
{
    return SplitFields(lpBuf, lpStrArr, wMaxArrSize, (WORD)0xFFFF, NULL);
}

/*
 * Converts the Unicode phrase list lpInName into the binary dictionary
 * lpOutName.  Intended for a build with UNICODE/_UNICODE defined.
 *
 * The input is opened in binary mode so the wide-character CRT functions
 * return the file's UTF-16 code units unchanged.  A byte-order mark at the
 * start of the file is skipped; the first line itself is processed (the
 * previous revision threw the whole first line away).
 *
 * Each line is "<phrase> <pinyin> <pinyin> ...", with one syllable per
 * phrase character.  Record layout written per accepted line:
 *   1 byte   phrase length in characters
 *   1 byte   bit i = bit 8 of the i-th syllable key
 *   n bytes  low 8 bits of each syllable key
 *   n TCHARs the phrase
 *   2 bytes  attribute word (always 0)
 * NOTE(review): the phrase is written as TCHARs (UTF-16 in a Unicode build).
 * Confirm that the consumer of the .dic file expects that rather than the
 * double-byte ANSI text the original tool wrote.
 *
 * Structurally invalid lines are echoed to stdout, lines with an unknown
 * syllable print "E", blank lines are ignored.
 * Returns 0 on success and 1 if writing the output failed; exits with
 * status 1 if either file cannot be opened.
 */
int sim2lib(LPTSTR lpInName, LPTSTR lpOutName)
{
    FILE *stream;
    FILE *out;
    TCHAR szStr[LINE_BUF_LEN];
    TCHAR szStrArr[MAX_PHRASE_LEN + 4][MAX_PHRASE_LEN + 1];
    WORD awKey[MAX_PHRASE_LEN];
    BYTE abKey[MAX_PHRASE_LEN + 2];
    WORD wAttrib = 0;
    BOOL bFirst = TRUE;
    int rc = 0;

    if ((stream = _tfopen(lpInName, _T("rb"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpInName);
        exit(1);
    }
    if ((out = _tfopen(lpOutName, _T("wb"))) == NULL) {
        _ftprintf(stderr, _T("%s cant open.\n"), lpOutName);
        fclose(stream);
        exit(1);
    }

    while (_fgetts(szStr, LINE_BUF_LEN, stream) != NULL) {
        LPTSTR pszLine = szStr;
        size_t cch;
        size_t cbPhrase;
        BOOL fOverflow = FALSE;
        BOOL fFound = TRUE;
        WORD wCount;
        WORD wLen;
        BYTE bLen;
        int i;
        int j;

#ifdef _UNICODE
        /* Step over the byte-order mark instead of dropping the first line. */
        if (bFirst && *pszLine == 0xFEFF)
            pszLine++;
#endif
        bFirst = FALSE;

        /* Binary mode keeps "\r\n"; strip whichever of the two is present
           (the last line may have neither). */
        cch = _tcslen(pszLine);
        while (cch > 0 && (pszLine[cch - 1] == _T('\n') || pszLine[cch - 1] == _T('\r')))
            pszLine[--cch] = _T('\0');

        wCount = SplitFields(pszLine, &szStrArr[0][0], MAX_PHRASE_LEN + 1,
                             MAX_PHRASE_LEN + 4, &fOverflow);
        if (wCount == 0)
            continue;

        /* One TCHAR per phrase character in a wide build. */
        wLen = (WORD)_tcslen(szStrArr[0]);
        if (fOverflow || wLen == 0 || wLen > MAX_PHRASE_LEN || wLen != wCount - 1) {
            _tprintf(_T("%s\n"), pszLine);
            continue;
        }

        /* Translate every syllable into its key. */
        for (i = 1; i < wCount && fFound; i++) {
            int nHead = (int)szStrArr[i][0] - (int)_T('a');

            fFound = FALSE;
            if (nHead < 0 || nHead > 25)
                break;
            for (j = 0; j < MAX_EACH_PY_NUM && aPYTab[nHead][j].wKey != 0; j++) {
                if (_tcscmp(aPYTab[nHead][j].szPY, szStrArr[i]) == 0) {
                    awKey[i - 1] = aPYTab[nHead][j].wKey;
                    fFound = TRUE;
                    break;
                }
            }
        }
        if (!fFound) {
            /* %s is the TCHAR-width string in the _t* functions; %S selected
               the opposite width and misprinted the text. */
            _tprintf(_T("%s\n"), _T("E"));
            continue;
        }

        /* Keys are 9 bits wide: low byte per syllable, ninth bits collected
           in abKey[0]. */
        abKey[0] = 0;
        for (i = 0; i < wLen; i++) {
            abKey[i + 1] = (BYTE)(awKey[i] & 0xff);
            abKey[0] |= (BYTE)((awKey[i] & 0x0100) >> (8 - i));
        }

        bLen = (BYTE)wLen;
        /* Whole characters, not wLen bytes: wLen bytes is only half of a
           wide phrase. */
        cbPhrase = (size_t)wLen * sizeof(TCHAR);
        if (fwrite(&bLen, 1, 1, out) != 1 ||
            fwrite(abKey, 1, (size_t)wLen + 1, out) != (size_t)wLen + 1 ||
            fwrite(szStrArr[0], 1, cbPhrase, out) != cbPhrase ||
            fwrite(&wAttrib, 2, 1, out) != 1) {
            _ftprintf(stderr, _T("%s write failed.\n"), lpOutName);
            rc = 1;
            break;
        }
    }

    fclose(stream);
    if (fclose(out) != 0)
        rc = 1;
    return rc;
}

/*
 * Entry point.  With two arguments they are used as <input_name> and
 * <output_name>; otherwise the development paths below are used, as before.
 * Returns the result of sim2lib().
 */
int _tmain(int argc, TCHAR **argv)
{
    LoadTable();
    if (argc == 3)
        return sim2lib(argv[1], argv[2]);
    return sim2lib(_T("D:\\vcProject\\sim2lib_24\\Debug\\simple.txt"),_T("D:\\vcProject\\sim2lib_24\\Debug\\a.dic"));
}