在C#中是否有字符串近似匹配的相关函数?
我在网上看到一个用 C++ 写的字符串近似匹配算法,但是其中用到的 char* 在 C# 中该如何处理?如何在 char* 和 string 之间进行转换?另外,这段程序需要做哪些修改才能在 C# 中使用?
程序:
/*
 * Bit-parallel approximate substring search.
 *
 * Scans `text` for a position where `pat` occurs with at most `k`
 * differences.  The whole search state lives in one 64-bit word made of
 * (m - k) fields of (k + 2) bits each, where m = strlen(pat); that is why
 * the pattern length and k are bounded by the assertions below.
 *
 * text  NUL-terminated text to scan.
 * pat   NUL-terminated pattern.
 * k     number of differences tolerated.
 *
 * Returns a pointer into `text`, positioned immediately after the text
 * character on which the match was detected, or NULL when nothing matches.
 */
char* amatch(const char* text, const char* pat, int k)
{
    int m = (int)strlen(pat);
    int j;

    /* Shift counts go up to k + 3 and must stay below the 64-bit word width. */
    assert(k >= 0 && k + 3 < 64);
    assert(m - k > 0);
    assert((m - k) * (k + 2) <= 64);

    /* Unsigned 64-bit arithmetic: the shifts and the addition below may
       reach bit 63, which is not well defined for a signed type. */
    const unsigned long long one = 1;
    const unsigned long long onekp1 = (one << (k + 1)) - 1; /* k+1 low bits set */
    const unsigned long long G = one << k;                  /* bit tested to detect a match */
    unsigned long long Din = 0;                             /* initial state: every field = onekp1 */
    unsigned long long M1 = 0;                              /* lowest bit of every field */
    unsigned long long M2 = 0;                              /* like M1, but the lowest field = onekp1 */

    for (j = 0; j < m - k; j++) {
        Din = (Din << (k + 2)) | onekp1;
        M1 = (M1 << (k + 2)) | one;
        if (j < m - k - 1) {
            M2 = (M2 << (k + 2)) | one;
        }
    }
    M2 = (M2 << (k + 2)) | onekp1;

    unsigned long long D = Din;
    const char* s = text;
    int c = *s++;

    while (c) {
        /* Filter: only start the automaton on a character that equals one
           of the first k+1 pattern characters. */
        int found = 0;
        for (j = 0; j < k + 1; j++) {
            if (c == pat[j]) {
                found = 1;
                break;
            }
        }

        if (found) {
            do {
                /* Bit j of tc is set when pat[j] differs from the current
                   text character.  (The mask must go into tc, not into c.) */
                unsigned long long tc = 0;
                for (j = 0; j < m; j++) {
                    if (c != pat[j]) {
                        tc |= one << j;
                    }
                }

                /* Spread the mask: field j receives bits j .. j+k of tc. */
                unsigned long long Tc = 0;
                for (j = 0; j < m - k; j++) {
                    Tc = (Tc << (k + 2)) | ((tc >> j) & onekp1);
                }

                unsigned long long x = (D >> (k + 2)) | Tc;
                D = ((D << 1) | M1) & ((D << (k + 3)) | M2) & (((x + M1) ^ x) >> 1) & Din;

                if ((D & G) == 0) {
                    return (char*)s;
                }
                /* Keep feeding characters only while the state differs
                   from the initial one. */
                if (D != Din) {
                    c = *s++;
                }
            } while (D != Din && c);
        }

        if (c) {
            c = *s++;
        }
    }
    return NULL;
}
我在网上看到一个用C++写的字符串近似匹配的算法,但是里面的char* 在C#中如何解决呢,如何在char* 和 string 之间进行转化?还有那个程序要做怎样的修改?
程序:
/*
 * Bit-parallel approximate substring search.
 *
 * Looks in `text` for a place where `pat` occurs with no more than `k`
 * differences.  The search state is a single 64-bit word split into
 * (m - k) fields of (k + 2) bits, with m = strlen(pat), so the pattern
 * length and k are limited by the assertions at the top of the function.
 *
 * text  NUL-terminated text to scan.
 * pat   NUL-terminated pattern.
 * k     number of differences tolerated.
 *
 * Returns a pointer into `text` just past the text character at which the
 * match was detected, or NULL if there is no match.
 */
char* amatch(const char* text, const char* pat, int k)
{
    int m = (int)strlen(pat);
    int j;

    /* The largest shift count used is k + 3; it must be below 64. */
    assert(k >= 0 && k + 3 < 64);
    assert(m - k > 0);
    assert((m - k) * (k + 2) <= 64);

    /* All masks are unsigned 64-bit so that shifting into and adding across
       bit 63 is well defined. */
    const unsigned long long one = 1;
    const unsigned long long onekp1 = (one << (k + 1)) - 1; /* k+1 low bits set */
    const unsigned long long G = one << k;                  /* match-detection bit */
    unsigned long long Din = 0;                             /* initial state: every field = onekp1 */
    unsigned long long M1 = 0;                              /* lowest bit of every field */
    unsigned long long M2 = 0;                              /* like M1, but the lowest field = onekp1 */

    for (j = 0; j < m - k; j++) {
        Din = (Din << (k + 2)) | onekp1;
        M1 = (M1 << (k + 2)) | one;
        if (j < m - k - 1) {
            M2 = (M2 << (k + 2)) | one;
        }
    }
    M2 = (M2 << (k + 2)) | onekp1;

    unsigned long long D = Din;
    const char* s = text;
    int c = *s++;

    while (c) {
        /* Filter: the automaton is only started on a character that equals
           one of the first k+1 pattern characters. */
        int found = 0;
        for (j = 0; j < k + 1; j++) {
            if (c == pat[j]) {
                found = 1;
                break;
            }
        }

        if (found) {
            do {
                /* tc has bit j set when pat[j] != c.  The mask belongs in
                   tc; writing it into c corrupts the text character and
                   leaves tc at zero. */
                unsigned long long tc = 0;
                for (j = 0; j < m; j++) {
                    if (c != pat[j]) {
                        tc |= one << j;
                    }
                }

                /* Field j of Tc takes bits j .. j+k of tc. */
                unsigned long long Tc = 0;
                for (j = 0; j < m - k; j++) {
                    Tc = (Tc << (k + 2)) | ((tc >> j) & onekp1);
                }

                unsigned long long x = (D >> (k + 2)) | Tc;
                D = ((D << 1) | M1) & ((D << (k + 3)) | M2) & (((x + M1) ^ x) >> 1) & Din;

                if ((D & G) == 0) {
                    return (char*)s;
                }
                /* Consume further text only while the state is not back at
                   its initial value. */
                if (D != Din) {
                    c = *s++;
                }
            } while (D != Din && c);
        }

        if (c) {
            c = *s++;
        }
    }
    return NULL;
}
C#里不用指针
你的意思是把 char* 改为 string,然后在这个函数里用 string[0] 之类的下标方式来读取字符?谁能写一个在 char* 和 string 之间相互转换的具体函数?
近似匹配就是像一个字符串 “XXX0123456789XXX”,输入“01256”、“0189”、“4578”、“1789”等字符串,都能和原来的字符串匹配上,就是能找到 0123456789 这个子串
用正则表达式能表示这种匹配吗?
贴一段修改后的代码:
/// <summary>
/// Bit-parallel approximate substring search (C# port of the pointer-based
/// C/C++ routine of the same name).
/// Looks in <paramref name="text"/> for a place where <paramref name="pat"/>
/// occurs with at most <paramref name="k"/> differences. The search state is
/// one 64-bit word split into (m - k) fields of (k + 2) bits, m = pat.Length,
/// which bounds the usable pattern length and k.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pat">Pattern to look for.</param>
/// <param name="k">Number of differences tolerated.</param>
/// <returns>
/// The remainder of <paramref name="text"/> that follows the character at
/// which the match was detected (possibly the empty string), or null when
/// there is no match.
/// </returns>
string amatch(string text, string pat, int k)
{
    int m = pat.Length;
    int j;

    // Same preconditions as the C version; the largest shift count is k + 3.
    System.Diagnostics.Debug.Assert(k >= 0 && k + 3 < 64);
    System.Diagnostics.Debug.Assert(m - k > 0);
    System.Diagnostics.Debug.Assert((m - k) * (k + 2) <= 64);

    // Strings have no terminator, so -1 (never a valid char) marks the end
    // of the text, playing the role of '\0' in the C version.
    const int EndOfText = -1;

    // Unsigned 64-bit masks: shifts and the addition may reach bit 63.
    ulong onekp1 = (1UL << (k + 1)) - 1;   // k+1 low bits set
    ulong G = 1UL << k;                    // match-detection bit
    ulong Din = 0;                         // initial state: every field = onekp1
    ulong M1 = 0;                          // lowest bit of every field
    ulong M2 = 0;                          // like M1, but the lowest field = onekp1

    for (j = 0; j < m - k; j++)
    {
        Din = (Din << (k + 2)) | onekp1;
        M1 = (M1 << (k + 2)) | 1UL;
        if (j < m - k - 1)
        {
            M2 = (M2 << (k + 2)) | 1UL;
        }
    }
    M2 = (M2 << (k + 2)) | onekp1;

    ulong D = Din;
    int pos = 0;   // index of the next unread character of text
    int c = pos < text.Length ? text[pos++] : EndOfText;

    while (c != EndOfText)
    {
        // Filter: only start the automaton on a character that equals one
        // of the first k+1 pattern characters.
        bool found = false;
        for (j = 0; j < k + 1; j++)
        {
            if (c == pat[j])
            {
                found = true;
                break;
            }
        }

        if (found)
        {
            do
            {
                // Bit j of tc is set when pat[j] differs from the current
                // text character (the mask goes into tc, not into c).
                ulong tc = 0;
                for (j = 0; j < m; j++)
                {
                    if (c != pat[j])
                    {
                        tc |= 1UL << j;
                    }
                }

                // Field j of Tc takes bits j .. j+k of tc.
                ulong Tc = 0;
                for (j = 0; j < m - k; j++)
                {
                    Tc = (Tc << (k + 2)) | ((tc >> j) & onekp1);
                }

                ulong x = (D >> (k + 2)) | Tc;
                D = ((D << 1) | M1) & ((D << (k + 3)) | M2) & (((x + M1) ^ x) >> 1) & Din;

                if ((D & G) == 0)
                {
                    return text.Substring(pos);
                }
                // Consume further text only while the state is not back at
                // its initial value.
                if (D != Din)
                {
                    c = pos < text.Length ? text[pos++] : EndOfText;
                }
            }
            while (D != Din && c != EndOfText);
        }

        if (c != EndOfText)
        {
            c = pos < text.Length ? text[pos++] : EndOfText;
        }
    }
    return null;
}