这个算法精准度有问题,
比如: txt1:='1,2,3,4,5,6,7,8,';
txt2:='1,2,3,4,5,6,7,8,9,10';
结果确也是:100%如何达到这样的结果:
80%因为,我是以逗号分开的,要判断的是逗号间的数字,而且像,1和10这样的话是不按相似来算的.这个咋改?路过的高人帮看下
//相似度测试
function CompareStringsInPercent(Str1, Str2: string): byte;
type
TLink = array [0 .. 1] of byte;
var
tmpPattern: TLink;
PatternA, PatternB: array of TLink;
IndexA, IndexB, LengthStr: Integer;
begin
Result := 100;
// Building pattern tables
LengthStr :=Max(Length(Str1), Length(Str2));
for IndexA := 1 to LengthStr do
begin
if Length(Str1) >= IndexA then
begin
SetLength(PatternA, (Length(PatternA) + 1));
PatternA[Length(PatternA) - 1][0] := byte(Str1[IndexA]);
PatternA[Length(PatternA) - 1][1] := IndexA;
end;
if Length(Str2) >= IndexA then
begin
SetLength(PatternB, (Length(PatternB) + 1));
PatternB[Length(PatternB) - 1][0] := byte(Str2[IndexA]);
PatternB[Length(PatternB) - 1][1] := IndexA;
end;
end;
// Quick Sort of pattern tables
IndexA := 0;
IndexB := 0;
while ((IndexA < (Length(PatternA) - 1)) and (IndexB < (Length(PatternB) - 1))
) do
begin
if Length(PatternA) > IndexA then
begin
if PatternA[IndexA][0] < PatternA[IndexA + 1][0] then
begin
tmpPattern[0] := PatternA[IndexA][0];
tmpPattern[1] := PatternA[IndexA][1];
PatternA[IndexA][0] := PatternA[IndexA + 1][0];
PatternA[IndexA][1] := PatternA[IndexA + 1][1];
PatternA[IndexA + 1][0] := tmpPattern[0];
PatternA[IndexA + 1][1] := tmpPattern[1];
if IndexA > 0 then
Dec(IndexA);
end
else
Inc(IndexA);
end;
if Length(PatternB) > IndexB then
begin
if PatternB[IndexB][0] < PatternB[IndexB + 1][0] then
begin
tmpPattern[0] := PatternB[IndexB][0];
tmpPattern[1] := PatternB[IndexB][1];
PatternB[IndexB][0] := PatternB[IndexB + 1][0];
PatternB[IndexB][1] := PatternB[IndexB + 1][1];
PatternB[IndexB + 1][0] := tmpPattern[0];
PatternB[IndexB + 1][1] := tmpPattern[1];
if IndexB > 0 then
Dec(IndexB);
end
else
Inc(IndexB);
end;
end;
// Calculating simularity percentage
LengthStr := Min(Length(PatternA), Length(PatternB));
for IndexA := 0 to (LengthStr - 1) do
begin
if PatternA[IndexA][0] = PatternB[IndexA][0] then
begin
if Max(PatternA[IndexA][1], PatternB[IndexA][1]) - Min
(PatternA[IndexA][1], PatternB[IndexA][1]) > 0 then
Dec(Result, ((100 div LengthStr) div (Max(PatternA[IndexA][1],
PatternB[IndexA][1]) - Min(PatternA[IndexA][1],
PatternB[IndexA][1]))))
else if Result < 100 then
Inc(Result);
end
else
Dec(Result, (100 div LengthStr))
end;
SetLength(PatternA, 0);
SetLength(PatternB, 0);
end;
比如: txt1:='1,2,3,4,5,6,7,8,';
txt2:='1,2,3,4,5,6,7,8,9,10';
结果确也是:100%如何达到这样的结果:
80%因为,我是以逗号分开的,要判断的是逗号间的数字,而且像,1和10这样的话是不按相似来算的.这个咋改?路过的高人帮看下
//相似度测试
function CompareStringsInPercent(Str1, Str2: string): byte;
type
TLink = array [0 .. 1] of byte;
var
tmpPattern: TLink;
PatternA, PatternB: array of TLink;
IndexA, IndexB, LengthStr: Integer;
begin
Result := 100;
// Building pattern tables
LengthStr :=Max(Length(Str1), Length(Str2));
for IndexA := 1 to LengthStr do
begin
if Length(Str1) >= IndexA then
begin
SetLength(PatternA, (Length(PatternA) + 1));
PatternA[Length(PatternA) - 1][0] := byte(Str1[IndexA]);
PatternA[Length(PatternA) - 1][1] := IndexA;
end;
if Length(Str2) >= IndexA then
begin
SetLength(PatternB, (Length(PatternB) + 1));
PatternB[Length(PatternB) - 1][0] := byte(Str2[IndexA]);
PatternB[Length(PatternB) - 1][1] := IndexA;
end;
end;
// Quick Sort of pattern tables
IndexA := 0;
IndexB := 0;
while ((IndexA < (Length(PatternA) - 1)) and (IndexB < (Length(PatternB) - 1))
) do
begin
if Length(PatternA) > IndexA then
begin
if PatternA[IndexA][0] < PatternA[IndexA + 1][0] then
begin
tmpPattern[0] := PatternA[IndexA][0];
tmpPattern[1] := PatternA[IndexA][1];
PatternA[IndexA][0] := PatternA[IndexA + 1][0];
PatternA[IndexA][1] := PatternA[IndexA + 1][1];
PatternA[IndexA + 1][0] := tmpPattern[0];
PatternA[IndexA + 1][1] := tmpPattern[1];
if IndexA > 0 then
Dec(IndexA);
end
else
Inc(IndexA);
end;
if Length(PatternB) > IndexB then
begin
if PatternB[IndexB][0] < PatternB[IndexB + 1][0] then
begin
tmpPattern[0] := PatternB[IndexB][0];
tmpPattern[1] := PatternB[IndexB][1];
PatternB[IndexB][0] := PatternB[IndexB + 1][0];
PatternB[IndexB][1] := PatternB[IndexB + 1][1];
PatternB[IndexB + 1][0] := tmpPattern[0];
PatternB[IndexB + 1][1] := tmpPattern[1];
if IndexB > 0 then
Dec(IndexB);
end
else
Inc(IndexB);
end;
end;
// Calculating simularity percentage
LengthStr := Min(Length(PatternA), Length(PatternB));
for IndexA := 0 to (LengthStr - 1) do
begin
if PatternA[IndexA][0] = PatternB[IndexA][0] then
begin
if Max(PatternA[IndexA][1], PatternB[IndexA][1]) - Min
(PatternA[IndexA][1], PatternB[IndexA][1]) > 0 then
Dec(Result, ((100 div LengthStr) div (Max(PatternA[IndexA][1],
PatternB[IndexA][1]) - Min(PatternA[IndexA][1],
PatternB[IndexA][1]))))
else if Result < 100 then
Inc(Result);
end
else
Dec(Result, (100 div LengthStr))
end;
SetLength(PatternA, 0);
SetLength(PatternB, 0);
end;
那就先得到3个结果:
x=数值相同的个数
y=a多出来的数值个数
z=b多出来的数值个数相似度=f(x,y,z)