如果用BMP算法,需要提供的参数有源字符串和目标字符串,但是源目标字符串有10M,恐怕不行,如果用strPos函数,则需要为这个文本文件buffer分配10M空间,然后再用一个pChar指针指向它,但是给buffer分配10M的空间是否会出错呢,有人能指点一下吗
解决方案 »
- 兄弟们有问题了高手请看。。。。。。。。。。。。。。。。。。。
- delphi下intraweb开发的exe文档如何在iis下面发布?
- 跟潮流,散200祝大家新年愉快!狗年行狗运!
- 问题:如何自己定义一个新的键盘布局?用delphi7可以实现吗?
- 如何在delphi下快速查看project1.dpr的内容?
- paradox数据库能在局域网上让多机远程访问吗?如果可以,怎么实现呢?
- 请求高手帮忙!究竟出了什么问题 ?如何释放DLL 中的MDI 窗体 及DLL
- 我用DBGRID新增一条记录因重复键不能保存,而当我取消时,如何将DBGRID中新增的那行去掉?cancel没反应,delete又会将我调出修改的记录取消时删掉.
- 我有个幼稚的问题:如何送分?
- 哪位高手能给我一个用delphi写的proxy程序,我把我的分儿全给他!
- 如何用delphi 建立一个类,再使用对象?
- dhtml北景图片怎么插入的.
看例子
var
Buf : String;
Key : String;
FP : PChar;
begin
Buf := '';//改成你的读取过程
Key := '查询字符串';
//首先得判断一下查询和被查询字符串是否为空
FP := StrPos(@Buf[1],@Key[1]);
.......
end;
unit Unit1;interfaceuses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, StrUtils;type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;var
Form1: TForm1;implementation{$R *.dfm}
function KMP(tent,path:String;StartPos:Integer=1;EndPos:Integer=-1):Integer;
var
f:array of Integer;
t,i,j,p:Integer;
lent:Integer;
lenp:Integer;
begin
if EndPos>0 then
Lent:=EndPos
else
lent:=Length(tent);
lenp:=Length(path);
SetLength(f,lenp+1);
f[1]:=0;
for j:=2 to lenp do
begin
i:=f[j-1];
while (path[j]<>path[i+1])and (i>0) do i:=f[i];
if path[j]=path[i+1] then
f[j]:=i+1
else
f[j]:=0;
end;
t:=StartPos;
p:=1;
while (t<=lent) and (p<=lenp) do
begin
if tent[t]=path[p] then
begin
Inc(t);
Inc(p);
end
else if p=1 then
Inc(t)
else p:=f[p-1]+1;
end;
if p<lenp then
Result:=-1
else
Result:=t-lenp;
end;function FindStrInFile(Str,FileName:String;var Positions:Array of Int64):Integer;
var
FS:TFileStream;
Buffer:String;
i,ReadBegin,ReadSize,ReadLen:Integer;
BeginPos:Int64;
SLen:Integer;
begin
Result:=0;
SLen:=Length(Str);
ReadBegin:=1;
ReadSize:=SLen*100;
BeginPos:=0;
SetLength(Buffer,ReadSize*2);
FS:=TFileStream.Create(FileName,fmOpenRead or fmShareDenyNone); try
while Result<High(Positions) do
begin
ReadLen:=FS.Read(Buffer[ReadBegin],ReadSize);
if ReadLen>0 then
begin
i:=1;
repeat
i:=KMP(Buffer,Str,i,ReadBegin+ReadLen-1);
if i>0 then
begin
Positions[Result]:=BeginPos+i;
Inc(Result);
Inc(i,SLen);
end;
until i<0;
ReadSize:=SLen*99+1;
if FS.Size-FS.Position<ReadSize then ReadSize:=FS.Size-FS.Position;
ReadBegin:=100*SLen-ReadSize;
Move(Buffer[ReadSize+1],Buffer[1],ReadBegin);
BeginPos:=BeginPos+ReadSize;
end
else
Break;
end;
finally
FS.Free;
end;
end;
procedure TForm1.Button1Click(Sender: TObject);
var
ps:Array[0..100] of Int64;//搜索匹配窜的确位置序列
i,n:Integer;
begin
n:=FindStrInFile('xml',ExtractFilePath(Application.ExeName)+'QTDB.TXT',ps);
for i:=0 to n-1 do
begin
Memo1.Lines.Add('第'+IntToStr(i)+'位置在:'+IntToStr(ps[i]));
end;
end;end.
然后你又在问题里说“如果用BMP算法,需要提供的参数有源字符串和目标字符串,但是源目标字符串有10M,恐怕不行”。为什么不行?KMP根本是不需要回溯的。而且,你就算用strpos,照样要把那10M从头到尾过一遍。那么,你说那话的原因很清楚,你把那个10M的文件当做模式串了。
下面的话证实了我的判断“KMP算法要源字符串作参数,很难想象源字符串有10M大小”所以,你完全可以用KMP,只是要先把概念弄明白。
http://mhss.nease.net/string/KMP.html
begin
// PosEx in D7.StrUtils
Result := PosEx(ASub, ASource, StartPos);
end;
type
TFindPos = function(const ASource, ASub: string; StartPos: Integer): Integer;procedure TForm1.CompareButtonClick(Sender: TObject); function StringFromFile(const FileName: string): string;
var
FileSize: Integer;
begin
with TMemoryStream.Create do
try
LoadFromFile(FileName);
FileSize := Size;
SetLength(Result, FileSize);
Move(Memory^, Result[1], FileSize);
finally
Free;
end;
end;
procedure Search(const ASource, ASub, MethodName: string; SearchPos: TFindPos);
var
S1, E1: DWORD;
S2, E2: Int64;
SubLen, StartPos, SearchCount: Integer;
begin
S1 := GetTickCount;
QueryPerformanceCounter(S2);
StartPos := 1;
SearchCount := 0;
SubLen := Length(ASub);
StartPos := SearchPos(ASource, ASub, StartPos);
while StartPos <> 0 do
begin
Inc(SearchCount);
Inc(StartPos, SubLen);
StartPos := SearchPos(ASource, ASub, StartPos);
end;
QueryPerformanceCounter(E2);
E1 := GetTickCount;
MsgMemo.Lines.Add(Format('%20.20s GetTick Time: %10.10d, Query Counter: %10.10d, Search: %d',
[MethodName, E1 - S1, E2 - S2, SearchCount]));
end;var
Source, Find: string;
begin
Find := Trim(FindEdit.Text);
if Find = '' then
raise Exception.Create('嗯,找到N多,俺就不说了。');
Source := StringFromFile(FileEdit.Text);
try
Search(Source, Find, 'FindPosWithPosEx', FindPosWithSys);
finally
SetLength(Source, 0);
end;
end;
TFindPos = function(const ASource, ASub: string; StartPos: Integer): Integer;
然后套到上面的
try
Search(Source, Find, 'FindPosWithKMP', FindPosWithKMP);
finally
SetLength(Source, 0);
end;看看KMP跟PosEx快多少
unit Unit1;interfaceuses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, StrUtils;type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;var
Form1: TForm1;implementation{$R *.dfm}
function KMP(tent,path:String;StartPos:Integer=1;EndPos:Integer=-1):Integer;
var
f:array of Integer;
t,i,j,p:Integer;
lent:Integer;
lenp:Integer;
begin
if EndPos>0 then
Lent:=EndPos
else
lent:=Length(tent);
lenp:=Length(path);
SetLength(f,lenp+1);
f[1]:=0;
for j:=2 to lenp do
begin
i:=f[j-1];
while (path[j]<>path[i+1])and (i>0) do i:=f[i];
if path[j]=path[i+1] then
f[j]:=i+1
else
f[j]:=0;
end;
t:=StartPos;
p:=1;
while (t<=lent) and (p<=lenp) do
begin
if tent[t]=path[p] then
begin
Inc(t);
Inc(p);
end
else if p=1 then
Inc(t)
else p:=f[p-1]+1;
end;
if p<lenp then
Result:=-1
else
Result:=t-lenp;
end;function FindStrInFile(Str,FileName:String;var Positions:Array of Int64):Integer;
const
BUFFERBLOCK=10000;//调整这个可以加快文件读取进度
var
FS:TFileStream;
Buffer:String;
i,ReadBegin,ReadSize,ReadLen:Integer;
BeginPos:Int64;
SLen:Integer;
begin
Result:=0;
SLen:=Length(Str);
ReadBegin:=1;
ReadSize:=SLen*BUFFERBLOCK;
BeginPos:=0;
SetLength(Buffer,ReadSize);
FS:=TFileStream.Create(FileName,fmOpenRead or fmShareDenyNone);
try
while Result<High(Positions) do
begin
ReadLen:=FS.Read(Buffer[ReadBegin],ReadSize);
if ReadLen>0 then
begin
i:=1;
repeat
i:=KMP(Buffer,Str,i,ReadBegin+ReadLen-1);
if i>0 then
begin
Positions[Result]:=BeginPos+i;
Inc(Result);
Inc(i,SLen);
end;
until i<0;
ReadSize:=(BUFFERBLOCK-1)*SLen+1;
if FS.Size-FS.Position<ReadSize then ReadSize:=FS.Size-FS.Position;
ReadBegin:=BUFFERBLOCK*SLen-ReadSize+1;
Move(Buffer[ReadSize+1],Buffer[1],ReadBegin-1);
BeginPos:=BeginPos+ReadSize;
end
else
Break;
end;
finally
FS.Free;
end;
end;
procedure TForm1.Button1Click(Sender: TObject);
var
ps:Array[0..100] of Int64;//搜索匹配窜的确位置序列
i,n:Integer;
begin
n:=FindStrInFile('xml',ExtractFilePath(Application.ExeName)+'QTDB.TXT',ps);
for i:=0 to n-1 do
begin
Memo1.Lines.Add('第'+IntToStr(i)+'位置在:'+IntToStr(ps[i]));
end;
end;end.
function PosEx(const SubStr, S: string; Offset: Cardinal = 1): Integer;
var
I,X: Integer;
Len, LenSubStr: Integer;
begin
if Offset = 1 then
Result := Pos(SubStr, S)
else
begin
I := Offset;
LenSubStr := Length(SubStr);
Len := Length(S) - LenSubStr + 1;
while I <= Len do
begin
if S[I] = SubStr[1] then
begin
X := 1;
while (X < LenSubStr) and (S[I + X] = SubStr[X + 1]) do
Inc(X);
if (X = LenSubStr) then
begin
Result := I;
exit;
end;
end;
Inc(I);
end;
Result := 0;
end;
end;
你将你的KMP写成: FindPos(const ASource, ASub: string; StartPos: Integer): Integer;格式,然后代入那上面的代码去比较一下。我只想比较一下,不管哪种写法。
另:刚才改了你的KMP函数,还真是吓我一跳,丢到我的那函数中,好像死掉了,郁闷,还是你来吧。
const
MAX_CHAR = 256;
SizeInt = SizeOf(Integer);
type
PByteArr = ^TByteArr;
TByteArr = array [0..MaxInt - 1] of Byte;
var
Src, Sub: PByte;
I, J, CurrPos, SubLen, SrcLen: Integer;
Buffer: array [0..MAX_CHAR - 1] of Integer;
begin
Result := 0;
SubLen := Length(ASub);
SrcLen := Length(ASource);
if SubLen > SrcLen then Exit; Sub := PByte(ASub);
Src := PByte(ASource);
for I := 0 to MAX_CHAR - 1 do
Buffer[I] := SubLen;
for I := 0 to SubLen - 2 do
Buffer[PByteArr(Sub)^[I]] := SubLen - I - 1; CurrPos := SubLen + StartPos - 2;
while CurrPos < SrcLen do
begin
I := CurrPos;
J := SubLen - 1;
while (J >= 0) and ((PByteArr(Src)^[I] = PByteArr(Sub)^[J])) do
begin
Dec(J);
Dec(I);
end;
if -1 = J then
begin
Result := CurrPos - SubLen + 1;
break;
end;
Inc(CurrPos, Buffer[PByteArr(Src)^[CurrPos]]);
end;
end;function KMP(const ASource, ASub: string; StartPos: Integer = 1):Integer;
var
F: array of Integer;
I, J, SrcLen, SubLen: Integer;
begin
Result := 0;
SubLen := Length(ASub);
SetLength(f,SubLen + 1);
SrcLen := Length(ASource);
F[0] := 0;
F[1] := 0; I := 1; J := 0;
FillChar(F[0], SizeOf(Integer) * (SubLen + 1), 0);
while I < SubLen do
if (J = 0) or (ASub[I] = ASub[J]) then
begin
Inc(I);
Inc(J);
F[I] := J;
end else
J := F[J]; J := 1;
I := StartPos;
while (I <= SrcLen) and (J <= SubLen) do
begin
if ASource[I] = ASub[J] then
begin
Inc(I);
Inc(J);
end
else
if J = 1 then
Inc(I)
else
J := F[J - 1] + 1;
if J > SubLen then
begin
Result := I - SubLen;
break;
end;
end;
end;function FindPosWithKMP(const ASource, ASub: string; AStartPos: Integer): Integer;
begin
Result := KMP(ASource, ASub, AStartPos);
end;
>>如果用BMP算法,需要提供的参数有源字符串和目标字符串,但是源目标字符串有10M,恐怕不
>>行,如果用strPos函数,则需要为这个文本文件buffer分配10M空间,然后再用一个pChar指针
>>指向它,但是给buffer分配10M的空间是否会出错呢,有人能指点一下吗用发贴内容生成的文件有10M,查找的内容是:buffer
测试的结果是:
FindPosWithBM GetTick Time: 0000000047, Query Counter: 0000194279, Search: 88318
FindPosWithSys GetTick Time: 0000000031, Query Counter: 0000125786, Search: 88318
FindPosWithKMP GetTick Time: 0000000125, Query Counter: 0000474927, Search: 88318FindPosWithBM GetTick Time: 0000000063, Query Counter: 0000217063, Search: 88318
FindPosWithSys GetTick Time: 0000000047, Query Counter: 0000190896, Search: 88318
FindPosWithKMP GetTick Time: 0000000110, Query Counter: 0000419372, Search: 88318FindPosWithBM GetTick Time: 0000000078, Query Counter: 0000234192, Search: 88318
FindPosWithSys GetTick Time: 0000000047, Query Counter: 0000150726, Search: 88318
FindPosWithKMP GetTick Time: 0000000140, Query Counter: 0000482356, Search: 88318procedure TForm1.AddFileButtonClick(Sender: TObject); procedure AddFile(FileName: string);
const
TEST_SIZE = 10024 * 1000;
var
Buffer: Pointer;
FileSize, Count: Integer;
begin
with TFileStream.Create(FileName, fmOpenReadWrite) do
try
FileSize := Size;
Count := FileSize;
GetMem(Buffer, Count);
try
ReadBuffer(Buffer^, Count);
Inc(FileSize, Write(Buffer^, Count));
while TEST_SIZE > FileSize do
Inc(FileSize, Write(Buffer^, Count));
Caption := Format('文件大小为:%d', [FileSize]);
finally
FreeMem(Buffer);
end;
finally
Free;
end;
end;
begin
AddFile(FileEdit.Text);
end;
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, StrUtils, Math, ZLib;type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;var
Form1: TForm1;implementation{$R *.dfm}
function KMP(tent,path:String;StartPos:Integer=1;EndPos:Integer=-1):Integer;
var
f:array of Integer;
t,i,j,p:Integer;
lent:Integer;
lenp:Integer;
begin
if EndPos>0 then
Lent:=EndPos
else
lent:=Length(tent);
lenp:=Length(path);
SetLength(f,lenp+1);
f[1]:=0;
for j:=2 to lenp do
begin
i:=f[j-1];
while (path[j]<>path[i+1])and (i>0) do i:=f[i];
if path[j]=path[i+1] then
f[j]:=i+1
else
f[j]:=0;
end;
t:=StartPos;
p:=1;
while (t<=lent) and (p<=lenp) do
begin
if tent[t]=path[p] then
begin
Inc(t);
Inc(p);
end
else if p=1 then
Inc(t)
else p:=f[p-1]+1;
end;
if p<lenp then
Result:=-1
else
Result:=t-lenp;
end;
procedure FindStrInFile(Str,FileName:String;out Positions:Pointer;out GetPosNum:Integer);
const
BUFFERBLOCK=10000;//调整这个可以加快文件读取进度
var
FS:TFileStream;
Buffer:String;
i,ReadBegin,ReadSize,ReadLen:Integer;
BeginPos,Count:Int64;
SLen:Integer;
begin
Count:=0;
GetPosNum:=BUFFERBLOCK;
GetMem(Positions,SizeOf(Int64)*GetPosNum);
SLen:=Length(Str);
ReadBegin:=1;
ReadSize:=SLen*BUFFERBLOCK;
BeginPos:=0;
SetLength(Buffer,ReadSize);
FS:=TFileStream.Create(FileName,fmOpenRead or fmShareDenyNone);
try
try
while True do
begin
ReadLen:=FS.Read(Buffer[ReadBegin],ReadSize);
if ReadLen>0 then
begin
i:=1;
repeat
i:=KMP(Buffer,Str,i,ReadBegin+ReadLen-1);
if i>0 then
begin
if Count>GetPosNum then
begin
Inc(GetPosNum,BUFFERBLOCK*SizeOf(Int64));
ReallocMem(Positions,GetPosNum);
end;
PInt64(Integer(Positions)+Count*SizeOf(Int64))^:=BeginPos+i;
Inc(Count);
Inc(i,SLen);
end;
until i<0;
if ReadLen<ReadSize then Break;
if Count>0 then
ReadSize:=Max((BUFFERBLOCK-1)*SLen+1,i+SLen)
else
ReadSize:=(BUFFERBLOCK-1)*SLen+1;
if FS.Size-FS.Position<ReadSize then ReadSize:=FS.Size-FS.Position;
ReadBegin:=BUFFERBLOCK*SLen-ReadSize+1;
Move(Buffer[ReadSize+1],Buffer[1],ReadBegin-1);
BeginPos:=BeginPos+ReadSize;
end
else
Break;
end;
GetPosNum:=Count;
ReallocMem(Positions,GetPosNum*SizeOf(Int64));
except
on E:Exception do
begin
GetPosNum:=0;
FreeMem(Positions);
end;
end;
finally
FS.Free;
end;
end;
procedure TForm1.Button1Click(Sender: TObject);
var
ps:Pointer;//搜索匹配窜的确位置序列
s,e,i,n:Integer;
begin
s:=GetTickCount;
FindStrInFile('xml is a good document!',ExtractFilePath(Application.ExeName)+'QTDB2.TXT',ps,n);
e:=GetTickCount;
Memo1.Lines.Add('本次扫描耗时'+IntToStr(e-s)+'毫秒');
Memo1.Lines.Add('扫描到'+IntToStr(n)+'个结果');
for i:=0 to n-1 do
begin
Memo1.Lines.Add('第'+IntToStr(i+1)+'位置在:'+IntToStr(PInt64(Integer(ps)+i*SizeOf(Int64))^));
end;
FreeMem(ps);
end;end.
一般是用HASH做,我也想研究一下,不过,感觉有点不自量力。
如果文件的1023处有abcde,那么按照你们的方法是找不到的!
lz可以在MSDN里查MapViewOfFile
我好像没使用buf吧,就是使用了buf,那是调用者的问题,而不是算法本身的问题。Windows N多API,我没调用好,那是自己的问题,关MS什么事啊。