我用delphi写了一个统计单词的程序:我已经能够统计出文本里有多少个英文单词了,可是我无论如何也高不明白怎样将重复的单词去掉,无法实现只计算不重复的单词。下面是我的代码,跪请高手们一定帮我这个忙,谢谢了
unit Unit1;interfaceuses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, ComCtrls;type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
Button2: TButton;
OpenDialog1: TOpenDialog;
Button3: TButton;
Label1: TLabel;
Label2: TLabel;
procedure FormShow(Sender: TObject);
procedure Button1Click(Sender: TObject);
procedure Button2Click(Sender: TObject);
procedure Button3Click(Sender: TObject);
procedure FormCreate(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;var
Form1: TForm1;
oString:Tstream;
wordcount:integer;
a:array[0..1000] of string;
i,j:integer;implementation{$R *.dfm}procedure TForm1.FormShow(Sender: TObject);
begin
wordcount:=0;
i:=0;
j:=0;
a[0]:=' ';
end;procedure TForm1.Button1Click(Sender: TObject);
label outer;
//使用goto语句必须在此用label声明,且一旦声明就必须使用
begin
oString := TStringStream.Create(Memo1.Lines.text);
with TParser.Create(oString) do
try
while(Token <> toEOF) do
begin
if Token = toSymbol then
begin
for j:=0 to 1000 do //问题可能出在toSymbol
begin
if toSymbol=a[j] then
goto outer;
end;
a[i]:=toSymbol; //我想用一个数组来接收Memo中的所有单词
Inc(i);
outer:
Inc(wordcount);
end;
NextToken;
end;
finally
Free;
oString.Free;
end;
Label1.caption:=('单词共:'+inttostr(wordcount)+'个');
Label2.caption:=('不同的单词共:'+inttostr(i)+'个');
wordcount:=0; //清零
i:=0; //清零
end;
procedure TForm1.Button2Click(Sender: TObject);
begin
opendialog1.Filter:='文本文件(*.txt)|*.txt';
if opendialog1.Execute then
memo1.Lines.LoadFromFile(opendialog1.FileName);
end;procedure TForm1.Button3Click(Sender: TObject);
begin
Memo1.Lines.Clear;
Label1.Caption:=' ';
Label2.Caption:=' ';
end;procedure TForm1.FormCreate(Sender: TObject);
begin
Memo1.Lines.Clear;
end;end.
unit Unit1;interfaceuses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, ComCtrls;type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
Button2: TButton;
OpenDialog1: TOpenDialog;
Button3: TButton;
Label1: TLabel;
Label2: TLabel;
procedure FormShow(Sender: TObject);
procedure Button1Click(Sender: TObject);
procedure Button2Click(Sender: TObject);
procedure Button3Click(Sender: TObject);
procedure FormCreate(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;var
Form1: TForm1;
oString:Tstream;
wordcount:integer;
a:array[0..1000] of string;
i,j:integer;implementation{$R *.dfm}procedure TForm1.FormShow(Sender: TObject);
begin
wordcount:=0;
i:=0;
j:=0;
a[0]:=' ';
end;procedure TForm1.Button1Click(Sender: TObject);
label outer;
//使用goto语句必须在此用label声明,且一旦声明就必须使用
begin
oString := TStringStream.Create(Memo1.Lines.text);
with TParser.Create(oString) do
try
while(Token <> toEOF) do
begin
if Token = toSymbol then
begin
for j:=0 to 1000 do //问题可能出在toSymbol
begin
if toSymbol=a[j] then
goto outer;
end;
a[i]:=toSymbol; //我想用一个数组来接收Memo中的所有单词
Inc(i);
outer:
Inc(wordcount);
end;
NextToken;
end;
finally
Free;
oString.Free;
end;
Label1.caption:=('单词共:'+inttostr(wordcount)+'个');
Label2.caption:=('不同的单词共:'+inttostr(i)+'个');
wordcount:=0; //清零
i:=0; //清零
end;
procedure TForm1.Button2Click(Sender: TObject);
begin
opendialog1.Filter:='文本文件(*.txt)|*.txt';
if opendialog1.Execute then
memo1.Lines.LoadFromFile(opendialog1.FileName);
end;procedure TForm1.Button3Click(Sender: TObject);
begin
Memo1.Lines.Clear;
Label1.Caption:=' ';
Label2.Caption:=' ';
end;procedure TForm1.FormCreate(Sender: TObject);
begin
Memo1.Lines.Clear;
end;end.
procedure TForm1.Button4Click(Sender: TObject);
label outer;
//使用goto语句必须在此用label声明,且一旦声明就必须使用
var
mystr:string;
begin
for j:=0 to 1000 do
begin
a[j] := ''; //初始化数组
end; oString := TStringStream.Create(Memo1.Lines.text);
with TParser.Create(oString) do
try
MyStr := TokenString;
if (TokenSymbolIs(TokenString)) then
begin
a[0]:=MyStr; //我想用一个数组来接收Memo中的所有单词
Inc(i);
Inc(wordcount);
end; while(Token <> toEOF) do
begin
//if Token = toString then
NextToken;
if (TokenSymbolIs(TokenString)) then
begin
MyStr := TokenString;
for j:=0 to 1000 do //问题可能出在toSymbol
begin
if MyStr=a[j] then
goto outer;
end;
a[i]:=MyStr; //我想用一个数组来接收Memo中的所有单词
Inc(i);
outer:
Inc(wordcount);
end;
end;
finally
Free;
oString.Free;
end;
Label1.caption:=('单词共:'+inttostr(wordcount)+'个');
Label2.caption:=('不同的单词共:'+inttostr(i)+'个');
wordcount:=0; //清零
i:=0; //清零
end;
楼主应该好好考虑一下,
而是构造一个完全二叉树,利用二叉树的特性,来实现单词查找比较,词频统计,不也错的.
var
I: Integer;
vStringStream: TStringStream;
vWords: TStringList;
vStr: string;
begin
I := 0;
vStringStream := TStringStream.Create(Memo1.Lines.text);
vWords := TStringList.Create;
with TParser.Create(vStringStream) do try
vWords.Sorted := True;
while(Token <> toEOF) do
begin
if TokenSymbolIs(TokenString) then
begin
vStr := TokenString;
if vWords.IndexOf(vStr) < 0 then // 如果单词已经添加了
vWords.Add(vStr);
Inc(I);
end;
NextToken;
end; Label1.Caption := Format('单词共: %d个', [I]);
Label2.Caption := Format('不同的单词共: %d个', [vWords.Count]);
//show Memo2.Lines.Assign(vWords);
finally
vWords.Free;
Free;
vStringStream.Free;
end;
end;
No.1 尽量别用goto语句
No.2 可以利用TStringList实现快速定位
错误好像是出在了while(Token <> toEOF) do这一行代码上,请教各位为什么会这样呢?
//note: TParser is designed for DFM's so that toString only works with
//'single quoted' strings也就是说TParser这个为DFM解析而写的类,关于'号的处理,有有特殊要求的,那就是要成对出现,并且两个''之字符,将认为是字符串.也就是说,在用来解析普通字符串时,会因为如Teacher's day这样的句中,
因为只有一个',那是不完整的字符串表达式,是无效的表达式,
所以必然产生解析异常;若改为 Teacher''s day 这样,可以避免这种错(Invalid string constant on line 1);如果改成Teacher'abc's day;将解析出四个字符串
Teacher
abc
s
day结论:
当楼主要用TParser 类开统计词频时,确实需要处理这种情况;嘿嘿,处理这种一个'号的情况,楼主可以另外再写一个算法,检查字符串中,如果只有一个',那就再添加一个',再去用TParser 类解析字符串
[超级大笨狼]提供三万单词库备份下载,MS-SQL 2000格式,下载请“自觉”捐赠可用分给我。