就是读入一篇英文文章,然后把用到的单词都罗列出来,并保存成 .TXT 文件,但是重复的不要;不然一篇文章里出现 10 个 IS,也存 10 个 IS 的话,那就太搞笑了。大家帮帮忙,这个问题对我来说够困难的。
大家能讲得详细点吗?我是编程新手。
大家能讲得详细点吗?我是编程新手。
解决方案 »
- DBGridEh checkboxes 问题
- 怎么样截获窗口移动的消息,不用wm_nchittest消息
- 我搞不定它( TidHttp )
- 超难问题!ADO+Excel
- 有些控件总是在最上面,比方说webbrowser,我想把image1放在webbrowser上面,如何实现?
- 我想了很长时间才决定把这个帖子放上来,因为是在网路上,Delphi版也有很多朋友,所以我也就没有什么顾虑地畅所欲言。
- 如何获得当前活动的窗口,并关掉多余的以打开的窗口?
- 动态控件的问题进来看看哟
- delphi MSConnection,mstable,msquery是什么控件,给发一份
- DBlookupcombobox等的问题?
- DBGrid上显示图片?
- ExpressQuantum 系列门产品开发技术专区(大家有好的意见问题在这里提)
{ Extracts the distinct words of a text file and writes them, one per line
  and in sorted order, to another file.

  SourceFile - path of the text file to read. If it does not exist the
               procedure returns without doing anything.
  TokenFile  - path of the file that receives the word list.

  A word is a maximal run of the letters a..z / A..Z. Every other character
  ends the current word, except '_' which is dropped without ending it.
  Duplicates are rejected by the sorted list itself (Duplicates = dupIgnore),
  using the list's own string comparison; the first spelling encountered is
  the one that is kept. }
procedure GetTokens(const SourceFile, TokenFile : string);
const
  alpha = ['a'..'z','A'..'Z'];
var
  slSource, slToken : TStringList;
  token : string;
  xtext : AnsiString;
  i : Integer;

  { Stores the pending word, if any, and starts a new one. }
  procedure FlushToken;
  begin
    if token <> '' then
    begin
      slToken.Add(token); // silently ignored when the word is already present
      token := '';
    end;
  end;

begin
  if not FileExists(SourceFile) then Exit;
  // Start from nil so the finally section is safe even if a constructor
  // raises; Free is a no-op on nil.
  slSource := nil;
  slToken := nil;
  try
    slSource := TStringList.Create;
    slToken := TStringList.Create;
    slToken.Duplicates := dupIgnore;
    slToken.Sorted := True;
    slSource.LoadFromFile(SourceFile);
    xtext := slSource.Text;
    token := '';
    for i := 1 to Length(xtext) do
    begin
      if xtext[i] in alpha then
        token := token + xtext[i]
      else if xtext[i] <> '_' then
        // Any non-letter, including CR and LF, separates words. Line breaks
        // used to be skipped, which glued the last word of a line to the
        // first word of the next one.
        FlushToken;
      // NOTE(review): '_' is still dropped without ending the word, as in the
      // original ("foo_bar" is stored as "foobar") - confirm this is wanted.
    end;
    // Store a word that runs up to the very end of the text.
    FlushToken;
    slToken.SaveToFile(TokenFile);
  finally
    slToken.Free;
    slSource.Free;
  end;
end;
// NOTE(review): the routine heading is missing from this excerpt. The body
// reads Str and Delimiters and assigns a TStringList to Result, and the usage
// snippet that follows calls it as SplitEx(text, delimiterChars) - presumably
// "function SplitEx(const Str, Delimiters: string): TStringList;" - confirm.
//
// Splits Str at every character that occurs in Delimiters and returns the
// pieces in a newly created list that is kept sorted and ignores duplicates.
// Runs of consecutive delimiters produce no empty pieces.
var
ss: WideString;
i, St: integer;
// True when the first character of c occurs in Delimiters.
function IsDelimiter(const Delimiters, c: string): Boolean;
begin
result := StrScan(PChar(Delimiters), c[1]) <> nil;
end;
begin
// The list is created here and handed back through Result.
Result := TStringList.Create;
with Result do
begin
Clear;
Sorted := True;
Duplicates := dupIgnore;
end;
// Empty input: return the (empty) list as is.
if Length(Str) < 1 then exit;
// Scan a WideString copy of the input.
ss := Str;
// St is the start index of the piece currently being scanned, or -1 when no
// piece is open.
St := -1;
for i := 1 to Length(ss) do
if IsDelimiter(Delimiters, ss[i]) then
begin
if St <> -1 then
begin
// A delimiter closes the open piece: add characters St .. i-1.
Result.Add(Copy(ss, St, i - St));
St := -1;
end
end else
if St = -1 then St := i;
// Add the piece that is still open when the input ends. The requested count,
// Length(Str), is never smaller than what remains from St onwards.
if St <> -1 then Result.Add(Copy(ss, St, Length(Str)));
end;// Usage
// Example: split the memo text at commas, periods, spaces, question marks and
// CR/LF characters, then write the resulting list to a file. The list returned
// by SplitEx is released in the finally section, whether or not saving succeeds.
with SplitEx(Memo1.Text, ',. ?' + #13#10) do
try
SaveToFile('c:\temp_demo.txt');
finally
Free;
end;
我只是感觉用伪码表达算法有时还不如用代码表达,我说两分钟的意思是我不保证代码是正确的,这只是一个思路——即便我是吹牛了,也比只说风凉话的强。
// Button click handler: calls GetTokens with 'f:\q1.1.html' as the source file
// and 'f:\a.txt' as the token (output) file. Both paths are hard-coded.
procedure TForm1.Button1Click(Sender: TObject);
begin
GetTokens('f:\q1.1.html','f:\a.txt');
end;源文件:<!-- This collection of hypertext pages is Copyright 1995 by Steve Summit. -->
<!-- Content from the book "C Programming FAQs: Frequently Asked Questions" -->
<!-- (Addison-Wesley, 1995, ISBN 0-201-84519-9) is made available here by -->
<!-- permission of the author and the publisher as a service to the community. -->
<!-- It is intended to complement the use of the published text -->
<!-- and is protected by international copyright laws. The content is -->
<!-- made available here and may be accessed freely for personal use -->
<!-- but may not be published or retransmitted without written permission. -->
结果:a
accessed
Addison
and
as
Asked
author
available
be
book
but
by
C
collection
community
complement
Content
Copyright
FAQs
for
freely
Frequently
from
here
hypertext
intended
international
is
ISBN
It
laws
made
may
not
of
or
pages
permission
personal
Programming
protected
published
publisher
Questions
retransmitted
service
Steve
Summit
text
the
This
to
use
Wesley
without
written
{ Loads a text file into Memo1, counts how often each word occurs and shows
  the result in Memo2 as sorted lines of the form "<word> -<count>".

  sFName - path of the file to load; LoadFromFile raises if it cannot be read.

  A word is a maximal run of the letters A..Z / a..z. Words are matched with
  TStringList.IndexOf, so the list's own string comparison decides whether two
  spellings count as the same word. The occurrence count of each word is kept
  in the Objects slot of its list entry, cast to and from Integer. }
procedure TForm1.GetWord(sFName: string);
var
  ts: TStringList;
  s, sa: string;
  i: Integer;

  { Records one occurrence of the pending word sa, if any, and clears it. }
  procedure CountWord;
  var
    iIndex: Integer;
  begin
    if sa = '' then Exit;
    iIndex := ts.IndexOf(sa); // looked up once instead of twice
    if iIndex = -1 then
      ts.AddObject(sa, TObject(1))
    else
      ts.Objects[iIndex] := TObject(Integer(ts.Objects[iIndex]) + 1);
    sa := '';
  end;

begin
  Memo1.Lines.LoadFromFile(sFName);
  s := Memo1.Text;
  sa := '';
  ts := TStringList.Create;
  try
    // Stop at Length(s). The loop used to run one index further and read
    // s[Length(s) + 1] to flush the last word, which is out of range and
    // fails outright on an empty text.
    for i := 1 to Length(s) do
      if s[i] in ['A'..'Z', 'a'..'z'] then
        sa := sa + s[i]
      else
        CountWord;
    // Count a word that runs up to the very end of the text.
    CountWord;
    // Append the counts before sorting, while each entry still sits next to
    // its own Objects value.
    for i := 0 to ts.Count - 1 do
      ts.Strings[i] := ts.Strings[i] + ' -' + IntToStr(Integer(ts.Objects[i]));
    ts.Sorted := true;
    Memo2.Lines.Assign(ts);
  finally
    ts.Free;
  end;
end;