<th scope="col">快件单号</th><th scope="col">操作时间</th><th scope="col">快件流程</th>
</tr><tr>
<td>2162335213</td><td>2009-9-4 20:19:23</td><td>浙江东阳/快件揽收扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-4 20:32:22</td><td>浙江东阳/上车扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 5:02:41</td><td>无锡分拨中心/下车扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 5:15:03</td><td>无锡分拨中心/建包扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 7:09:14</td><td>江苏张家港/拆包扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 10:46:27</td><td>江苏张家港/业务员派送扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 18:15:43</td><td>江苏张家港/签收扫描,签收人:鲁</td>
</tr>
</table>
</div>
</td>
</tr>
</table><br />我要过滤掉一些 HTML 标签，只留下如下文本信息，该如何写呢？ 快件单号 操作时间 快件流程
2162335213 2009-9-4 20:19:23 浙江东阳/快件揽收扫描
2162335213 2009-9-4 20:32:22 浙江东阳/上车扫描
2162335213 2009-9-5 5:02:41 无锡分拨中心/下车扫描
2162335213 2009-9-5 5:15:03 无锡分拨中心/建包扫描
2162335213 2009-9-5 7:09:14 江苏张家港/拆包扫描
2162335213 2009-9-5 10:46:27 江苏张家港/业务员派送扫描
2162335213 2009-9-5 18:15:43 江苏张家港/签收扫描,签收人:鲁
</tr><tr>
<td>2162335213</td><td>2009-9-4 20:19:23</td><td>浙江东阳/快件揽收扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-4 20:32:22</td><td>浙江东阳/上车扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 5:02:41</td><td>无锡分拨中心/下车扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 5:15:03</td><td>无锡分拨中心/建包扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 7:09:14</td><td>江苏张家港/拆包扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 10:46:27</td><td>江苏张家港/业务员派送扫描</td>
</tr><tr>
<td>2162335213</td><td>2009-9-5 18:15:43</td><td>江苏张家港/签收扫描,签收人:鲁</td>
</tr>
</table>
</div>
</td>
</tr>
</table><br />我要过滤掉一些 HTML 标签，只留下如下文本信息，该如何写呢？ 快件单号 操作时间 快件流程
2162335213 2009-9-4 20:19:23 浙江东阳/快件揽收扫描
2162335213 2009-9-4 20:32:22 浙江东阳/上车扫描
2162335213 2009-9-5 5:02:41 无锡分拨中心/下车扫描
2162335213 2009-9-5 5:15:03 无锡分拨中心/建包扫描
2162335213 2009-9-5 7:09:14 江苏张家港/拆包扫描
2162335213 2009-9-5 10:46:27 江苏张家港/业务员派送扫描
2162335213 2009-9-5 18:15:43 江苏张家港/签收扫描,签收人:鲁
循环执行以下步骤，直到字符串 s 中不再有 '<'：
begin
用 pos 定位 '<'，得到位置 x，
if (x<1) then
break;
再定位 '>'，得到位置 y，
delete(s,x,y-x+1);
end;
uses mshtml;function TMainForm.GetTable: Integer;
var
s, ss: string;
// Copies the text of one HTML table shown in WebBrowser1 into Memo1.
//
// Each table row becomes one CRLF-terminated line; the cells of a row are
// separated by commas. The enclosing function's Result receives the number of
// rows copied, or 0 when anything goes wrong (e.g. the table does not exist).
//
// Parameters:
//   sData      - not used by this procedure; kept so existing calls compile.
//   TableIndex - index of the table inside the document's TABLE collection.
//
// Uses the enclosing function's locals `s` and `ss` as scratch strings.
procedure FillData(sData: TTableString; TableIndex: Integer = 1);
var
  i, j, RowCount: Integer;
  ovTable, ovRow: OleVariant;
begin
  try
    ss := '';
    // TableIndex is the position of the table within the page; try values
    // until you hit the table you want.
    ovTable := WebBrowser1.OleObject.Document.all.tags('TABLE').item(TableIndex);
    RowCount := ovTable.Rows.Length;
    if RowCount = 0 then
    begin
      // Nothing to copy; report zero rows without touching the memo.
      Result := 0;
      Exit;
    end;

    for i := 0 to RowCount - 1 do
    begin
      ovRow := ovTable.Rows.Item(i);
      for j := 0 to ovRow.Cells.Length - 1 do
      begin
        s := ovRow.Cells.Item(j).InnerText;
        // The comma goes BETWEEN cells of the same row: none before the
        // first cell and none after the last one.
        if j > 0 then
          ss := ss + ',';
        ss := ss + s;
      end;
      ss := ss + #13#10;
    end;

    Memo1.Lines.Add(ss);
    ss := '';
    Result := RowCount;
  except
    // Best effort: any automation failure is reported as "no rows".
    Result := 0;
  end;
end;
// Extracts the visible text from one line of the tracking-table HTML.
//
// The line is checked against four patterns, in this order:
//   1. <th scope="col">..</th> cells -> their texts joined by single spaces
//   2. line ending in </tr><tr>      -> a line break (#13#10)
//   3. <td>..</td> cells             -> their texts joined by single spaces
//   4. line ending in </tr>          -> '' and b := True
// A line that matches none of them yields ''.
//
// Parameters:
//   s - one line of HTML source.
//   b - set to False on entry; set to True only when case 4 applies, which the
//       caller uses as its "stop reading" signal.
//
// Requires ComObj in the uses clause (CreateOleObject) and the
// 'VBScript.RegExp' automation object to be available.
function G(const s: String; var b: Boolean): String;//uses ComObj
var
  re: Variant;

  // Runs Pattern against s. On a match, stores capture group 1 of every match,
  // joined by single spaces and trimmed, in Joined and returns True.
  function JoinCaptures(const Pattern: String; var Joined: String): Boolean;
  var
    found: Variant;
    i: Integer;
    acc: String;
  begin
    re.Pattern := Pattern;
    Result := re.Test(s);
    if not Result then
      Exit;
    found := re.Execute(s);
    acc := '';
    for i := 0 to found.Count - 1 do
      acc := acc + found.Item[i].SubMatches.Item[0] + ' ';
    Joined := Trim(acc);
  end;

  // Returns True when Pattern matches somewhere in s.
  function Hits(const Pattern: String): Boolean;
  begin
    re.Pattern := Pattern;
    Result := re.Test(s);
  end;

var
  cells: String;
begin
  // A string Result is not guaranteed to be empty on entry, so give lines that
  // match nothing a defined value instead of whatever was there before.
  Result := '';
  b := False;
  re := CreateOleObject('VBScript.RegExp');
  re.Global := True;

  if JoinCaptures('<th scope="col">([^<]+)</th>', cells) then
  begin
    Result := cells;
    Exit;
  end;

  if Hits('</tr><tr>$') then
  begin
    Result := #13#10;
    Exit;
  end;

  if JoinCaptures('<td>([^<]+)</td>', cells) then
  begin
    Result := cells;
    Exit;
  end;

  if Hits('</tr>$') then
    b := True;
end;
var
b: Boolean;       // set by G when the closing </tr> of the table is reached
f: TextFile;
s, sLine: String; // s accumulates the extracted text; sLine is the current input line
begin
  // Read 'a.htm' line by line and collect the text G extracts from each line,
  // stopping at end of file or as soon as G reports the end of the table.
  AssignFile(f, 'a.htm');
  // Open BEFORE the try: if the open fails there is nothing to close, and
  // closing an unopened file would raise a second error hiding the real one.
  Reset(f);
  try
    s := '';
    b := False;
    while not (b or Eof(f)) do
    begin
      ReadLn(f, sLine);
      s := s + G(sLine, b);
    end;
  finally
    CloseFile(f);
  end;

  // Write the collected text to 'result.htm', replacing any previous content.
  AssignFile(f, 'result.htm');
  Rewrite(f);
  try
    Write(f, s);
  finally
    CloseFile(f);
  end;

  ShowMessage('OK');
end;
问题如果解决的话要结帖哟~
那就先找 <tr>，再找 <td>，把它们之间的空格全删掉，
再做处理。