我现在的文件是htm的,里面的内容大致如下:
'//========================================================
<html>
<head>
<title>大家好</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
</head><body bgcolor="#009900" text="#000000">
<table width="1000" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>大家好,同志辛苦了<a href='www.sohu.com'>你好</a></td>
</tr>
</table>
</body>
</html>//=======================================================================我想把多余的东西替换掉,替换完成后是这样的
<body bgcolor="#009900" text="#000000">
大家好,同志辛苦了<a href='www.sohu.com'>你好</a>
</body>table里的东西都不要,但连接和北京颜色保留,也就是说<body> </body> <a href=''>sdfsd</a>
这样的东西和内容要保留,大家帮我搞一下吧,分不够可以加!
'//========================================================
<html>
<head>
<title>大家好</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
</head><body bgcolor="#009900" text="#000000">
<table width="1000" border="1" cellspacing="0" cellpadding="0">
<tr>
<td>大家好,同志辛苦了<a href='www.sohu.com'>你好</a></td>
</tr>
</table>
</body>
</html>//=======================================================================我想把多余的东西替换掉,替换完成后是这样的
<body bgcolor="#009900" text="#000000">
大家好,同志辛苦了<a href='www.sohu.com'>你好</a>
</body>table里的东西都不要,但连接和北京颜色保留,也就是说<body> </body> <a href=''>sdfsd</a>
这样的东西和内容要保留,大家帮我搞一下吧,分不够可以加!
解决方案 »
- XP系统下打开VB开发的软件界面里所有中文文字不显示
- 能不能截取扫描仪扫描到的图片
- 【问】如何让SHBrowseForFolder的>>>根目录<<<为"C:\"?
- ***vb中有什么好的方法实现打印功能?请大虾给点建议***
- 怎样把A表里的100条记录都放到B表里去?
- 紧急求助:datagrid中修改的数据怎么保存到数据库中(再现等待)
- 大家帮忙
- ADO的问题,寻求解答?
- 初学者问题:如何用textbox打开一个文本文件?(明日给分)
- Internet上组建开发团队,欢迎各路高手加入。
- 请问一下,有哪个表格控件可以实现数据输入功能?
- 如何在listview中加入动态加入combo实现listview编辑功能?
用法示例:
dim tmp as string
tmp=GetHrefs("http://www.163.com","<a href=","</a>") 后面几个参数分别是:
PrevResults:上一次检查的结果,用于输出好几个页面中不同的结果。
AllowReduplicate:是否允许重复的结果出现。
Keywords:只搜索相关关键字的结果Private Function GetHrefs(ByVal strURL As String, _
ByVal StartMark As String, _
ByVal EndMark As String, _
Optional ByVal PrevResults As String, _
Optional ByVal AllowReduplicate As Boolean, _
Optional ByVal Keywords As String) as String
On Error GoTo ErrHandle:
Dim strTmp As String
Dim strHTML
Dim Results As Variant
Dim tmpResult As String
strHTML = Inet1.OpenURL(strURL)
Dim CurrentURL As String
CurrentURL = strURL
Dim pos0 As Long
Dim pos1 As Long
Dim posS As Long
Dim posE As Long
posS = 1
posE = 1
If PrevResults <> "" Then
tmpResult = PrevResults
End If
Do While InStr(posS, strHTML, StartMark, vbTextCompare) > 0
If InStr(Inet1.GetHeader, "HTTP/1.1 404") > 0 Then
Exit Do
ElseIf InStr(Inet1.GetHeader, "HTTP/1.1 401") > 0 Then
Exit Do
ElseIf InStr(Inet1.GetHeader, "HTTP/1.1 403") > 0 Then
Exit Do
End If
pos0 = InStr(posS, strHTML, StartMark, vbTextCompare)
posE = pos0 + 1
pos1 = InStr(posE, strHTML, EndMark, vbTextCompare)
If pos0 > 0 Then
pos0 = pos0 + Len(StartMark)
strTmp = Mid(strHTML, pos0, pos1 - pos0)
End If
'URL的后期处理
If InStr(strTmp, Chr(32)) > 0 Then
strTmp = Left(strTmp, InStr(strTmp, Chr(32)))
End If
strTmp = Replace(strTmp, Chr(34), "")
strTmp = Replace(strTmp, "'", "")
strTmp = Replace(strTmp, "#", "")
strTmp = Trim(strTmp)
Dim strProtocol As String
strProtocol = GetUrlParts(strTmp, 1) If strProtocol = "" Then
strTmp = CombineURL(GetFullPath(CurrentURL), strTmp)
End If
'如果不是HTTP协议的链接
If (GetUrlParts(strTmp, 1) <> "http") And (chkNoScan.Value = Unchecked) Then
'nothing
ElseIf (InStr(strTmp, Keywords) <= 0) And (chkNoScan.Value = Unchecked) Then
'nothing
ElseIf (InStr(strTmp, "?") <= 0) And (chkNoScan.Value = Unchecked) Then '过滤不带参数的URL
'nothing
ElseIf InStr(strTmp, Chr(13)) > 0 Then '过滤带回车的URL
'nothing
Else
'查看是否有图片
'If InStr("gif,jpg,bmp,png", LCase(Right(strTmp, 3))) <= 0 Then
If InStr("gif,jpg,bmp,png", LCase(Right(strTmp, 3))) <= 0 Then
If AllowReduplicate = True Then
tmpResult = tmpResult & strTmp & vbCrLf
Else
'查看以前的URL中是否有重复记录
Dim strTmp1 As String
strTmp1 = Replace(strTmp, GetUrlParts(strTmp, 6), "")
If InStr(1, LCase(tmpResult), LCase(strTmp1), vbTextCompare) <= 0 Then
tmpResult = tmpResult & strTmp & vbCrLf
End If
End If
End If
End If
posS = pos0
posE = pos1 + Len(EndMark)
Loop
GetHrefs = tmpResult
Exit Function
ErrHandle: GetHrefs = ""
End Function