给出一个网址,然后把该页面的代码下载到内存中,我再读取它的html代码(包括经asp、php处理后输出的html),并存到数据库中
不管用什么实现,asp,vb,只要能做到就行了
我觉得这个问题和ie差不多,ie也是把.html文件下载到内存,然后根据代码编译成我们所看到的页面,而这个问题仅需要保存htm文件入库就行了
望高手指导
不管用什么实现,asp,vb,只要能做到就行了
我觉得这个问题和ie差不多,ie也是把.html文件下载到内存,然后根据代码编译成我们所看到的页面,而这个问题仅需要保存htm文件入库就行了
望高手指导
例:
' The TextBox control named "Text1" holds
' the result of this method. The Internet Transfer
' control is named "Inet1".
Text1.Text = Inet1.OpenURL("http://www.microsoft.com")
' Starts loading the page in the WebBrowser control and enables the timer
' that polls for the load to finish.
Private Sub Command1_Click()
    ' The URL must be a quoted string literal; a bare URL does not compile.
    WebBrowser1.Navigate "http://www.sohu.com/"
    Timer1.Enabled = True
End Sub

' Timer tick: once the WebBrowser control is no longer busy, copies the HTML
' of the document body into Text1 and disables the timer.
Private Sub Timer1_Timer()
    Dim doc As Object
    Dim objHtml As Object

    If Not WebBrowser1.Busy Then
        Set doc = WebBrowser1.Document
        ' Guard every object reference: IsNull() is always False for object
        ' variables, so "Is Nothing" is the test that actually protects the
        ' property accesses below.
        If Not doc Is Nothing Then
            If Not doc.body Is Nothing Then
                Set objHtml = doc.body.createTextRange()
                If Not objHtml Is Nothing Then
                    Text1.Text = objHtml.htmlText
                End If
            End If
        End If
        ' Stop polling whether or not any HTML could be read.
        Timer1.Enabled = False
    End If
End Sub
不过以下代码你需要改一下才行,因为是我自己做的一个软件中用过的,绝对能用
Option Explicit

' urlmon API: downloads the resource at szURL into the local file szFileName.
' SaveHTML treats a return value of 0 as success.
Public Declare Function URLDownloadToFile Lib "urlmon" Alias _
    "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal _
    szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long

' user32 API: changes the position, size and Z-order of a window.
Public Declare Function SetWindowPos Lib "user32.dll" (ByVal hwnd As Long, _
    ByVal hWndInsertAfter As Long, ByVal x As Long, ByVal y As Long, ByVal cx As Long, _
    ByVal cy As Long, ByVal wFlags As Long) As Long

Public Const SWP_NOMOVE = &H2 ' do not move the window
Public Const SWP_NOSIZE = &H1 ' do not resize the window
Public Const Flag = &H20 ' SWP_DRAWFRAME (author's alternative: SWP_NOMOVE Or SWP_NOSIZE)
Public Const HWND_TOPMOST = -1 ' keep the window always on top
Public Const HWND_NOTOPMOST = -2 ' window is not kept on top

' Previously fused into the comment above, so it was never actually declared.
Public iCount As Integer
' Downloads the page at "source" and saves it to "destfile".
'
' Parameters:
'   source   - URL of the page to download.
'   destfile - path of the file that receives the result.
'   addfile  - optional path of a text file whose lines are inserted right
'              after the <head> line; pass "" to insert nothing.
'   bcut     - True:  download to a temporary file, then copy it line by line
'                     to destfile, dropping everything between <head> and
'                     </head> as well as lines starting with <form, </form,
'                     <body or </body.
'              False: download straight to destfile with no filtering.
'
' Returns 1 on success and -1 on failure. Form1.Caption is updated with the
' progress/result message.
Public Function SaveHTML(ByVal source As String, _
        ByVal destfile As String, ByVal addfile As String, ByVal bcut As Boolean) As Integer
    ' Temporary download target used when filtering is requested.
    Const TEMP_FILE As String = "D:\Inetpub\wwwroot\ms.asp"

    Dim returnValue As Long
    Dim fs As New FileSystemObject
    Dim s As String
    Dim f1 As TextStream
    Dim f2 As TextStream
    Dim f3 As TextStream
    Dim b As Boolean
    Dim b1 As Boolean
    Dim ss As String
    Dim bAdd As Boolean
    Dim downloadFailed As Boolean

    SaveHTML = 0
    Err.Clear
    On Error GoTo exitSUB
    bAdd = Not (addfile = "")
    Form1.Caption = "开始读取网页...."

    If bcut Then
        ' Kill raises "File not found" when the temp file does not exist yet
        ' (e.g. on the very first run), so only delete it when it is there.
        If fs.FileExists(TEMP_FILE) Then Kill TEMP_FILE

        returnValue = URLDownloadToFile(0, source, TEMP_FILE, 0, 0)
        ' A non-zero result means the download failed; report it instead of
        ' relying on a later file-open error.
        If returnValue <> 0 Then
            downloadFailed = True
            GoTo exitSUB
        End If

        Set f1 = fs.OpenTextFile(TEMP_FILE)
        If bAdd Then Set f2 = fs.OpenTextFile(addfile)
        Set f3 = fs.CreateTextFile(destfile, True)

        ' b = True while we are outside the <head>...</head> section.
        b = True
        Do Until f1.AtEndOfStream
            ' NOTE(review): every line is lower-cased before being written, so
            ' the saved page loses its original letter case - confirm intended.
            s = LCase(Trim(f1.ReadLine))

            If s = "<head>" And b Then
                b = False
                f3.WriteLine "<head>"
            End If

            ss = Left(s, 5)
            b1 = (ss = "<form") Or (ss = "</for") Or (ss = "<body") Or (ss = "</bod")

            If s = "</head>" And Not b Then b = True
            If b And Not b1 Then f3.WriteLine s

            ' While inside the head section, append the whole add-file once;
            ' after the first pass f2 is at end of stream and the loop is a no-op.
            If bAdd And Not b Then
                Do Until f2.AtEndOfStream
                    f3.WriteLine f2.ReadLine
                Loop
            End If
        Loop

        f1.Close
        If bAdd Then f2.Close
        f3.Close
    Else
        returnValue = URLDownloadToFile(0, source, destfile, 0, 0)
        ' Previously ignored: a failed download was reported as success.
        downloadFailed = (returnValue <> 0)
    End If

exitSUB:
    ' Reached both on normal completion and through the error handler.
    If Err.Number <> 0 Or downloadFailed Then
        Form1.Caption = "读取网页失败...将马上自动重试"
        SaveHTML = -1
    Else
        Form1.Caption = "读取网页成功...等10秒后自动再读"
        SaveHTML = 1
    End If
    Set f1 = Nothing
    Set f2 = Nothing
    Set f3 = Nothing
    Set fs = Nothing
End Function