我想把外汇牌价干干净净提取出来,用了一次 reg.Pattern = "<.*?>"
strData = reg.Replace(strData, "")
后,还是会剩下一些html代码,我想如法炮制,再用第二次以过滤,但是发现过滤不掉,为什么呢?
比如我想过滤掉"中国银行全球门户网站"和"往日外汇牌价搜索"之间的内容,我是只要再用一次这个就可以了吧?:
reg.Pattern = "中国银行全球门户网站.*?往日外汇牌价搜索"
strData = reg.Replace(strData, "")
--------------------------------
我直接贴.frm文件的全部代码,免得大家画控件了,高人帮忙看下,谢谢!:VERSION 5.00
Begin VB.Form Form1
Caption = "Form1"
ClientHeight = 3030
ClientLeft = 120
ClientTop = 450
ClientWidth = 4560
LinkTopic = "Form1"
ScaleHeight = 10950
ScaleWidth = 20250
StartUpPosition = 3 '窗口缺省
WindowState = 2 'Maximized
Begin VB.TextBox Text3
Height = 7695
Left = 840
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 3
Text = "Form1.frx":0000
Top = 840
Width = 5415
End
Begin VB.CommandButton Command1
Caption = "Command1"
Height = 495
Left = 3120
TabIndex = 2
Top = 240
Width = 975
End
Begin VB.TextBox Text2
Height = 6615
Left = 7200
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 1
Text = "Form1.frx":0006
Top = 360
Width = 6375
End
Begin VB.TextBox Text1
Height = 7095
Left = 14280
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 0
Text = "Form1.frx":000C
Top = 120
Width = 2775
End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Public Function GetCode(CodeBase, url) ' 第1个参数CodeBase设置网页编码方式(GB2312或UTF-8),第2个参数Url设置网页地址
Dim xmlHTTP1
Dim ObjXML
Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP")
xmlHTTP1.open "get", url, True
xmlHTTP1.send
While xmlHTTP1.ReadyState <> 4
DoEvents
Wend
GetCode = xmlHTTP1.ResponseBody
If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase)
Set ObjXML = Nothing
End Function
Public Function BytesToBstr(strBody, CodeBase)
Dim ObjStream
Set ObjStream = CreateObject("Adodb.Stream")
With ObjStream
.Type = 1
.Mode = 3
.open
.write strBody
.Position = 0
.Type = 2
.Charset = CodeBase
BytesToBstr = .ReadText
.Close
End With
Set ObjStream = Nothing
End FunctionPrivate Sub Command1_Click() strData = Text1
Set reg = CreateObject("vbscript.regExp")
reg.Global = True
reg.IgnoreCase = True
reg.MultiLine = True
reg.Pattern = "<.*?>"
strData = reg.Replace(strData, "")
Text2.Text = strData
reg.Pattern = "中国银行全球门户网站.*?往日外汇牌价搜索"
strData = reg.Replace(strData, "")
Text3.Text = strData '就是这里,为什么不行呢?text3的出来是空白
' Set regEx = CreateObject("VBScript.RegExp")
' regEx.IgnoreCase = True
' regEx.Global = True
' regEx.Pattern = "往日外汇牌价搜索(.*?)往日外汇牌价搜索"
' Set Matches = regEx.Execute(Text2.Text)
' For Each Match In Matches
' Text3.Text = Text3.Text & Match.SubMatches(0) & vbCrLf'这里本来牌价信息是在两个"往日外汇牌价搜索"之间的,但是也提取失败了,搞不懂
' Next
End Sub
Private Sub Form_Load()Text1 = GetCode("UTF-8", "http://www.boc.cn/sourcedb/whpj/index.html")End Sub
strData = reg.Replace(strData, "")
后,还是会剩下一些html代码,我想如法炮制,再用第二次以过滤,但是发现过滤不掉,为什么呢?
比如我想过滤掉"中国银行全球门户网站"和"往日外汇牌价搜索"之间的内容,我是只要再用一次这个就可以了吧?:
reg.Pattern = "中国银行全球门户网站.*?往日外汇牌价搜索"
strData = reg.Replace(strData, "")
--------------------------------
我直接贴.frm文件的全部代码,免得大家画控件了,高人帮忙看下,谢谢!:VERSION 5.00
Begin VB.Form Form1
Caption = "Form1"
ClientHeight = 3030
ClientLeft = 120
ClientTop = 450
ClientWidth = 4560
LinkTopic = "Form1"
ScaleHeight = 10950
ScaleWidth = 20250
StartUpPosition = 3 '窗口缺省
WindowState = 2 'Maximized
Begin VB.TextBox Text3
Height = 7695
Left = 840
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 3
Text = "Form1.frx":0000
Top = 840
Width = 5415
End
Begin VB.CommandButton Command1
Caption = "Command1"
Height = 495
Left = 3120
TabIndex = 2
Top = 240
Width = 975
End
Begin VB.TextBox Text2
Height = 6615
Left = 7200
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 1
Text = "Form1.frx":0006
Top = 360
Width = 6375
End
Begin VB.TextBox Text1
Height = 7095
Left = 14280
MultiLine = -1 'True
ScrollBars = 2 'Vertical
TabIndex = 0
Text = "Form1.frx":000C
Top = 120
Width = 2775
End
End
Attribute VB_Name = "Form1"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Public Function GetCode(CodeBase, url) ' 第1个参数CodeBase设置网页编码方式(GB2312或UTF-8),第2个参数Url设置网页地址
Dim xmlHTTP1
Dim ObjXML
Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP")
xmlHTTP1.open "get", url, True
xmlHTTP1.send
While xmlHTTP1.ReadyState <> 4
DoEvents
Wend
GetCode = xmlHTTP1.ResponseBody
If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase)
Set ObjXML = Nothing
End Function
Public Function BytesToBstr(strBody, CodeBase)
Dim ObjStream
Set ObjStream = CreateObject("Adodb.Stream")
With ObjStream
.Type = 1
.Mode = 3
.open
.write strBody
.Position = 0
.Type = 2
.Charset = CodeBase
BytesToBstr = .ReadText
.Close
End With
Set ObjStream = Nothing
End FunctionPrivate Sub Command1_Click() strData = Text1
Set reg = CreateObject("vbscript.regExp")
reg.Global = True
reg.IgnoreCase = True
reg.MultiLine = True
reg.Pattern = "<.*?>"
strData = reg.Replace(strData, "")
Text2.Text = strData
reg.Pattern = "中国银行全球门户网站.*?往日外汇牌价搜索"
strData = reg.Replace(strData, "")
Text3.Text = strData '就是这里,为什么不行呢?text3的出来是空白
' Set regEx = CreateObject("VBScript.RegExp")
' regEx.IgnoreCase = True
' regEx.Global = True
' regEx.Pattern = "往日外汇牌价搜索(.*?)往日外汇牌价搜索"
' Set Matches = regEx.Execute(Text2.Text)
' For Each Match In Matches
' Text3.Text = Text3.Text & Match.SubMatches(0) & vbCrLf'这里本来牌价信息是在两个"往日外汇牌价搜索"之间的,但是也提取失败了,搞不懂
' Next
End Sub
Private Sub Form_Load()Text1 = GetCode("UTF-8", "http://www.boc.cn/sourcedb/whpj/index.html")End Sub
reg.Pattern = "中国银行全球门户网站[\s\S]*?往日外汇牌价搜索"