Option ExplicitPrivate Sub Form_Load() WebBrowser1.Navigate "http://www.english-schools.org/" End SubPrivate Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant) Dim obj As Object
If URL <> "http://www.english-schools.org/" Then Exit Sub For Each obj In WebBrowser1.Document.All.tags("Option") Debug.Print obj.Value Next End Sub
Dim vTag As Object Dim i As Integer Set vTag = WebBrowser1.Document.links For i = 0 To vTag.length - 1 If InStr(vTag(i).href, "schools.org") > 0 Then Me.Print vTag(i).href End If Next
非通用代码Sub GetOptionLinks() Dim URL As String Dim PageHtml As String Dim X Dim s As String Dim i As Integer, j As Integer On Error Resume Next URL = "http://www.english-schools.org/" Set myXmlHttp = CreateObject("msxml2.XMLHTTP") With myXmlHttp .Open "GET", URL, False .send PageHtml = .responsetext X = Split(PageHtml, "option") j = 1 For i = 6 To UBound(X) Step 2 s = X(i) mypos = InStr(s, ">") Debug.Print j & ")URL='" & Mid(s, 9, mypos - 10) & "';Country=" & Mid(s, mypos + 1, Len(s) - mypos - 2) If Err.Number = 0 Then j = j + 1 Else Err.Clear End If Next End WithSet myXmlHttp = Nothing End Sub
用正则表达式写一个Sub GetOptionLinksByRegex() Dim URL As String Dim PageHtml As String Dim X Dim j As Integer Dim objRegex As Object Dim c As Object On Error Resume Next URL = "http://www.english-schools.org/" Set myXmlHttp = CreateObject("msxml2.XMLHTTP") With myXmlHttp .Open "GET", URL, False .send PageHtml = .responsetext j = 1 Set objRegex = CreateObject("VBScript.RegExp") objRegex.ignorecase = True objRegex.Global = True objRegex.Pattern = "[\r\n]\s*" '不能直接设置multiline,否则会丢失几个信息 PageHtml = objRegex.Replace(PageHtml, "") '只能手工替换
objRegex.Pattern = "option value\=""(.+?)""\>(.+?)\<\/option" Set X = objRegex.Execute(PageHtml) For Each c In X Debug.Print j & ")URL=" & c.submatches(0) & ";Country=" & c.submatches(1) j = j + 1 Next End WithSet myXmlHttp = Nothing End Sub
WebBrowser1.Navigate "http://www.english-schools.org/"
End SubPrivate Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Dim obj As Object
If URL <> "http://www.english-schools.org/" Then Exit Sub
For Each obj In WebBrowser1.Document.All.tags("Option")
Debug.Print obj.Value
Next
End Sub
Dim i As Integer
Set vTag = WebBrowser1.Document.links
For i = 0 To vTag.length - 1
If InStr(vTag(i).href, "schools.org") > 0 Then
Me.Print vTag(i).href
End If
Next
拉脱维亚的:http://www.english-schools.org/latvia/index.htm
芬兰的:http://www.english-schools.org/finland/index.htm
科威特:http://www.english-schools.org/kuwait/index.htm
该下拉框中都没有,但确实存在上面网页
将http://www.english-schools.org页面另存为HTML文件。用PilotEdit打开
2. 点排序按钮,选择“比较由正则表达式定义的字符串”,输入下面的正则表达式和目标字符串:
正则表达式:<option value="http://*english-schools.org*">[]*</option>
目标字符串:http://%02%03%043. 点“将目标字符串拷贝到剪贴板”,即可将你要的数据拷贝到剪贴板(共48个学校):
http://www.english-schools.org/austria/index.htm
http://belgium.english-schools.org
http://english-schools.org/cyprus/
http://www.english-schools.org/czech/index.htm
http://www.english-schools.org/den/
http://france.english-schools.org
http://germany.english-schools.org/
http://www.english-schools.org/greece/index.htm
http://www.english-schools.org/hungary/
http://italy.english-schools.org/
http://www.english-schools.org/luxembourg/international-school-of-luxembourg.htm
http://netherlands.english-schools.org/
http://www.english-schools.org/norway/index.htm
http://www.english-schools.org/poland
http://portugal.english-schools.org/
http://english-schools.org/romania/
http://spain.english-schools.org/
http://switzerland.english-schools.org/
http://www.english-schools.org/britain/
http://english-schools.org/argentina/
http://www.english-schools.org/australia
http://www.english-schools.org/azerbaijan
http://www.english-schools.org/brazil
http://english-schools.org/china/
http://english-schools.org/egypt/
http://www.english-schools.org/ghana/ghana-international-school.htm
http://www.english-schools.org/hong-kong/
http://india.english-schools.org/
http://www.english-schools.org/indonesia/
http://www.english-schools.org/jakarta/
http://japan.english-schools.org/
http://english-schools.org/kenya/
http://www.english-schools.org/korea
http://www.english-schools.org/kuwait
http://english-schools.org/malaysia/
http://english-schools.org/mexico/
http://english-schools.org/moscow/
http://www.english-schools.org/saudi-arabia
http://singapore.english-schools.org/
http://www.english-schools.org/tanzania
http://www.english-schools.org/thailand/
http://www.english-schools.org/uganda
http://www.english-schools.org/UAE
http://www.english-schools.org/vietnam
http://www.english-schools.org/zambia
http://www.english-schools.org/zimbabwe
http://www.english-schools.org/philippines
http://www.english-schools.org/kazakhstan
2)URL='http://belgium.english-schools.org';Country=Belgium
3)URL='http://english-schools.org/cyprus/';Country=Cyprus
4)URL='http://www.english-schools.org/czech/index.htm';Country=Czech
Republic
5)URL='http://www.english-schools.org/den/';Country=Den
6)URL='http://france.english-schools.org';Country=France
7)URL='http://germany.english-schools.org/';Country=Germany
8)URL='http://www.english-schools.org/greece/index.htm';Country=Greece
9)URL='http://www.english-schools.org/hungary/';Country=Hungary
10)URL='http://italy.english-schools.org/';Country=Italy
11)URL='http://www.english-schools.org/luxembourg/international-school-of-luxembourg.htm';Country=Luxembourg
12)URL='http://netherlands.english-schools.org/';Country=Netherlands
13)URL='http://www.english-schools.org/norway/index.htm';Country=Norway
14)URL='http://www.english-schools.org/poland';Country=Poland
15)URL='http://portugal.english-schools.org/';Country=Portugal
16)URL='http://english-schools.org/romania/';Country=Romania
17)URL='http://spain.english-schools.org/';Country=Spain
18)URL='http://www.sweden-schools.com/';Country=Sweden
19)URL='http://switzerland.english-schools.org/';Country=Switzerland
20)URL='http://www.english-schools.org/britain/';Country=United Kingdom
21)URL='http://english-schools.org/argentina/';Country=Argentina
22)URL='http://www.english-schools.org/australia';Country=Australia
23)URL='http://www.english-schools.org/azerbaijan';Country=Azerbaijan
24)URL='http://www.english-schools.org/brazil';Country=Brazil
25)URL='http://english-schools.org/china/';Country=China
26)URL='http://english-schools.org/egypt/';Country=Egypt
27)URL='http://www.english-schools.org/ghana/ghana-international-school.htm';Country=Ghana
28)URL='http://www.english-schools.org/hong-kong/';Country=Hong Kong
29)URL='http://india.english-schools.org/';Country=India
30)URL='http://www.english-schools.org/indonesia/';Country=Indonesia
31)URL='http://www.english-schools.org/jakarta/';Country=Jakarta
32)URL='http://japan.english-schools.org/';Country=Japan
33)URL='http://english-schools.org/kenya/';Country=Kenya
34)URL='http://www.english-schools.org/korea';Country=Korea
35)URL='http://www.english-schools.org/kuwait';Country=Kuwait
36)URL='http://english-schools.org/malaysia/';Country=Malaysia
37)URL='http://english-schools.org/mexico/';Country=Mexico
38)URL='http://english-schools.org/moscow/';Country=Moscow
39)URL='http://www.english-schools.org/saudi-arabia';Country=Saudi
Arabia
40)URL='http://singapore.english-schools.org/';Country=Singapore
41)URL='http://www.english-schools.org/tanzania';Country=Tanzania
42)URL='http://www.english-schools.org/thailand/';Country=Thailand
43)URL='http://www.english-schools.org/uganda';Country=Uganda
44)URL='http://www.english-schools.org/UAE';Country=United Arab Emirates
45)URL='http://www.english-schools.org/vietnam';Country=Vietnam
46)URL='http://www.english-schools.org/zambia';Country=Zambia
47)URL='http://www.english-schools.org/zimbabwe';Country=Zimbabwe
48)URL='http://www.english-schools.org/philippines';Country=Philippines
49)URL='http://www.english-schools.org/kazakhstan';Country=Kazakhstan
Dim URL As String
Dim PageHtml As String
Dim X
Dim s As String
Dim i As Integer, j As Integer
On Error Resume Next
URL = "http://www.english-schools.org/"
Set myXmlHttp = CreateObject("msxml2.XMLHTTP")
With myXmlHttp
.Open "GET", URL, False
.send
PageHtml = .responsetext
X = Split(PageHtml, "option")
j = 1
For i = 6 To UBound(X) Step 2
s = X(i)
mypos = InStr(s, ">")
Debug.Print j & ")URL='" & Mid(s, 9, mypos - 10) & "';Country=" & Mid(s, mypos + 1, Len(s) - mypos - 2)
If Err.Number = 0 Then
j = j + 1
Else
Err.Clear
End If
Next
End WithSet myXmlHttp = Nothing
End Sub
Dim URL As String
Dim PageHtml As String
Dim X
Dim j As Integer
Dim objRegex As Object
Dim c As Object
On Error Resume Next
URL = "http://www.english-schools.org/"
Set myXmlHttp = CreateObject("msxml2.XMLHTTP")
With myXmlHttp
.Open "GET", URL, False
.send
PageHtml = .responsetext
j = 1
Set objRegex = CreateObject("VBScript.RegExp")
objRegex.ignorecase = True
objRegex.Global = True
objRegex.Pattern = "[\r\n]\s*" '不能直接设置multiline,否则会丢失几个信息
PageHtml = objRegex.Replace(PageHtml, "") '只能手工替换
objRegex.Pattern = "option value\=""(.+?)""\>(.+?)\<\/option"
Set X = objRegex.Execute(PageHtml)
For Each c In X
Debug.Print j & ")URL=" & c.submatches(0) & ";Country=" & c.submatches(1)
j = j + 1
Next
End WithSet myXmlHttp = Nothing
End Sub
发在我的博客里面。
http://blog.csdn.net/lqnoway/archive/2009/08/11/4432530.aspx
是用google的方法去找,可惜太多了有5000多条,就不复制上来了。(而且有很多重复的以及不是国家的,还不知道如何挑选)