如何抓取谷歌,百度里面特定的搜索结果! 如题,在自己的网页里面写一个搜索引擎,输入搜索条件搜索后,在自己的页面里面显示抓取谷歌和百度里面的特定的搜索结果,例如搜索结果是 百度里面的(第一,第五,第八条)+谷歌里面的(第一条,第四条)的搜索结果组合!希望大家踊跃发言!表达自己的观点和想法,大家互相探讨和学习! 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 protected void Button1_Click(object sender, EventArgs e) { Uri u = new Uri("http://www.google.cn/search?q="+TextBox1.Text+"&hl=zh-CN&ie=GB2312&lr=&nxpt=10.2068337957705091425725"); HttpWebRequest dd = (HttpWebRequest)WebRequest.Create(u); dd.Method = "GET"; WebResponse req = dd.GetResponse(); Stream str = req.GetResponseStream(); StreamReader readerOfStream = new StreamReader(str, System.Text.Encoding.GetEncoding("GB2312")); string aa = readerOfStream.ReadToEnd(); string abc = ""; char aa1=(char)13; char aa2=(char)10; string Str = aa.Replace(aa1.ToString(), "").Replace(aa2.ToString(), ""); string StartStr = "<div><div class=g>[变量]"; StartStr = StartStr.Replace("[变量]", ".*"); string LastStr = "</div>"; string SearchStr = StartStr + ".*" + LastStr; Regex re = new Regex(SearchStr, RegexOptions.IgnoreCase); Match m = re.Match(aa); Response.Write(m.Value); str.Close(); }效果图http://blog.csdn.net/lem12/archive/2008/01/15/2044655.aspx Uri u = new Uri("http://www.google.cn/search?q=" + TextBox1.Text + "&hl=zh-CN&ie=gbk&lr=&nxpt=10.2068337957705091425725"); HttpWebRequest dd = (HttpWebRequest)WebRequest.Create(u); dd.Method = "GET"; WebResponse req = dd.GetResponse(); Stream str = req.GetResponseStream(); StreamReader readerOfStream = new StreamReader(str, System.Text.Encoding.GetEncoding("GB2312")); string aa = readerOfStream.ReadToEnd(); string abc = ""; char aa1 = (char)13; char aa2 = (char)10; string Str = aa.Replace(aa1.ToString(), "").Replace(aa2.ToString(), ""); string StartStr = "<div><div class=g>[变量]"; StartStr = StartStr.Replace("[变量]", ".*"); string LastStr = "</div>"; string SearchStr = StartStr + ".*" + LastStr; Regex re = new Regex(SearchStr, RegexOptions.IgnoreCase); Match m = re.Match(aa); Response.Write(m.Value); str.Close(); 
google貌似有WEB服务.直接调用.百度的好像没有,只能用正则来弄。而且相当不方便 基于某位高人的写法,非本人版权//------------------------------------------------------------------------------// <autogenerated>// This code was generated by a tool.// Runtime Version: 1.0.3705.209//// Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated.// </autogenerated>//------------------------------------------------------------------------------// // This source code was auto-generated by wsdl, Version=1.0.3705.209.// Google generated this file via the command line "wsdl.exe GoogleSearch.wsdl"// using System.Diagnostics;using System.Xml.Serialization;using System;using System.Web.Services.Protocols;using System.ComponentModel;using System.Web.Services;/// <res/>[System.Diagnostics.DebuggerStepThroughAttribute()][System.ComponentModel.DesignerCategoryAttribute("code")][System.Web.Services.WebServiceBindingAttribute(Name = "GoogleSearchBinding", Namespace = "urn:GoogleSearch")][System.Xml.Serialization.SoapIncludeAttribute(typeof(ResultElement))]public class GoogleSearchService : System.Web.Services.Protocols.SoapHttpClientProtocol{ /// <res/> public GoogleSearchService() { this.Url = "http://api.google.com/search/beta2"; } /// <res/> [System.Web.Services.Protocols.SoapRpcMethodAttribute("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")] [return: System.Xml.Serialization.SoapElementAttribute("return", DataType = "base64Binary")] public System.Byte[] doGetCachedPage(string key, string url) { object[] results = this.Invoke("doGetCachedPage", new object[] { key, url}); return ((System.Byte[])(results[0])); } /// <res/> public System.IAsyncResult BegindoGetCachedPage(string key, string url, System.AsyncCallback callback, object asyncState) { return this.BeginInvoke("doGetCachedPage", new object[] { key, url}, callback, asyncState); } /// <res/> public System.Byte[] EnddoGetCachedPage(System.IAsyncResult asyncResult) { object[] results = 
this.EndInvoke(asyncResult); return ((System.Byte[])(results[0])); } /// <res/> [System.Web.Services.Protocols.SoapRpcMethodAttribute("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")] [return: System.Xml.Serialization.SoapElementAttribute("return")] public string doSpellingSuggestion(string key, string phrase) { object[] results = this.Invoke("doSpellingSuggestion", new object[] { key, phrase}); return ((string)(results[0])); } /// <res/> public System.IAsyncResult BegindoSpellingSuggestion(string key, string phrase, System.AsyncCallback callback, object asyncState) { return this.BeginInvoke("doSpellingSuggestion", new object[] { key, phrase}, callback, asyncState); } /// <res/> public string EnddoSpellingSuggestion(System.IAsyncResult asyncResult) { object[] results = this.EndInvoke(asyncResult); return ((string)(results[0])); } /// <res/> [System.Web.Services.Protocols.SoapRpcMethodAttribute("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")] [return: System.Xml.Serialization.SoapElementAttribute("return")] public GoogleSearchResult doGoogleSearch(string key, string q, int start, int maxResults, bool filter, string restrict, bool safeSearch, string lr, string ie, string oe) { object[] results = this.Invoke("doGoogleSearch", new object[] { key, q, start, maxResults, filter, restrict, safeSearch, lr, ie, oe}); return ((GoogleSearchResult)(results[0])); } /// <res/> public System.IAsyncResult BegindoGoogleSearch(string key, string q, int start, int maxResults, bool filter, string restrict, bool safeSearch, string lr, string ie, string oe, System.AsyncCallback callback, object asyncState) { return this.BeginInvoke("doGoogleSearch", new object[] { key, q, start, maxResults, filter, restrict, safeSearch, lr, ie, oe}, callback, asyncState); } /// <res/> public GoogleSearchResult EnddoGoogleSearch(System.IAsyncResult asyncResult) { object[] results = 
this.EndInvoke(asyncResult); return ((GoogleSearchResult)(results[0])); }}/// <res/>[System.Xml.Serialization.SoapTypeAttribute("GoogleSearchResult", "urn:GoogleSearch")]public class GoogleSearchResult{ /// <res/> public bool documentFiltering; /// <res/> public string searchComments; /// <res/> public int estimatedTotalResultsCount; /// <res/> public bool estimateIsExact; /// <res/> public ResultElement[] resultElements; /// <res/> public string searchQuery; /// <res/> public int startIndex; /// <res/> public int endIndex; /// <res/> public string searchTips; /// <res/> public DirectoryCategory[] directoryCategories; /// <res/> public System.Double searchTime;}/// <res/>[System.Xml.Serialization.SoapTypeAttribute("ResultElement", "urn:GoogleSearch")]public class ResultElement{ /// <res/> public string summary; /// <res/> public string URL; /// <res/> public string snippet; /// <res/> public string title; /// <res/> public string cachedSize; /// <res/> public bool relatedInformationPresent; /// <res/> public string hostName; /// <res/> public DirectoryCategory directoryCategory; /// <res/> public string directoryTitle;}/// <res/>[System.Xml.Serialization.SoapTypeAttribute("DirectoryCategory", "urn:GoogleSearch")]public class DirectoryCategory{ /// <res/> public string fullViewableName; /// <res/> public string specialEncoding;} 调用方法GoogleSearchService s = new GoogleSearchService(); GoogleSearchResult r; // call search function // r = s.doGoogleSearch( "", textSearch.Text, 0, 10, false, "", false, "", "", ""); // create HTML document to show result // string strFile = "result.html"; StreamWriter sw = File.CreateText(strFile); // Header inforamtion // sw.WriteLine("<HTML><HEAD><style> BODY { font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; font-size : 9pt; color : #000000; SCROLLBAR-FACE-COLOR: white; SCROLLBAR-HIGHLIGHT-COLOR: #003366; SCROLLBAR-SHADOW-COLOR: #003366; SCROLLBAR-3DLIGHT-COLOR: #f9f9f9; SCROLLBAR-ARROW-COLOR: #003366; 
SCROLLBAR-TRACK-COLOR: white; SCROLLBAR-DARKSHADOW-COLOR: #f9f9f9 }</style></HEAD><BODY>"); // Category // foreach(DirectoryCategory dc in r.directoryCategories) { sw.Write("<b>Category</b> : "); sw.WriteLine(dc.fullViewableName); sw.WriteLine("<br><br><br>"); } // iterate items // foreach(ResultElement re in r.resultElements) { // Title // string strTitle = "<a href=\"" + re.URL + "\">" + re.title + "</a><br>"; sw.WriteLine(strTitle); // snippet // string strSnippet = re.snippet +"<br>"; sw.WriteLine(strSnippet); // link and cache size // string strLink = "<a href=\"" + re.URL + "\">" + re.URL + "</a> - " + re.cachedSize + "<br><br>"; sw.WriteLine(strLink); // 2 line // sw.WriteLine("<br><br>"); } // file close // sw.Close(); // result inforamtion // labelSearchText.Text = textSearch.Text + " 's web search"; int estResults = r.estimatedTotalResultsCount; double ldTime = r.searchTime; labelSearchResult.Text = "Total " + Convert.ToString(estResults) + " " + "1 - 10 seach result Total time:" + Convert.ToString(ldTime);该项目网上有下载。 相关技术文档http://code.google.com/intl/zh-CN/apis/codesearch/docs/2.0/developers_guide.html 很简单。编码后用GET方式提交搜索结果,百度会返回一个页面,既搜索结果。<a>标记,id=awN,N为当前条的下标。这是百度,其他搜索引擎类似。 很简单。 编码后用GET方式提交搜索地址,百度会返回一个页面,既搜索结果。 <a>标记,id=awN,N为当前条的下标。 这是百度,其他搜索引擎类似。 用WebBrowser控件可以遍历标记,带上翻页慢慢爬,可以爬出一大堆。 http://woso100.com 就已经做了。可以试试。 能借用参考参考吗?感谢了!邮箱[email protected] 楼主我偷懒一下,因为我不是做这个方向的——我是自己完全实现搜索引擎的那种,你这种可以去参考元搜索-METASearch,会对你有帮助的,就是组合最准确的几个答案嘛对吧 呵呵 是的。GOOGLE是有WEB服务调用就行了。 http://www.soulema.com这个是我去年业余时间做的,而且已经做到提供数据调用了,下面是一个例子http://www.soulema.com/s.aspx?type=xml&search=sql2005你也可以用这个功能来获得数据http://www.soulemei.com 是利用xml的一个例子另外,还有个纯静态的html,这个页面你可以直接拿过去用http://www.soulema.com/s.html 在delphi用WEBBROWSER控件如何抓取网页的网页地址。 感觉用正则不是最佳的方案,期待更好的方案,ing...................................... 样式问题 FCK 问题 比较急 在线等!repeater 筛选数据 显示 绑定repater空件的问题 ASP.NET 2.0 中login 控件 怎样设置才能在新窗口打开登陆地址? Session超时后重新登陆,怎么新的框架网页整个出现在原来框架的右边? 在asp.net网页中如何加入视频文件?? 
在window 窗体控件中,如何控制aspx页面中的元素。 报表数据输出行单元格合并(ROWSPAN) 请高手介绍一本学习ASP.NET的好书!! 用Request.Form的方式怎么得到图片控件的ImageUrl值 access中用OleDbParameter后数据不能及时更新???
/// <summary>
/// Click handler for the search button: downloads the Google result page for the text
/// entered in <c>TextBox1</c>, cuts out the result markup with a regular expression and
/// writes the matched HTML fragment straight into the current response.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding("GB2312");

    // The query is raw user input. Percent-encode it with the same charset that is
    // announced through "ie=", otherwise spaces, '&', '#', '+' or Chinese characters
    // corrupt the query string.
    string query = System.Web.HttpUtility.UrlEncode(TextBox1.Text, pageEncoding);
    Uri u = new Uri("http://www.google.cn/search?q=" + query + "&hl=zh-CN&ie=GB2312&lr=&nxpt=10.2068337957705091425725");

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(u);
    request.Method = "GET";

    // using blocks release the connection, stream and reader even if reading throws.
    string html;
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, pageEncoding))
    {
        html = reader.ReadToEnd();
    }

    // '.' does not match '\n' by default, so the page is flattened to a single line
    // first. The match must run on this flattened text; the previous code built it
    // but then matched against the raw page.
    string flattened = html.Replace("\r", "").Replace("\n", "");

    // Greedy match from the first result container to the last closing </div>.
    // (The former ".*.*" is equivalent to a single ".*".)
    Regex resultPattern = new Regex("<div><div class=g>.*</div>", RegexOptions.IgnoreCase);
    Match m = resultPattern.Match(flattened);

    // Match.Value is the empty string when nothing matched, so this is safe either way.
    Response.Write(m.Value);
}
效果图
http://blog.csdn.net/lem12/archive/2008/01/15/2044655.aspx
// Download the Google result page for the text in TextBox1, cut out the result markup
// with a regular expression and write the matched HTML fragment into the response.
System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding("GB2312");

// The query is raw user input: percent-encode it before it goes into the URL,
// otherwise spaces, '&', '#', '+' or Chinese characters corrupt the query string.
string query = System.Web.HttpUtility.UrlEncode(TextBox1.Text, pageEncoding);
Uri u = new Uri("http://www.google.cn/search?q=" + query + "&hl=zh-CN&ie=gbk&lr=&nxpt=10.2068337957705091425725");

HttpWebRequest request = (HttpWebRequest)WebRequest.Create(u);
request.Method = "GET";

// using blocks release the connection, stream and reader even if reading throws.
string html;
using (WebResponse response = request.GetResponse())
using (Stream body = response.GetResponseStream())
using (StreamReader reader = new StreamReader(body, pageEncoding))
{
    html = reader.ReadToEnd();
}

// '.' does not match '\n' by default, so the page is flattened to one line first and
// the regex is applied to the flattened text (the previous code matched the raw page).
string flattened = html.Replace("\r", "").Replace("\n", "");

// Greedy match from the first result container to the last closing </div>.
// (The former ".*.*" is equivalent to a single ".*".)
Regex resultPattern = new Regex("<div><div class=g>.*</div>", RegexOptions.IgnoreCase);
Match m = resultPattern.Match(flattened);

// Match.Value is the empty string when nothing matched.
Response.Write(m.Value);
百度的好像没有,只能用正则来弄。而且相当不方便
// <autogenerated>
// This code was generated by a tool.
// Runtime Version: 1.0.3705.209
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </autogenerated>
//------------------------------------------------------------------------------//
// This source code was auto-generated by wsdl, Version=1.0.3705.209.
// Google generated this file via the command line "wsdl.exe GoogleSearch.wsdl"
//
using System.Diagnostics;
using System.Xml.Serialization;
using System;
using System.Web.Services.Protocols;
using System.ComponentModel;
using System.Web.Services;
/// <summary>
/// SOAP client proxy for the "GoogleSearchBinding" binding (namespace <c>urn:GoogleSearch</c>).
/// Every operation is exposed as a synchronous method plus a Begin/End pair; all of them
/// forward to the Invoke / BeginInvoke / EndInvoke members inherited from
/// <see cref="System.Web.Services.Protocols.SoapHttpClientProtocol"/>.
/// </summary>
/// <remarks>
/// Derived from wsdl.exe output for GoogleSearch.wsdl (see the file header); regenerating
/// the proxy would overwrite manual edits.
/// </remarks>
[System.Diagnostics.DebuggerStepThrough]
[System.ComponentModel.DesignerCategory("code")]
[System.Web.Services.WebServiceBinding(Name = "GoogleSearchBinding", Namespace = "urn:GoogleSearch")]
[System.Xml.Serialization.SoapInclude(typeof(ResultElement))]
public class GoogleSearchService : System.Web.Services.Protocols.SoapHttpClientProtocol
{
    /// <summary>Creates the proxy and points it at the service endpoint.</summary>
    public GoogleSearchService()
    {
        Url = "http://api.google.com/search/beta2";
    }

    /// <summary>Synchronously invokes the "doGetCachedPage" operation.</summary>
    /// <param name="key">First operation argument (presumably the API key; confirm against the WSDL).</param>
    /// <param name="url">Second operation argument (presumably the page address; confirm against the WSDL).</param>
    /// <returns>The first returned value, cast to a byte array.</returns>
    [System.Web.Services.Protocols.SoapRpcMethod("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")]
    [return: System.Xml.Serialization.SoapElement("return", DataType = "base64Binary")]
    public byte[] doGetCachedPage(string key, string url)
    {
        object[] arguments = { key, url };
        object[] reply = Invoke("doGetCachedPage", arguments);
        return (byte[])reply[0];
    }

    /// <summary>Starts an asynchronous "doGetCachedPage" call.</summary>
    /// <param name="key">Forwarded as the first operation argument.</param>
    /// <param name="url">Forwarded as the second operation argument.</param>
    /// <param name="callback">Passed through to BeginInvoke.</param>
    /// <param name="asyncState">Passed through to BeginInvoke.</param>
    /// <returns>The handle returned by BeginInvoke; hand it to <see cref="EnddoGetCachedPage"/>.</returns>
    public System.IAsyncResult BegindoGetCachedPage(string key, string url, System.AsyncCallback callback, object asyncState)
    {
        object[] arguments = { key, url };
        return BeginInvoke("doGetCachedPage", arguments, callback, asyncState);
    }

    /// <summary>Completes a call started with <see cref="BegindoGetCachedPage"/>.</summary>
    /// <param name="asyncResult">Handle passed on to EndInvoke.</param>
    /// <returns>The first returned value, cast to a byte array.</returns>
    public byte[] EnddoGetCachedPage(System.IAsyncResult asyncResult)
    {
        object[] reply = EndInvoke(asyncResult);
        return (byte[])reply[0];
    }

    /// <summary>Synchronously invokes the "doSpellingSuggestion" operation.</summary>
    /// <param name="key">First operation argument (presumably the API key; confirm against the WSDL).</param>
    /// <param name="phrase">Second operation argument.</param>
    /// <returns>The first returned value, cast to a string.</returns>
    [System.Web.Services.Protocols.SoapRpcMethod("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")]
    [return: System.Xml.Serialization.SoapElement("return")]
    public string doSpellingSuggestion(string key, string phrase)
    {
        object[] arguments = { key, phrase };
        object[] reply = Invoke("doSpellingSuggestion", arguments);
        return (string)reply[0];
    }

    /// <summary>Starts an asynchronous "doSpellingSuggestion" call.</summary>
    /// <param name="key">Forwarded as the first operation argument.</param>
    /// <param name="phrase">Forwarded as the second operation argument.</param>
    /// <param name="callback">Passed through to BeginInvoke.</param>
    /// <param name="asyncState">Passed through to BeginInvoke.</param>
    /// <returns>The handle returned by BeginInvoke; hand it to <see cref="EnddoSpellingSuggestion"/>.</returns>
    public System.IAsyncResult BegindoSpellingSuggestion(string key, string phrase, System.AsyncCallback callback, object asyncState)
    {
        object[] arguments = { key, phrase };
        return BeginInvoke("doSpellingSuggestion", arguments, callback, asyncState);
    }

    /// <summary>Completes a call started with <see cref="BegindoSpellingSuggestion"/>.</summary>
    /// <param name="asyncResult">Handle passed on to EndInvoke.</param>
    /// <returns>The first returned value, cast to a string.</returns>
    public string EnddoSpellingSuggestion(System.IAsyncResult asyncResult)
    {
        object[] reply = EndInvoke(asyncResult);
        return (string)reply[0];
    }

    /// <summary>
    /// Synchronously invokes the "doGoogleSearch" operation. The ten arguments are sent
    /// in declaration order; their meaning is defined by the service, not by this proxy.
    /// </summary>
    /// <returns>The first returned value, cast to <see cref="GoogleSearchResult"/>.</returns>
    [System.Web.Services.Protocols.SoapRpcMethod("urn:GoogleSearchAction", RequestNamespace = "urn:GoogleSearch", ResponseNamespace = "urn:GoogleSearch")]
    [return: System.Xml.Serialization.SoapElement("return")]
    public GoogleSearchResult doGoogleSearch(string key, string q, int start, int maxResults, bool filter, string restrict, bool safeSearch, string lr, string ie, string oe)
    {
        object[] arguments = { key, q, start, maxResults, filter, restrict, safeSearch, lr, ie, oe };
        object[] reply = Invoke("doGoogleSearch", arguments);
        return (GoogleSearchResult)reply[0];
    }

    /// <summary>
    /// Starts an asynchronous "doGoogleSearch" call with the same ten operation arguments
    /// as <see cref="doGoogleSearch"/>, followed by the callback and state object that are
    /// passed through to BeginInvoke.
    /// </summary>
    /// <returns>The handle returned by BeginInvoke; hand it to <see cref="EnddoGoogleSearch"/>.</returns>
    public System.IAsyncResult BegindoGoogleSearch(string key, string q, int start, int maxResults, bool filter, string restrict, bool safeSearch, string lr, string ie, string oe, System.AsyncCallback callback, object asyncState)
    {
        object[] arguments = { key, q, start, maxResults, filter, restrict, safeSearch, lr, ie, oe };
        return BeginInvoke("doGoogleSearch", arguments, callback, asyncState);
    }

    /// <summary>Completes a call started with <see cref="BegindoGoogleSearch"/>.</summary>
    /// <param name="asyncResult">Handle passed on to EndInvoke.</param>
    /// <returns>The first returned value, cast to <see cref="GoogleSearchResult"/>.</returns>
    public GoogleSearchResult EnddoGoogleSearch(System.IAsyncResult asyncResult)
    {
        object[] reply = EndInvoke(asyncResult);
        return (GoogleSearchResult)reply[0];
    }
}
/// <res/>
/// <summary>
/// Data type mapped to the SOAP type "GoogleSearchResult" (namespace <c>urn:GoogleSearch</c>);
/// it is the return type of <c>GoogleSearchService.doGoogleSearch</c>.
/// </summary>
/// <remarks>
/// All members are plain public fields. The per-field notes below are inferred from the
/// field names only; confirm them against the service's WSDL or documentation.
/// </remarks>
[System.Xml.Serialization.SoapType("GoogleSearchResult", "urn:GoogleSearch")]
public class GoogleSearchResult
{
    /// <summary>Presumably whether result filtering was applied.</summary>
    public bool documentFiltering;

    /// <summary>Presumably free-text comments about the search.</summary>
    public string searchComments;

    /// <summary>Presumably the estimated total number of hits.</summary>
    public int estimatedTotalResultsCount;

    /// <summary>Presumably whether the estimate above is exact.</summary>
    public bool estimateIsExact;

    /// <summary>Array of individual result entries.</summary>
    public ResultElement[] resultElements;

    /// <summary>Presumably the query string the result refers to.</summary>
    public string searchQuery;

    /// <summary>Presumably the index of the first returned result.</summary>
    public int startIndex;

    /// <summary>Presumably the index of the last returned result.</summary>
    public int endIndex;

    /// <summary>Presumably usage tips returned with the result.</summary>
    public string searchTips;

    /// <summary>Array of directory category entries.</summary>
    public DirectoryCategory[] directoryCategories;

    /// <summary>Presumably the time the search took (unit not visible here).</summary>
    public double searchTime;
}
/// <res/>
/// <summary>
/// Data type mapped to the SOAP type "ResultElement" (namespace <c>urn:GoogleSearch</c>);
/// instances appear in <c>GoogleSearchResult.resultElements</c>.
/// </summary>
/// <remarks>
/// All members are plain public fields. The per-field notes below are inferred from the
/// field names only; confirm them against the service's WSDL or documentation.
/// </remarks>
[System.Xml.Serialization.SoapType("ResultElement", "urn:GoogleSearch")]
public class ResultElement
{
    /// <summary>Presumably a summary text for the hit.</summary>
    public string summary;

    /// <summary>Presumably the address of the hit.</summary>
    public string URL;

    /// <summary>Presumably a text excerpt from the hit.</summary>
    public string snippet;

    /// <summary>Presumably the title of the hit.</summary>
    public string title;

    /// <summary>Presumably the size of the cached copy, as text.</summary>
    public string cachedSize;

    /// <summary>Presumably whether related information exists for the hit.</summary>
    public bool relatedInformationPresent;

    /// <summary>Presumably the host name of the hit.</summary>
    public string hostName;

    /// <summary>Directory category attached to this entry.</summary>
    public DirectoryCategory directoryCategory;

    /// <summary>Presumably the title of the hit in the directory.</summary>
    public string directoryTitle;
}
/// <res/>
/// <summary>
/// Data type mapped to the SOAP type "DirectoryCategory" (namespace <c>urn:GoogleSearch</c>).
/// </summary>
/// <remarks>
/// Both members are plain public string fields; the notes below are inferred from the
/// field names only and should be confirmed against the service's WSDL or documentation.
/// </remarks>
[System.Xml.Serialization.SoapType("DirectoryCategory", "urn:GoogleSearch")]
public class DirectoryCategory
{
    /// <summary>Presumably the full display name of the category.</summary>
    public string fullViewableName;

    /// <summary>Presumably the name of a special encoding for the category.</summary>
    public string specialEncoding;
}
// Usage example: run a search through the SOAP proxy, render the results into
// result.html, then show summary figures in the two labels.

// The proxy instance was used but never declared in this listing.
GoogleSearchService s = new GoogleSearchService();

// Call the search function.
GoogleSearchResult r = s.doGoogleSearch(
    "",               // key (empty in this sample; presumably a real API key is required - confirm)
    textSearch.Text,  // q
    0,                // start
    10,               // maxResults
    false, "", false, "", "", "");

// Create the HTML document that shows the result.
string strFile = "result.html";

// The using block closes the file even when one of the writes below throws.
using (StreamWriter sw = File.CreateText(strFile))
{
    // Header information.
    sw.WriteLine("<HTML><HEAD><style> BODY { font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; font-size : 9pt; color : #000000; SCROLLBAR-FACE-COLOR: white; SCROLLBAR-HIGHLIGHT-COLOR: #003366; SCROLLBAR-SHADOW-COLOR: #003366; SCROLLBAR-3DLIGHT-COLOR: #f9f9f9; SCROLLBAR-ARROW-COLOR: #003366; SCROLLBAR-TRACK-COLOR: white; SCROLLBAR-DARKSHADOW-COLOR: #f9f9f9 }</style></HEAD><BODY>");

    // Categories. The array is guarded because foreach over a null array throws.
    if (r.directoryCategories != null)
    {
        foreach (DirectoryCategory dc in r.directoryCategories)
        {
            sw.Write("<b>Category</b> : ");
            sw.WriteLine(dc.fullViewableName);
            sw.WriteLine("<br><br><br>");
        }
    }

    // Result items, guarded against null for the same reason.
    if (r.resultElements != null)
    {
        foreach (ResultElement re in r.resultElements)
        {
            // Title, linked to the hit.
            string strTitle = "<a href=\"" + re.URL + "\">" + re.title + "</a><br>";
            sw.WriteLine(strTitle);

            // Snippet.
            string strSnippet = re.snippet + "<br>";
            sw.WriteLine(strSnippet);

            // Link and cache size.
            string strLink = "<a href=\"" + re.URL + "\">" + re.URL + "</a> - " + re.cachedSize + "<br><br>";
            sw.WriteLine(strLink);

            // Two blank lines between items.
            sw.WriteLine("<br><br>");
        }
    }

    // Close the elements opened in the header so the document is well-formed.
    sw.WriteLine("</BODY></HTML>");
}

// Result information.
labelSearchText.Text = textSearch.Text + " 's web search";
int estResults = r.estimatedTotalResultsCount;
double ldTime = r.searchTime;
labelSearchResult.Text = "Total " + Convert.ToString(estResults) + " " + "1 - 10 seach result Total time:" + Convert.ToString(ldTime);
该项目网上有下载。
很简单。
编码后用GET方式提交搜索请求,百度会返回一个页面,即搜索结果。
<a>标记,id=awN,N为当前条的下标。
这是百度,其他搜索引擎类似。
编码后用GET方式提交搜索地址,百度会返回一个页面,即搜索结果。
<a>标记,id=awN,N为当前条的下标。
这是百度,其他搜索引擎类似。
是的。GOOGLE是有WEB服务调用就行了。
这个是我去年业余时间做的,而且已经做到提供数据调用了,下面是一个例子
http://www.soulema.com/s.aspx?type=xml&search=sql2005
你也可以用这个功能来获得数据。http://www.soulemei.com 是利用 xml 的一个例子。另外,还有个纯静态的 html,这个页面你可以直接拿过去用:
http://www.soulema.com/s.html
ing......................................