using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Web;namespace BaiduQA
{
public class BaiduQA
{ /// <summary>
///传入关键字
///</summary>
static string url_code = "";
static string my_entry = "";//url入口
static string string3 = "";//获取所有item的地址
static WebClient mywebclient = new WebClient();
public static string Get_Key(string key)
{ if (key == "")
{
return "传入值不能为空!";
}
else
{ mywebclient.Credentials = CredentialCache.DefaultCredentials;
string string1 = "http://zhidao.baidu.com/q?ct=17&word=";
string string2 = "&tn=ikaslist&rn=25&pn=0";
my_entry = string1 + HttpUtility.UrlEncode(key, Encoding.GetEncoding("GB2312")) + string2;
byte[] key_byte = mywebclient.DownloadData(my_entry);
url_code = Encoding.Default.GetString(key_byte);
search_count();
}
return my_entry;
}
public static string search_count()
{
Regex re1 = new Regex(@"共搜到相关问题\s(?<number>[\d].*?)\s");
MatchCollection mc1 = re1.Matches(url_code);
if (mc1.Count.ToString() == "0")
{
return "搜索不到相关信息";
}
else
{
foreach (Match m1 in mc1)
{
int int1 = int.Parse(m1.Groups["number"].ToString());
if (int1 > 25)
{
Regex re2 = new Regex(@"<font>\[尾页\]</font>");
Match m2 = re2.Match(url_code);
for (int i2 = 0; i2 <= 1; i2++)
{
if (i2 == 0)
{
first_page();//应该写的很明白了!
}
else
{
if (m2.Success)
//证明搜索这个关键字信息的数据至少有11页,根据baidu的格式为0,25,50,因为我已经调用了first_page()函数,所以i3//从25开始
{
Regex re3 = new Regex(@"下一页.*?ikaslist&rn=25&pn=(?<page>[\d].*?)>");
MatchCollection mc3 = re3.Matches(url_code);
foreach (Match m3 in mc3)
{
for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
{
byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
string3 = Encoding.Default.GetString(byte3);
Get_List();
}
}
} else//相关信息少于或等于10页,同上!
{
Regex re3 = new Regex(@"\[(?<page>[\d]{1,10})\]</a> [\s\S]{1,80}下一页");
MatchCollection mc3 = re3.Matches(url_code);
foreach (Match m3 in mc3)
{
for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
{
byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
string3 = Encoding.Default.GetString(byte3);
Get_List();
}
}
}
}
}
}
else
{
first_page();
}
}
}
return "OK";
}
public static void first_page()
{
Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
MatchCollection mc_list_page = re_list_page.Matches(string3);
foreach (Match m_list_page in mc_list_page)
{
byte[] my_list_byte = mywebclient.DownloadData(my_entry);
string my_list_string = Encoding.Default.GetString(my_list_byte);
Regex re_list = new Regex(@"最佳答案");
Match m_list = re_list.Match(my_list_string);
if (m_list.Success)
{
Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
MatchCollection mc_content = re_content.Matches(my_list_string);
foreach (Match m_content in mc_content)
{
Console.WriteLine("问题标题:" + m_content.Groups["问题标题"].ToString() + "\n\n");
}
}
else
{
Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
} }
}
public static void Get_List()
{
Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
MatchCollection mc_list_page = re_list_page.Matches(string3);
foreach (Match m_list_page in mc_list_page)
{
byte[] my_list_byte = mywebclient.DownloadData("http://zhidao.baidu.com" + m_list_page.Groups["web_url"].ToString());
string my_list_string = Encoding.Default.GetString(my_list_byte);
Regex re_list = new Regex(@"最佳答案");
Match m_list = re_list.Match(my_list_string);
if (m_list.Success)
{
Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
MatchCollection mc_content = re_content.Matches(my_list_string);
foreach (Match m_content in mc_content)
{
Console.WriteLine("问题标题:"+m_content.Groups["问题标题"].ToString()+"\n\n");
}
}
else
{
Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
}
}
}
}
}这是我调用的代码:using System;
using System.Collections.Generic;
using System.Text;
using System.Web;
namespace Test_BaiduQA
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine( BaiduQA.BaiduQA.Get_Key("中国人"));
Console.ReadLine();
}
}
}
晕,第一页搜索的数据好像被跳过了换言之
也就是说:第一页的数据没有被输出?????????
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Web;namespace BaiduQA
{
public class BaiduQA
{ /// <summary>
///传入关键字
///</summary>
static string url_code = "";
static string my_entry = "";//url入口
static string string3 = "";//获取所有item的地址
static WebClient mywebclient = new WebClient();
public static string Get_Key(string key)
{ if (key == "")
{
return "传入值不能为空!";
}
else
{ mywebclient.Credentials = CredentialCache.DefaultCredentials;
string string1 = "http://zhidao.baidu.com/q?ct=17&word=";
string string2 = "&tn=ikaslist&rn=25&pn=0";
my_entry = string1 + HttpUtility.UrlEncode(key, Encoding.GetEncoding("GB2312")) + string2;
byte[] key_byte = mywebclient.DownloadData(my_entry);
url_code = Encoding.Default.GetString(key_byte);
search_count();
}
return my_entry;
}
public static string search_count()
{
Regex re1 = new Regex(@"共搜到相关问题\s(?<number>[\d].*?)\s");
MatchCollection mc1 = re1.Matches(url_code);
if (mc1.Count.ToString() == "0")
{
return "搜索不到相关信息";
}
else
{
foreach (Match m1 in mc1)
{
int int1 = int.Parse(m1.Groups["number"].ToString());
if (int1 > 25)
{
Regex re2 = new Regex(@"<font>\[尾页\]</font>");
Match m2 = re2.Match(url_code);
for (int i2 = 0; i2 <= 1; i2++)
{
if (i2 == 0)
{
first_page();//应该写的很明白了!
}
else
{
if (m2.Success)
//证明搜索这个关键字信息的数据至少有11页,根据baidu的格式为0,25,50,因为我已经调用了first_page()函数,所以i3//从25开始
{
Regex re3 = new Regex(@"下一页.*?ikaslist&rn=25&pn=(?<page>[\d].*?)>");
MatchCollection mc3 = re3.Matches(url_code);
foreach (Match m3 in mc3)
{
for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
{
byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
string3 = Encoding.Default.GetString(byte3);
Get_List();
}
}
} else//相关信息少于或等于10页,同上!
{
Regex re3 = new Regex(@"\[(?<page>[\d]{1,10})\]</a> [\s\S]{1,80}下一页");
MatchCollection mc3 = re3.Matches(url_code);
foreach (Match m3 in mc3)
{
for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
{
byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
string3 = Encoding.Default.GetString(byte3);
Get_List();
}
}
}
}
}
}
else
{
first_page();
}
}
}
return "OK";
}
public static void first_page()
{
Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
MatchCollection mc_list_page = re_list_page.Matches(string3);
foreach (Match m_list_page in mc_list_page)
{
byte[] my_list_byte = mywebclient.DownloadData(my_entry);
string my_list_string = Encoding.Default.GetString(my_list_byte);
Regex re_list = new Regex(@"最佳答案");
Match m_list = re_list.Match(my_list_string);
if (m_list.Success)
{
Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
MatchCollection mc_content = re_content.Matches(my_list_string);
foreach (Match m_content in mc_content)
{
Console.WriteLine("问题标题:" + m_content.Groups["问题标题"].ToString() + "\n\n");
}
}
else
{
Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
} }
}
public static void Get_List()
{
Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
MatchCollection mc_list_page = re_list_page.Matches(string3);
foreach (Match m_list_page in mc_list_page)
{
byte[] my_list_byte = mywebclient.DownloadData("http://zhidao.baidu.com" + m_list_page.Groups["web_url"].ToString());
string my_list_string = Encoding.Default.GetString(my_list_byte);
Regex re_list = new Regex(@"最佳答案");
Match m_list = re_list.Match(my_list_string);
if (m_list.Success)
{
Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
MatchCollection mc_content = re_content.Matches(my_list_string);
foreach (Match m_content in mc_content)
{
Console.WriteLine("问题标题:"+m_content.Groups["问题标题"].ToString()+"\n\n");
}
}
else
{
Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
}
}
}
}
}这是我调用的代码:using System;
using System.Collections.Generic;
using System.Text;
using System.Web;
namespace Test_BaiduQA
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine( BaiduQA.BaiduQA.Get_Key("中国人"));
Console.ReadLine();
}
}
}
晕,第一页搜索的数据好像被跳过了换言之
也就是说:第一页的数据没有被输出?????????
MatchCollection mc_list_page = re_list_page.Matches(string3);
这里的string3没有正确赋值谁写的就让谁看下吧,写得这么麻烦,只是取个问题标题就要每次都去取一次网页源文件,效率不敢恭维的说