using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Web;namespace BaiduQA
{
    public class BaiduQA
    {   /// <summary>
        ///传入关键字
        ///</summary>
        static string url_code = "";
        static string my_entry = "";//url入口
        static string string3 = "";//获取所有item的地址
      static   WebClient mywebclient = new WebClient();
        public static string Get_Key(string key)
        {            if (key == "")
            {
              return  "传入值不能为空!";
            }
            else
            {                mywebclient.Credentials = CredentialCache.DefaultCredentials;
                string string1 = "http://zhidao.baidu.com/q?ct=17&word=";
                string string2 = "&tn=ikaslist&rn=25&pn=0";
                my_entry = string1 + HttpUtility.UrlEncode(key, Encoding.GetEncoding("GB2312")) + string2;
               
                byte[] key_byte = mywebclient.DownloadData(my_entry);
                url_code = Encoding.Default.GetString(key_byte);
                search_count();
              
            }
            return my_entry;
        }
        public static string search_count()
        {
            Regex re1 = new Regex(@"共搜到相关问题\s(?<number>[\d].*?)\s");
            MatchCollection mc1 = re1.Matches(url_code);
            if (mc1.Count.ToString() == "0")
            {
                return "搜索不到相关信息";
            }
            else
            {
                foreach (Match m1 in mc1)
                {
                    int int1 = int.Parse(m1.Groups["number"].ToString());
                    if (int1 > 25)
                    {
                        Regex re2 = new Regex(@"<font>\[尾页\]</font>");
                        Match m2 = re2.Match(url_code);
                        for (int i2 = 0; i2 <= 1; i2++)
                        {
                            if (i2 == 0)
                            {
                                first_page();//应该写的很明白了!
                            }
                            else
                            {
                                if (m2.Success)
                                //证明搜索这个关键字信息的数据至少有11页,根据baidu的格式为0,25,50,因为我已经调用了first_page()函数,所以i3//从25开始
                                {
                                    Regex re3 = new Regex(@"下一页.*?ikaslist&rn=25&pn=(?<page>[\d].*?)>");
                                    MatchCollection mc3 = re3.Matches(url_code);
                                    foreach (Match m3 in mc3)
                                    {
                                        for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
                                        {
                                            byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
                                            string3 = Encoding.Default.GetString(byte3);
                                            Get_List();
                                        }
                                    }
                                }                                else//相关信息少于或等于10页,同上!
                                {
                                    Regex re3 = new Regex(@"\[(?<page>[\d]{1,10})\]</a>&nbsp;[\s\S]{1,80}下一页");
                                    MatchCollection mc3 = re3.Matches(url_code);
                                    foreach (Match m3 in mc3)
                                    {
                                        for (int i3 = 25; i3 <= int.Parse(m3.Groups["page"].ToString()); i3 += 25)
                                        {
                                            byte[] byte3 = mywebclient.DownloadData(my_entry.Replace("ikaslist&rn=25&pn=0", "ikaslist&rn=25&pn=" + i3.ToString()));
                                            string3 = Encoding.Default.GetString(byte3);
                                            Get_List();
                                        }
                                    }
                                }
                            }
                        }
                        
                    }
                    else
                    {
                        first_page();
                    }
                }
            }
            return "OK";
        }
        public static void  first_page()
        {
            Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
            MatchCollection mc_list_page = re_list_page.Matches(string3);
            foreach (Match m_list_page in mc_list_page)
            {
                byte[] my_list_byte = mywebclient.DownloadData(my_entry);
                string my_list_string = Encoding.Default.GetString(my_list_byte);
                Regex re_list = new Regex(@"最佳答案");
                Match m_list = re_list.Match(my_list_string);
                if (m_list.Success)
                {
                    Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
                    MatchCollection mc_content = re_content.Matches(my_list_string);
                    foreach (Match m_content in mc_content)
                    {
                        Console.WriteLine("问题标题:" + m_content.Groups["问题标题"].ToString() + "\n\n");
                    }
                }
                else
                {
                    Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
                }            }
        }
        public static void  Get_List()
        {
            Regex re_list_page = new Regex(@"<table\sborder=0.*?href=""(?<web_url>[\s\S]*?)""");
            MatchCollection mc_list_page = re_list_page.Matches(string3);
            foreach (Match m_list_page in mc_list_page)
            {
                byte[] my_list_byte = mywebclient.DownloadData("http://zhidao.baidu.com" + m_list_page.Groups["web_url"].ToString());
                string my_list_string = Encoding.Default.GetString(my_list_byte);
                Regex re_list = new Regex(@"最佳答案");
                Match m_list = re_list.Match(my_list_string);
                if (m_list.Success)
                {
                    Regex re_content = new Regex(@"<cq>(?<问题标题>[\s\S]*?)</cq>[\s\S]*?<cd>(?<问题补充>[\s\S]*?)</cd>[\s\S]*?<div\sclass=""f14\sp90\spl10"">(?<回复>[\s\S]*?)</div>");
                    MatchCollection mc_content = re_content.Matches(my_list_string);
                    foreach (Match m_content in mc_content)
                    {
                        Console.WriteLine("问题标题:"+m_content.Groups["问题标题"].ToString()+"\n\n");
                    }
                }
                else
                {
                    Console.WriteLine("对于没有最佳答案的回答,那么即使获取到此问题也无意义!");
                }
            
            }
        }
            
    }
}
这是我调用的代码:
using System;
using System.Collections.Generic;
using System.Text;
using System.Web;
namespace Test_BaiduQA
{
    class Program
    {
        static void Main(string[] args)
        {
           Console.WriteLine( BaiduQA.BaiduQA.Get_Key("中国人"));
   
           Console.ReadLine();
        }
    }
}
晕,第一页搜索的数据好像被跳过了。
也就是说:第一页的数据没有被输出?