想编写一个小程序,从京东商城中抓取搜索结果后再分析。
在京东的网页中,当在搜索框中输入“微星”后搜索,IE地址栏变为“http://search.360buy.com/Search?keyword=微星”。
于是在程序中依样画葫芦,输入如下代码
                    WebClient wc = new WebClient();
                    HttpWebRequest hwr = new HttpWebRequest();
                    string text = wc.DownloadString("http://search.360buy.com/Search?keyword=微星);
结果发现,搜索不到结果,查看返回的网页源码,<title>标记如下
<title>寰槦 - 商品搜索 - 京东商城</title> 正确的title应为<title>微星 - 商品搜索 - 京东商城</title>为什么IE向服务器传送http://search.360buy.com/Search?keyword=微星后能返回正确搜索结果,而我的WebClient传送时,服务器收到的是“寰槦”这样的乱码?怎么解决这个问题?注:1. 英文能搜索到正确的结果
    2. 即使用UrlEncode编码“微星”,也没用,一样返回<title>寰槦 - 商品搜索 - 京东商城</title>。
       编码代码如下:
                        string text = wc.DownloadString("http://search.360buy.com/Search?keyword=" + System.Web.HttpUtility.UrlEncode("微星"));

解决方案 »

  1.   

    // Fetch a page with HttpWebRequest and write the decoded body to the current response.
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create("http://www.baidu.com");
    // Accept-Language is a separate request header; assigning "Accept-Language: zh-cn" to
    // the Accept property would send that whole text as the value of the Accept header.
    myReq.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-cn");
    myReq.Referer = "http://www.baidu.com";
    myReq.MaximumAutomaticRedirections = 1;
    myReq.AllowAutoRedirect = true;
    // Dispose the response, its stream and the reader even if reading throws.
    using (HttpWebResponse myres = (HttpWebResponse)myReq.GetResponse())
    using (Stream resStream = myres.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        // NOTE(review): Response is not declared in this snippet; presumably the ASP.NET
        // page's HttpResponse - confirm against the hosting code.
        Response.Write(sr.ReadToEnd());
    }
    // Suggested next step from the original answer: filter the result with a regular expression.
      

  2.   


            /// <summary>
            /// 取得指定页的 html代码
            /// </summary>
            /// <param name="url">指定的URL</param>
            /// <returns>返回得到的html代码</returns>
            /// 调用示例 string Ls_html=Gethtml("http://www.163.com");
            private string Gethtml(string url)
            {
                try
                {
                    string html = string.Empty;
                    string encoding = string.Empty;
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                    request.Method = "get";
                    request.ContentType = "text/html";
                    request.Timeout = 60 * 1000;
                    byte[] buffer = new byte[6072];
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {                    using (Stream reader = response.GetResponseStream())
                        {
                            reader.ReadTimeout = 60 * 1000;
                            using (MemoryStream memory = new MemoryStream())
                            {
                                int index = 1;
                                int sum = 0;
                                while (index > 0 && sum < 100 * 6072)
                                {
                                    index = reader.Read(buffer, 0, 6072);
                                    if (index > 0)
                                    {
                                        memory.Write(buffer, 0, index);
                                        sum += index;
                                    }
                                }
                                html = Encoding.GetEncoding("gb2312").GetString(memory.ToArray());                            if (string.IsNullOrEmpty(html))
                                {
                                    return html;
                                }
                                else
                                {
                                    Regex re = new Regex(@"charset=(?<charset>[\s\S]*?)[""|']");
                                    Match m = re.Match(html.ToLower());
                                    encoding = m.Groups["charset"].ToString();
                                }                            if (string.IsNullOrEmpty(encoding) || string.Equals(encoding.ToLower(), "gb2312"))
                                {
                                    return html;
                                }
                                else
                                {
                                    return Encoding.GetEncoding(encoding).GetString(memory.ToArray());
                                }
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    return "";
                }
            }
    很好用的
      

  3.   

    // POST a gb2312-encoded form body and read the reply with the same encoding.
    // NOTE: the empty strings below (form body, URL, referer) are blanks left in the original
    // answer, presumably to be filled in by the reader; new Uri("") throws UriFormatException as written.
    Encoding encoding = Encoding.GetEncoding("gb2312");
    byte[] data = encoding.GetBytes("");
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(""));
    request.Referer = "";
    request.Timeout = 300000;
    request.AllowAutoRedirect = false;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; VIMEI Searcher)";
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;

    // Dispose the request stream even if the write fails.
    using (Stream writer = request.GetRequestStream())
    {
        writer.Write(data, 0, data.Length);
        writer.Flush();
    }

    // Dispose the response, its stream and the reader even if reading throws.
    string html;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, encoding))
    {
        html = reader.ReadToEnd();
    }
    string Text = html;