函数如下。。但是遇到UTF8的就不行了 。求解
private  string GetWebPageEx(string url)   
    {   
     string contentType = "application/x-www-form-urlencoded";   
     string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";   
     string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";     try
     {
         HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
         request.UserAgent = userAgent;
         request.ContentType = contentType;
         request.CookieContainer = cookie;
         request.Accept = accept;
         request.Method = "get";         WebResponse response = request.GetResponse();
         Stream responseStream = response.GetResponseStream();
         
         Encoding encoding = null;
         for (int i = 0; i < response.Headers.Count; i++)
         {
             Match m = Regex.Match(response.Headers[i].ToString(), "(?i)(?<=charset=)[^ ]+");
             if (!m.Success) continue;
             encoding = Encoding.GetEncoding(m.Value);
             break;
         }         if (encoding == null)
         {
             encoding = Encoding.GetEncoding(936);
             //encoding = Encoding.GetEncoding(936);
         }         
         
         StreamReader sr = new StreamReader(responseStream,encoding);
         
         String html = sr.ReadToEnd();                   Regex r = new Regex("content=\"text/html; charset=(?<v>\\S*?)\" />");
         if(encoding.BodyName=="gb2312" && r.IsMatch(html))
         {
             Match m=r.Match(html);
             if (m.Groups["v"].ToString().IndexOf("-8") > -1)
             {
                 
                 byte[] xx = encoding.GetBytes(html);
                 html = Encoding.UTF8.GetString(xx);
                 //这里转换后还是有乱码,大概在有标点的地方比如逗号 引号的部分 会有乱码             }
         }
         response.Close();         return html;
     }
     catch (Exception ex)
     { return "获取失败!"; }

解决方案 »

  1.   

    response.ContentEncoding多看MSDN...
      

  2.   

    http://topic.csdn.net/u/20110309/11/9b4c1ce1-b042-4c5f-9f48-5a6f8db72dfa.html
      

  3.   

    弄这个网址的话 好像返回 “”http://blog.sina.com.cn/s/articlelist_1215172700_0_1.html
      

  4.   

    弄这个网址的话 response.ContentEncoding好像返回 “”http://blog.sina.com.cn/s/articlelist_1215172700_0_1.html
      

  5.   

    回2楼。。我看你那个 好像是固定去UTF8 的 如果是GB2312的 怎么办?
      

  6.   

    用页面中的charset判断也可以。
    ContentEncoding好像返会值里有GB18030这个Encoding不知道能取到不,反正ContentEncoding返回值需要判断,但是charset的绝对可以。