http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2
我抓取这个页面的时候总是报错:“远程服务器返回错误: (500) 内部服务器错误。”你们能帮我测试一下吗?我的代码如下:
 HttpWebResponse res;
            string charSet = "";
            try
            {
                WebClient myWebClient = new WebClient();//创建WebClient实例myWebClient 
                myWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;
                byte[] myDataBuffer = myWebClient.DownloadData(url);
                string strWebData = Encoding.Default.GetString(myDataBuffer);                //获取网页字符编码描述信息 
                Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                string webCharSet = charSetMatch.Groups[2].Value.Replace("\"", "");
                if (charSet == null || charSet == "")
                    charSet = webCharSet;                if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
                    strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
                return strWebData;
            }
            catch (WebException ex)
            {
                res = (HttpWebResponse)ex.Response;
               
            }
            
            StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.Default);
            return sr.ReadToEnd();抓取

解决方案 »

  1.   

    示例:http://download.csdn.net/detail/zhuankeshumo/5656679
      

  2.   

    主要看那个 Helper 的方法就行了。如果你用的是 .NET 4.5,把这个引用删掉即可。
      

  3.   

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;namespace App
    {
        class HttpHelper
        {
            private static readonly string DefaultUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16";
            /// <summary>         
            /// 创建POST方式的HTTP请求          
            /// </summary>         
            /// <param name="url">请求的URL</param>          
            /// <param name="parameters">随同请求POST的参数名称及参数值字典</param>          
            /// <param name="userAgent">请求的客户端浏览器信息,可以为空</param>         
            /// <param name="requestEncoding">发送HTTP请求时所用的编码</param>         
            /// <param name="cookies">随同HTTP请求发送的Cookie信息,如果不需要身份验证可以为空</param>    
            /// <returns></returns>         
            public static Task<WebResponse> CreatePostHttpResponse(string url, IDictionary<string, string> parameters, string userAgent, Encoding requestEncoding, CookieContainer cookieContainer)
            {
                if (string.IsNullOrEmpty(url))
                {
                    throw new ArgumentNullException("url");
                }
                if (requestEncoding == null)
                {
                    throw new ArgumentNullException("requestEncoding");
                }
                HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";
                if (!string.IsNullOrEmpty(userAgent))
                {
                    request.UserAgent = userAgent;
                }
                else
                {
                    request.UserAgent = DefaultUserAgent;
                }
                if (cookieContainer == null)
                {
                    request.CookieContainer = new CookieContainer();
                }
                else
                {
                    request.CookieContainer = cookieContainer;
                }
                //如果需要POST数据             
                if (!(parameters == null || parameters.Count == 0))
                {
                    StringBuilder buffer = new StringBuilder();
                    int i = 0;
                    foreach (string key in parameters.Keys)
                    {
                        if (i > 0)
                        {
                            buffer.AppendFormat("&{0}={1}", key, parameters[key]);
                        }
                        else
                        {
                            buffer.AppendFormat("{0}={1}", key, parameters[key]);
                        }
                        i++;
                    }
                    byte[] data = requestEncoding.GetBytes(buffer.ToString());
                    var task = Task.Factory.FromAsync<Stream>(request.BeginGetRequestStream, request.EndGetRequestStream, request, TaskCreationOptions.None);               //等待任务完成               
                    task.Wait();                //执行完本任务后再连续执行写入留和返回response对象           '
                    using (Stream stream = task.Result)//如果上面没有等待任务完成那一句,在这里直接获取结果也是可以的           
                    {
                        stream.Write(data, 0, data.Length);
                    }
                }
                return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
            }        /// <summary>          
            /// 创建GET方式的HTTP请求        
            /// </summary>        
            /// <param name="url">请求的URL</param>          
            /// <param name="timeout">请求的超时时间</param>         
            /// <param name="userAgent">请求的客户端浏览器信息,可以为空</param>       
            /// <param name="cookies">随同HTTP请求发送的Cookie信息,如果不需要身份验证可以为空</param>         
            /// <returns></returns>        
            public static Task<WebResponse> CreateGetHttpResponse(string url, string userAgent, CookieContainer cookieContainer)
            {
                if (string.IsNullOrEmpty(url))
                {
                    throw new ArgumentNullException("url");
                }
                HttpWebRequest request = WebRequest.Create(new Uri(url)) as HttpWebRequest;
                request.Method = "GET";
                request.UserAgent = DefaultUserAgent;
                if (!string.IsNullOrEmpty(userAgent))
                {
                    request.UserAgent = userAgent;
                }
                if (cookieContainer == null)
                {
                    request.CookieContainer = new CookieContainer();
                }
                else
                {
                    request.CookieContainer = cookieContainer;
                }
                return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
            }
        }
    }
      

  4.   

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Net;
    using System.IO;namespace App
    {
        class Program
        {
            static void Main(string[] args)
            { 
                CookieContainer cookie=new CookieContainer();
                WebResponse res = HttpHelper.CreateGetHttpResponse("http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16" , cookie).Result;            string content = "";
                using (Stream stream = res.GetResponseStream())
                {
                    using (StreamReader sr = new StreamReader(stream))
                    {
                        content = sr.ReadToEnd();
                    }
                }
                Console.WriteLine(content);
                Console.ReadKey();
            }
        }
    }
      

  5.   

    如果不是 .NET 4.5,则需要那个 dll,下载的项目文件夹里面有。
      

  6.   

    看你前面的帖子,抓到的网页都是用正则来处理,而且自己也不精通,你可以用这个:http://htmlagilitypack.codeplex.com/