http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2
我抓取这个页面的时候总是报错:“远程服务器返回错误: (500) 内部服务器错误。”你们能帮我测试一下吗?我的代码如下:
// Method-body fragment: downloads `url` as raw bytes, decodes them with
// Encoding.Default, looks for a charset declared in a <meta> tag and, when that
// charset differs from Encoding.Default, decodes the same bytes again with it.
// If the download throws a WebException, the response attached to the exception
// is kept in `res` and read after the try/catch instead.
// NOTE(review): `url` and the enclosing method signature are not part of this
// fragment.
HttpWebResponse res;
string charSet = "";
try
{
WebClient myWebClient = new WebClient();//Create the WebClient instance myWebClient
myWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;
byte[] myDataBuffer = myWebClient.DownloadData(url);
string strWebData = Encoding.Default.GetString(myDataBuffer); //Get the page's character-encoding declaration
// Group 2 captures the text between "charset=" and the closing double quote of the meta tag.
Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
string webCharSet = charSetMatch.Groups[2].Value.Replace("\"", "");
// charSet starts out as "", so the charset found in the page is always taken here.
if (charSet == null || charSet == "")
// The line below holds two statements: the assignment guarded by the `if` above,
// followed by an independent `if` that guards the re-decoding on the next line.
// NOTE(review): Encoding.GetEncoding presumably throws for an unrecognised
// charset name, and that exception is not caught here - confirm.
charSet = webCharSet; if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
return strWebData;
}
catch (WebException ex)
{
// Keep the error response so its body can be read below.
// NOTE(review): ex.Response may be null when no response was received at all;
// res.GetResponseStream() would then fail - confirm and guard if needed.
res = (HttpWebResponse)ex.Response;
}
// Reached only after a WebException: read the error response body using Encoding.Default.
StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.Default);
return sr.ReadToEnd();抓取
我抓取这个页面的时候总是报错:“远程服务器返回错误: (500) 内部服务器错误。”你们能帮我测试一下吗?我的代码如下:
// Method-body fragment: downloads `url` as raw bytes, decodes them with
// Encoding.Default, looks for a charset declared in a <meta> tag and, when that
// charset differs from Encoding.Default, decodes the same bytes again with it.
// If the download throws a WebException, the response attached to the exception
// is kept in `res` and read after the try/catch instead.
// NOTE(review): `url` and the enclosing method signature are not part of this
// fragment.
HttpWebResponse res;
string charSet = "";
try
{
WebClient myWebClient = new WebClient();//Create the WebClient instance myWebClient
myWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;
byte[] myDataBuffer = myWebClient.DownloadData(url);
string strWebData = Encoding.Default.GetString(myDataBuffer); //Get the page's character-encoding declaration
// Group 2 captures the text between "charset=" and the closing double quote of the meta tag.
Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
string webCharSet = charSetMatch.Groups[2].Value.Replace("\"", "");
// charSet starts out as "", so the charset found in the page is always taken here.
if (charSet == null || charSet == "")
// The line below holds two statements: the assignment guarded by the `if` above,
// followed by an independent `if` that guards the re-decoding on the next line.
// NOTE(review): Encoding.GetEncoding presumably throws for an unrecognised
// charset name, and that exception is not caught here - confirm.
charSet = webCharSet; if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
return strWebData;
}
catch (WebException ex)
{
// Keep the error response so its body can be read below.
// NOTE(review): ex.Response may be null when no response was received at all;
// res.GetResponseStream() would then fail - confirm and guard if needed.
res = (HttpWebResponse)ex.Response;
}
// Reached only after a WebException: read the error response body using Encoding.Default.
StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.Default);
return sr.ReadToEnd();抓取
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;namespace App
{
/// <summary>
/// Static helpers that build an <see cref="HttpWebRequest"/> for a GET or a form POST
/// and return a task for its response.
/// </summary>
class HttpHelper
{
    /// <summary>User-Agent header sent when the caller does not supply one.</summary>
    private static readonly string DefaultUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16";

    /// <summary>
    /// Creates an HTTP POST request with an <c>application/x-www-form-urlencoded</c> body
    /// and starts retrieving its response.
    /// </summary>
    /// <param name="url">The URL to request. Must be an HTTP or HTTPS URL.</param>
    /// <param name="parameters">
    /// Form field names and values to post. May be null or empty, in which case no body is written.
    /// Names and values are percent-encoded by this method, so pass them unencoded.
    /// </param>
    /// <param name="userAgent">User-Agent header value; when null or empty a built-in default is used.</param>
    /// <param name="requestEncoding">
    /// Encoding used to turn field names and values into bytes before they are percent-encoded.
    /// </param>
    /// <param name="cookieContainer">
    /// Cookies to send with the request; when null a new, empty container is attached.
    /// </param>
    /// <returns>A task that yields the response to the request.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="url"/> is null or empty, or <paramref name="requestEncoding"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="url"/> is not an HTTP or HTTPS URL.</exception>
    /// <remarks>
    /// When there are fields to post, this method blocks the calling thread until the request
    /// stream has been obtained and the body written; only the response is retrieved through
    /// the returned task. A failure while obtaining the request stream is therefore thrown from
    /// this call (wrapped in an <see cref="AggregateException"/>) rather than from the task.
    /// </remarks>
    public static Task<WebResponse> CreatePostHttpResponse(string url, IDictionary<string, string> parameters, string userAgent, Encoding requestEncoding, CookieContainer cookieContainer)
    {
        if (string.IsNullOrEmpty(url))
        {
            throw new ArgumentNullException("url");
        }
        if (requestEncoding == null)
        {
            throw new ArgumentNullException("requestEncoding");
        }

        HttpWebRequest request = CreateRequest(url, "POST", userAgent, cookieContainer);
        request.ContentType = "application/x-www-form-urlencoded";

        // Only open the request stream when there is something to post.
        if (parameters != null && parameters.Count > 0)
        {
            byte[] data = BuildFormBody(parameters, requestEncoding);
            Task<Stream> streamTask = Task.Factory.FromAsync<Stream>(request.BeginGetRequestStream, request.EndGetRequestStream, request, TaskCreationOptions.None);

            // Reading Result waits for the stream, so the body is fully written
            // before the response is requested below.
            using (Stream stream = streamTask.Result)
            {
                stream.Write(data, 0, data.Length);
            }
        }

        return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
    }

    /// <summary>
    /// Creates an HTTP GET request and starts retrieving its response.
    /// </summary>
    /// <param name="url">The URL to request. Must be an HTTP or HTTPS URL.</param>
    /// <param name="userAgent">User-Agent header value; when null or empty a built-in default is used.</param>
    /// <param name="cookieContainer">
    /// Cookies to send with the request; when null a new, empty container is attached.
    /// </param>
    /// <returns>A task that yields the response to the request.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null or empty.</exception>
    /// <exception cref="ArgumentException"><paramref name="url"/> is not an HTTP or HTTPS URL.</exception>
    public static Task<WebResponse> CreateGetHttpResponse(string url, string userAgent, CookieContainer cookieContainer)
    {
        if (string.IsNullOrEmpty(url))
        {
            throw new ArgumentNullException("url");
        }

        HttpWebRequest request = CreateRequest(url, "GET", userAgent, cookieContainer);
        return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
    }

    /// <summary>
    /// Creates the request object and applies the settings shared by GET and POST:
    /// HTTP method, User-Agent and cookie container.
    /// </summary>
    private static HttpWebRequest CreateRequest(string url, string method, string userAgent, CookieContainer cookieContainer)
    {
        // WebRequest.Create returns a different request type for non-HTTP schemes,
        // in which case the "as" cast yields null; report that as a bad argument
        // instead of failing later with a NullReferenceException.
        HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            throw new ArgumentException("Only HTTP and HTTPS URLs are supported.", "url");
        }

        request.Method = method;
        request.UserAgent = string.IsNullOrEmpty(userAgent) ? DefaultUserAgent : userAgent;
        request.CookieContainer = cookieContainer ?? new CookieContainer();
        return request;
    }

    /// <summary>
    /// Serialises the fields as <c>name=value</c> pairs joined by <c>&amp;</c>,
    /// percent-encoding every name and value.
    /// </summary>
    private static byte[] BuildFormBody(IDictionary<string, string> parameters, Encoding requestEncoding)
    {
        StringBuilder body = new StringBuilder();
        foreach (KeyValuePair<string, string> field in parameters)
        {
            // Every pair appends at least '=', so a non-empty buffer means a pair precedes this one.
            if (body.Length > 0)
            {
                body.Append('&');
            }
            body.Append(FormEncode(field.Key, requestEncoding));
            body.Append('=');
            body.Append(FormEncode(field.Value, requestEncoding));
        }

        // After percent-encoding the body contains ASCII characters only.
        return Encoding.ASCII.GetBytes(body.ToString());
    }

    /// <summary>
    /// Percent-encodes <paramref name="text"/> for a form body, using
    /// <paramref name="encoding"/> to obtain the bytes that get escaped.
    /// Null is treated as an empty string.
    /// </summary>
    private static string FormEncode(string text, Encoding encoding)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        byte[] raw = encoding.GetBytes(text);
        byte[] escaped = WebUtility.UrlEncodeToBytes(raw, 0, raw.Length);
        return Encoding.ASCII.GetString(escaped);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.IO;namespace App
{
/// <summary>
/// Console entry point that fetches one book-list page through
/// <c>HttpHelper.CreateGetHttpResponse</c> and prints the response body.
/// </summary>
class Program
{
    /// <summary>
    /// Requests the page, writes its body to the console and waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        CookieContainer cookie = new CookieContainer();
        string content = "";

        // Reading Result blocks until the response has arrived. The response is
        // disposed together with its stream and reader once the body has been read.
        using (WebResponse res = HttpHelper.CreateGetHttpResponse("http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16", cookie).Result)
        using (Stream stream = res.GetResponseStream())
        using (StreamReader sr = new StreamReader(stream))
        {
            content = sr.ReadToEnd();
        }

        Console.WriteLine(content);
        Console.ReadKey();
    }
}
}