本人想用C#做一个信息抓取系统,想在新浪网上抓取新闻。前提是我已经在新浪网上登录了。请问应该怎么实现? 请教各位帅哥们: 本人想用C#做一个信息抓取系统,想在新浪网上抓取新闻。前提是我已经在新浪网上登录了。请问应该怎么实现? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 本来想回答的看见0.00%就算了!!bangding 自己模拟请求吧,大概也就是个Cookie不好处理,搞定就ok了。 public static byte[] GetHtmlByBytes(string server, string URL, byte[] byteRequest, string cookie, out string header) { long contentLength; HttpWebRequest httpWebRequest; HttpWebResponse webResponse; httpWebRequest = (HttpWebRequest)HttpWebRequest.Create(URL); CookieContainer co = new CookieContainer(); co.SetCookies(new Uri(server), cookie); httpWebRequest.CookieContainer = co; httpWebRequest.ContentType = "application/x-www-form-urlencoded"; httpWebRequest.Accept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*"; httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"; httpWebRequest.Headers.Add("Accept-Language", "zh-cn"); //是否支持重新定向 //httpWebRequest.AllowAutoRedirect = false; httpWebRequest.Method = "GET"; httpWebRequest.Timeout = 15000; httpWebRequest.ContentLength = byteRequest.Length; webResponse = (HttpWebResponse)httpWebRequest.GetResponse(); //header = webResponse.Headers.ToString(); header = webResponse.Headers.Get("Set-Cookie"); if (string.IsNullOrEmpty(header)) { header = cookie; } else { header = cookie + "," + header; } //getStream = webResponse.GetResponseStream(); Stream gzips = webResponse.GetResponseStream(); contentLength = webResponse.ContentLength; byte[] outBytes = new byte[0]; outBytes = ReadFully(gzips); gzips.Close(); return outBytes; }public static byte[] ReadFully(Stream stream) { byte[] buffer = new byte[128]; using (MemoryStream ms = new MemoryStream()) { while (true) { int read = stream.Read(buffer, 0, buffer.Length); if (read <= 0) return ms.ToArray(); ms.Write(buffer, 0, read); } } } C# 调用另一个类中的数组 
C#连接数据库的问题 高手来看看,求助。我想实现截图功能。这样写不对。。请指点一下 关于sqlite时间为空的读取问题。 求助 c#简单问题 求教:关于SAVEFILEDIALOG的问题 数据同步更新 发飙了!谁知道VS2005的C#编译器在哪里可以下载?或者在已安装VS2005的系统目录中可以找到? WINCE下,C#问题 怎么向XML中追加数据 各位高手 ,在c#中在怎么 调出系统的那个计算器 Excel 如何将修改的内容保存到原文件中
{
    // Body of GetHtmlByBytes: sends an HTTP GET to URL, attaching the caller's
    // cookie string to the request (scoped to the `server` URI), and returns the
    // raw bytes of the response body.
    //
    // On return, `header` holds the caller's cookie string; when the response
    // carries a Set-Cookie header, that value is appended after a comma so the
    // caller can pass the combined string to the next request.
    HttpWebRequest httpWebRequest = (HttpWebRequest)HttpWebRequest.Create(URL);

    CookieContainer cookies = new CookieContainer();
    cookies.SetCookies(new Uri(server), cookie);
    httpWebRequest.CookieContainer = cookies;

    httpWebRequest.ContentType = "application/x-www-form-urlencoded";
    httpWebRequest.Accept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
    httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)";
    httpWebRequest.Headers.Add("Accept-Language", "zh-cn");

    // AllowAutoRedirect is left untouched here; assign false to it at this point
    // if redirects must not be followed automatically.
    httpWebRequest.Method = "GET";
    httpWebRequest.Timeout = 15000; // milliseconds

    // ContentLength is deliberately NOT set. This request is a GET and no body
    // is ever written to the request stream; declaring a body length on a GET
    // makes GetResponse() throw ProtocolViolationException. byteRequest is
    // therefore unused and is kept only so the signature stays unchanged.

    // Dispose the response (and its stream) even when reading fails, so the
    // underlying connection is always released.
    using (HttpWebResponse webResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    {
        string setCookie = webResponse.Headers.Get("Set-Cookie");
        if (string.IsNullOrEmpty(setCookie))
        {
            header = cookie;
        }
        else
        {
            header = cookie + "," + setCookie;
        }

        using (Stream responseStream = webResponse.GetResponseStream())
        {
            return ReadFully(responseStream);
        }
    }
}
/// <summary>
/// Reads <paramref name="stream"/> from its current position until a read
/// yields no more bytes, and returns everything that was read.
/// </summary>
/// <param name="stream">The stream to drain. It is not closed by this method.</param>
/// <returns>A new array containing all bytes read; empty when nothing was read.</returns>
public static byte[] ReadFully(Stream stream)
{
    byte[] chunk = new byte[128];
    MemoryStream collected = new MemoryStream();
    try
    {
        int count;
        // Read returns zero (or less) once the source is exhausted.
        while ((count = stream.Read(chunk, 0, chunk.Length)) > 0)
        {
            collected.Write(chunk, 0, count);
        }

        return collected.ToArray();
    }
    finally
    {
        collected.Dispose();
    }
}