想获取这个网站的列表内容,但用下面的方法获取不到完整的HTML。研究了半天没有结果,特向广大网友求助:是不是用HttpWebRequest的方式始终获取不到这种页面的完整HTML?(不想使用类似 InternetExplorer ie = GetInternetExplorer(); 的方法)
// Fetches the HTML of the target page with HttpWebRequest.
// Target page: http://www.circ.gov.cn/web/site0/tab454/module443/page1.htm
//
// The four fields below are class-level members; the statements after them
// belong inside the method that performs the fetch. `url`, `encoding` and
// `html` are not declared in this snippet and must be declared by the
// surrounding code.

// Cookie container attached to the request below.
private static CookieContainer cookie = new CookieContainer();
// Content-Type header value sent with the request.
private static string contentType = "application/x-www-form-urlencoded";
// Accept header value sent with the request.
private static string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
// User-Agent header value (an MSIE 7.0 identification string).
private static string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

url = "http://www.circ.gov.cn/web/site0/tab454/module443/page1.htm";

HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = userAgent;
// NOTE(review): a GET request carries no body, so this header is probably unnecessary - confirm before removing.
request.ContentType = contentType;
request.CookieContainer = cookie;
request.Accept = accept;
// HTTP method names are case-sensitive on the wire, so use the canonical upper-case form.
request.Method = "GET";
request.Timeout = 20000; // milliseconds

// The using blocks release the response, stream and reader even when
// reading throws, instead of relying on a trailing response.Close().
using (WebResponse response = request.GetResponse())
using (Stream responseStream = response.GetResponseStream())
using (StreamReader reader = new StreamReader(responseStream, encoding))
{
    html = reader.ReadToEnd();
}
// Fetches the HTML of the target page with HttpWebRequest.
// Target page: http://www.circ.gov.cn/web/site0/tab454/module443/page1.htm
//
// The four fields below are class-level members; the statements after them
// belong inside the method that performs the fetch. `url`, `encoding` and
// `html` are not declared in this snippet and must be declared by the
// surrounding code.

// Cookie container attached to the request below.
private static CookieContainer cookie = new CookieContainer();
// Content-Type header value sent with the request.
private static string contentType = "application/x-www-form-urlencoded";
// Accept header value sent with the request.
private static string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
// User-Agent header value (an MSIE 7.0 identification string).
private static string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

url = "http://www.circ.gov.cn/web/site0/tab454/module443/page1.htm";

HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = userAgent;
// NOTE(review): a GET request carries no body, so this header is probably unnecessary - confirm before removing.
request.ContentType = contentType;
request.CookieContainer = cookie;
request.Accept = accept;
// HTTP method names are case-sensitive on the wire, so use the canonical upper-case form.
request.Method = "GET";
request.Timeout = 20000; // milliseconds

// The using blocks release the response, stream and reader even when
// reading throws, instead of relying on a trailing response.Close().
using (WebResponse response = request.GetResponse())
using (Stream responseStream = response.GetResponseStream())
using (StreamReader reader = new StreamReader(responseStream, encoding))
{
    html = reader.ReadToEnd();
}
解决方案 »
- 为什么内容保存到数据库后过段时间变成英文的单引号
- 怎样从一个table里select出一些行然后插入另一个table,这个table的表是一模一样的 ?
- 请问:如何通过数据库中的datetime字段判断是上午还是下午
- 一个关于socket通过代理的问题高手进,现金揭帖!
- 初学ASP.NET,问一简单问题
- 单击列标头对DataGrid1进行重新排序后,问题就来了???
- 有使用过IPropertyBag的吗?
- 请教:关于对象的远程化操作
- 第一次用SQL SERVER来编程,请大家帮我解决几个问题
- 装好了.net framework sdk以后,编好了一个.cs的文件以后,怎样对这个文件进行编译
- 中括号问题public user[] selectall()
- vs 2005 dataSet 无法导入Excel表格
// Downloads the raw bytes of the page, decodes them as UTF-8 and prints the text.
// The snippet used `wc` without declaring it; the WebClient is created here in
// a using block so that it is also disposed once the download is finished.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    byte[] array = wc.DownloadData("http://www.circ.gov.cn/web/site0/tab454/module443/page1.htm");
    string str = Encoding.UTF8.GetString(array);
    Console.WriteLine(str);
}
{
    // Reads the whole page at `url` as UTF-8 text and returns it.
    // The method header is not part of this snippet; `url` comes from it.
    // WebClient, Stream and StreamReader are all disposable, so each is held
    // in a using block and released even if the read throws.
    using (System.Net.WebClient client = new System.Net.WebClient())
    using (Stream data = client.OpenRead(url)) // the dynamic page to extract
    using (StreamReader reader = new StreamReader(data, System.Text.Encoding.UTF8)) // explicit encoding
    {
        return reader.ReadToEnd();
    }
}
// Attempt 1: HttpWebRequest sending an MSIE 8.0 User-Agent string.
// `url` and `encoding` are not declared in this snippet and must be declared
// by the surrounding code.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string html;
// The using blocks close the response, stream and reader even when reading
// throws; the original closed only the stream and reader, and only on success.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    html = sr.ReadToEnd();
}

// Attempt 2: WebClient downloading the raw bytes.
string s;
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    // The snippet passed an empty address, which cannot be resolved to a URI;
    // the page address used by attempt 1 is passed instead.
    Byte[] pageData = wc.DownloadData(url);
    // NOTE(review): Encoding.Default depends on the machine's configuration; other
    // snippets in this thread decode the same page as UTF-8 - confirm which is right.
    s = System.Text.Encoding.Default.GetString(pageData);
}
Byte[] pageData = wc.DownloadData("");
System.Net.WebClient client = new System.Net.WebClient();
Stream data = client.OpenRead(url);//需要提取的动态页
这些方法都尝试过了,获取的结果都是一样的,都不全。结果跟IE右键查看源文件是一样的,没有包括列表的内容;而在Firefox里右键查看源文件是可以看到列表内容的。下面是当前获取到的不全的html代码的结尾部分:
...........href="/web/site0/tab129/" class="firstNormal"><span>时政经济</span></A></td>
<td width="14"></td>
<td><img src="/portals/0/images2008/line1.gif" width="1" height="9"></td>
</tr>
</table></td></tr></table></DIV><!--Two Level Menu End Point--><!-- End_Module_9272 -->
</div></TD>
</TR></TBODY></TABLE>