我要采集一个页面里面的数据,
但是那个页面不能直接在 IE 的地址栏里面打开,而要通过一个代理页面:在此代理页面的搜索框里面输入地址才可以进去。
可以通过什么方法进行采集呢?

解决方案 »

  1.   

    即下面的步骤:打开在线代理网站 https://www.zxproxy.com/ ,输入 http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp ,点 "GO" 即可。然后我要采集的就是 http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp 里面的数据。
      

  2.   

    HttpWebRequest 模拟提交——我也知道是要这样,可是网上找了好多,都不行呐。打开的那个网页地址栏还加锁了,我郁闷了,还会过时。有没有实例呀?小弟急。
      

  3.   

    https://www.zxproxy.com/这个是代理页面
    http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp这个是我要访问采集的页面
      

  4.   

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Net;
    using System.IO;
    using System.IO.Compression;
    using System.Text.RegularExpressions;namespace WikiPageCreater.Common
    {
        public class PageHelper
        {
            /// <summary>
            /// 根据 url 获取网页编码
            /// </summary>
            /// <param name="url"></param>
            /// <returns></returns>
            public static string GetEncoding(string url)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                StreamReader reader = null;
                try
                {
                    request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 20000;
                    request.AllowAutoRedirect = false;                response = (HttpWebResponse)request.GetResponse();
                    if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
                    {
                        if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                            reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress));
                        else
                            reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII);                    string html = reader.ReadToEnd();                    Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)");
                        if (reg_charset.IsMatch(html))
                        {
                            return reg_charset.Match(html).Groups["charset"].Value;
                        }
                        else if (response.CharacterSet != string.Empty)
                        {
                            return response.CharacterSet;
                        }
                        else
                            return Encoding.Default.BodyName;
                    }
                }
                catch
                {
                }
                finally
                {                if (response != null)
                    {
                        response.Close();
                        response = null;
                    }
                    if (reader != null)
                        reader.Close();                if (request != null)
                        request = null;            }            return Encoding.Default.BodyName;
            }        /// <summary>
            /// 根据 url 和 encoding 获取当前url页面的 html 源代码        
           /// </summary>
            /// <param name="url"></param>
            /// <param name="encoding"></param>
            /// <returns></returns>
            public static string GetHtml(string url, Encoding encoding)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                StreamReader reader = null;
                try
                {
                    request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 20000;
                    request.AllowAutoRedirect = false;                response = (HttpWebResponse)request.GetResponse();
                    if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
                    {
                        if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                            reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding);
                        else
                            reader = new StreamReader(response.GetResponseStream(), encoding);
                        string html = reader.ReadToEnd();                    return html;
                    }
                }
                catch
                {
                }
                finally
                {                if (response != null)
                    {
                        response.Close();
                        response = null;
                    }
                    if (reader != null)
                        reader.Close();                if (request != null)
                        request = null;            }            return string.Empty;
            }
        }
    }
      

  5.   

    // Option 1: HttpWebRequest
    // The empty string passed to Create looks like a placeholder for the target URL,
    // and `encoding` is expected to be supplied by the surrounding code.
    System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("");
    // Send a browser-like User-Agent header with the request.
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    string content;
    // The using blocks release the response, stream and reader in the right order,
    // even when reading throws.
    using (System.Net.WebResponse response = request.GetResponse())
    using (System.IO.Stream resStream = response.GetResponseStream())
    using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
    {
        content = sr.ReadToEnd();
    }

    // Option 2: WebRequest / WebClient
    string content;
    // WebClient is disposable; the using block releases it once the download is done.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        // Authenticate with the credentials of the current user.
        wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
        // The empty string looks like a placeholder for the target URL.
        Byte[] pageData = wc.DownloadData("");
        // Decodes with the system default encoding.
        content = System.Text.Encoding.Default.GetString(pageData);
    }
      

  6.   

    这个是被 GFW 封了的网站,不一定要用在线代理网站,那样用程序控制起来太麻烦。
    其实只需要找一个国外的代理,然后在程序里设置为用这个代理去下载网站,就可以了。
      

  7.   

    大神.. 你是专业在 CSDN上回答问题的吗?