各位高手们!还望不吝赐教啊!~
就是一个方法:参数是URL地址,返回值是string,返回内容是该URL对应网页的静态内容。不需要取图片什么的,只需要文字就可以,
也就是所有在<html></html>标记里的字符。这个方法该怎么写啊?
谢谢了啊!~
问题解决马上就给分了!~
JAVA里用的是HttpUnit框架
ASP.NET应该怎么写啊

解决方案 »

  1.   

    ''' <summary>
    ''' Requests the address held in <c>url</c> and returns the whole response body as text.
    ''' </summary>
    ''' <returns>The response body, decoded with <c>System.Text.Encoding.Default</c>.</returns>
    ''' <remarks>
    ''' <c>url</c> is not declared inside this function; it has to be provided by the enclosing scope.
    ''' Exceptions raised while creating the request or reading the response propagate to the caller.
    ''' </remarks>
    Function getContent() As String
        ' An address that does not produce an HttpWebRequest fails the cast with InvalidCastException.
        Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.WebRequest.Create(url), System.Net.HttpWebRequest)
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24"

        ' Using blocks release the response, stream and reader even when reading throws.
        Using response As System.Net.WebResponse = request.GetResponse()
            Using resStream As System.IO.Stream = response.GetResponseStream()
                ' The original expression UTF8Encoding.Default is the static Encoding.Default member
                ' reached through a derived type, so it never selected UTF-8. It is written as
                ' Encoding.Default here to keep the behaviour and make the choice visible.
                ' NOTE(review): switch to System.Text.Encoding.UTF8 if the target pages are UTF-8.
                Using sr As New System.IO.StreamReader(resStream, System.Text.Encoding.Default)
                    Return sr.ReadToEnd()
                End Using
            End Using
        End Using
    End Function
      

  2.   

    HttpWebRequest
    http://support.microsoft.com/kb/303436
      

  3.   

    下班回家了,代码丢给你,自己参考、删减:
    using System;
    using System.Text;
    using System.Net;
    using System.IO;
    using System.Text.RegularExpressions;namespace XXXXXXXXXX.XXXXXXXX
    {
        public class BLL_UrlContent
        {
            private string _filteredContent = "";
            private string _pageContent = "";
            private Encoding encoding;
            private string _Url = "";
            private string _DataCode = "";        public BLL_UrlContent(string Url, string DataCode)
            {            _Url = Url;
                _DataCode = DataCode;            _pageContent = GetPage(_Url);
            }        public string pageContent
            {
                get
                {
                    return _pageContent;
                }
            }        public string filteredContent
            {
                get 
                {
                    if (_filteredContent != "")
                    {
                        return _filteredContent;
                    }
                    else
                    {
                        _filteredContent = FilterScript(FilterNewLine(FilterEnter(FilterAllHTML(getBodyContent(pageContent)))));
                        return _filteredContent;  
                    }   
               }
            }        private string GetPage(string m_uri)
            {
                #region            WebResponse response = null;
                Stream stream = null;
                StreamReader reader = null;            try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(m_uri);
                    request.Timeout = 3000;
                    response = request.GetResponse();
                    if (!request.HaveResponse)
                    {
                        response.Close();
                        return null;
                    }
                    stream = response.GetResponseStream();                if (!response.ContentType.ToLower().StartsWith("text/"))
                    {
                        return null;
                    }                string strEncoding = _DataCode.ToLower();
                    if (strEncoding == "utf-8")
                        encoding = Encoding.UTF8;
                    else if (strEncoding == "utf-7")
                        encoding = Encoding.UTF7;
                    else if (strEncoding == "unicode")
                        encoding = Encoding.Unicode;
                    else
                        encoding = Encoding.Default;                reader = new StreamReader(stream, encoding);
                    return reader.ReadToEnd();
                    //string buffer = "";// = reader.ReadToEnd();//,line;
                    //string line;
                    //while ((line = reader.ReadLine()) != null)
                    //{
                    //    buffer += line + "\r\n";
                    //}
                    //return buffer;
                }
                catch
                {
                    return "";
                }
                finally
                {
                    if (reader != null) reader.Close();
                    if (stream != null) stream.Close();
                    if (response != null) response.Close();
                }
                #endregion
            }
            private string FilterAllHTML(string input)
            {
                string pattern = @"<.*?>";
                return Regex.Replace(input, pattern, "", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);        }
            private static string FilterScript(string input)
            {
                string pattern = "<script.*?</script>";
                return Regex.Replace(input, pattern, "", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
            }
            private static string FilterLink(string input)
            {
                string pattern = @"<a.*?</a>";
                string result = Regex.Replace(input, pattern, "", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
                return result;
            }
            private string getBodyContent(string input)
            {
                string pattern = @"<body.*?</body>";
                Regex reg = new Regex(pattern, RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
                Match mc = reg.Match(input);
                string bodyContent = "";
                if (mc.Success)
                {
                    bodyContent = mc.Value;
                }
                return bodyContent;        }
            private string FilterNewLine(string input)
            {
                string pattern = "\r";
                return Regex.Replace(input, pattern, "", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
            }
            private string FilterEnter(string input)
            {
                string pattern = "\n";
                return Regex.Replace(input, pattern, "", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase);
            }
        }
    }
      

  4.   

    // Fetches a page and reads the entire response body as text.
    // 页面url (the address to request) and 页面内容 (the string that receives the markup)
    // are placeholders that the surrounding code has to declare.
    WebRequest request = WebRequest.Create(页面url);

    // using blocks release the response, stream and reader even when reading throws.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        页面内容 = sr.ReadToEnd();
    }

    // Extracting the <html></html> part from 页面内容 is then plain string manipulation.
      

  5.   

    http://blog.csdn.net/jiang_jiajia10/archive/2008/11/18/3325407.aspx