各位高手们!还望不吝赐教啊!~
我想写一个方法:参数是URL地址,返回值是string,返回内容是该URL对应网页的静态内容;不需要取图片之类的资源,只需要文字,
也就是<html></html>标记里的所有字符。这个方法该怎么写啊!
谢谢了啊!~
问题解决马上就给分了!~
JAVA里用的是HttpUnit框架
ASP.NET应该怎么写啊
我想写一个方法:参数是URL地址,返回值是string,返回内容是该URL对应网页的静态内容;不需要取图片之类的资源,只需要文字,
也就是<html></html>标记里的所有字符。这个方法该怎么写啊!
谢谢了啊!~
问题解决马上就给分了!~
JAVA里用的是HttpUnit框架
ASP.NET应该怎么写啊
解决方案 »
- 自定义控件定义了,无法引用~~
- 关于igoogle的实现方法
- 发布网站 出现乱码
- 如何将Gridview 的Commandfield “编辑”“ 删除” 放到所有数据列的右边
- asp.net做的网站上传后错误 急!!!
- 求高人---点选制表控件grid的时候,出现“IE无法开启网际网路网站http://localhost/project/form/aaa.asp.操作已终止”窗口,点确定,for
- javascript可以访问服务器控件吗?
- 求1个sql语句 不难也不容易
- 请教一个flash和windows窗体间传值的问题!给分
- 关于用ASP.NET 打开WORD文档问题
- Microsoft VBScript 运行时错误 错误 '800a01ad'
- 关于超链对搜索引擎收录的问题????
Dim request As System.Net.HttpWebRequest
request = System.Net.WebRequest.Create(url)
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24" Dim response As System.Net.WebResponse
response = request.GetResponse Dim resStream As System.IO.Stream
resStream = response.GetResponseStream Dim sr As System.IO.StreamReader
sr = New System.IO.StreamReader(resStream, System.Text.UTF8Encoding.Default) Dim htmlCode As String = sr.ReadToEnd()
resStream.Close()
sr.Close()
Return htmlCode End Function
http://support.microsoft.com/kb/303436
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;namespace XXXXXXXXXX.XXXXXXXX
{
/// <summary>
/// Downloads the page behind a URL once, at construction time, and exposes both
/// the raw markup and a plain-text rendering of its &lt;body&gt; element.
/// </summary>
public class BLL_UrlContent
{
    /// <summary>Time allowed for the server to start responding, in milliseconds.</summary>
    private const int RequestTimeoutMilliseconds = 3000;

    private const RegexOptions FilterOptions =
        RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.IgnoreCase;

    // Built once: compiling a regex is expensive and the patterns never change.
    private static readonly Regex TagPattern = new Regex(@"<.*?>", FilterOptions);
    private static readonly Regex ScriptPattern = new Regex("<script.*?</script>", FilterOptions);
    private static readonly Regex LinkPattern = new Regex(@"<a.*?</a>", FilterOptions);
    private static readonly Regex BodyPattern = new Regex(@"<body.*?</body>", FilterOptions);

    private readonly string _Url;
    private readonly string _DataCode;
    private readonly string _pageContent;

    // null means "not computed yet", so an empty result is cached like any other.
    private string _filteredContent;

    /// <summary>
    /// Fetches <paramref name="Url"/> immediately and keeps the downloaded markup.
    /// </summary>
    /// <param name="Url">Absolute http/https address of the page to download.</param>
    /// <param name="DataCode">
    /// Name of the character encoding used to decode the response, e.g. "utf-8",
    /// "utf-7", "unicode" or "gb2312" (case-insensitive). A null, empty or unknown
    /// name falls back to <see cref="Encoding.Default"/>.
    /// </param>
    public BLL_UrlContent(string Url, string DataCode)
    {
        _Url = Url;
        _DataCode = DataCode;
        _pageContent = GetPage(_Url);
    }

    /// <summary>
    /// Raw text of the response: an empty string when the download failed, or
    /// null when the server answered with a content type that is not "text/*".
    /// </summary>
    public string pageContent
    {
        get { return _pageContent; }
    }

    /// <summary>
    /// Text of the page's &lt;body&gt; with script blocks, all remaining tags and
    /// all CR/LF characters removed. Computed on first access and cached.
    /// Empty when there is no page content or no &lt;body&gt; element.
    /// </summary>
    public string filteredContent
    {
        get
        {
            if (_filteredContent == null)
            {
                // pageContent may be null (non-text response); the regex APIs reject null.
                string body = getBodyContent(_pageContent ?? "");

                // Script blocks must go first: once the generic tag filter has removed
                // the <script> tags themselves, their JavaScript can no longer be
                // told apart from ordinary page text.
                string withoutScripts = FilterScript(body);
                string withoutTags = FilterAllHTML(withoutScripts);
                _filteredContent = FilterNewLine(FilterEnter(withoutTags));
            }

            return _filteredContent;
        }
    }

    /// <summary>
    /// Downloads <paramref name="m_uri"/> and decodes the response body using the
    /// encoding named by the constructor's DataCode argument.
    /// </summary>
    /// <returns>
    /// The decoded body; null when the response is not "text/*"; an empty string
    /// when the address is invalid or the request fails for any reason.
    /// </returns>
    private string GetPage(string m_uri)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(m_uri);
            request.Timeout = RequestTimeoutMilliseconds;

            using (WebResponse response = request.GetResponse())
            {
                if (!request.HaveResponse)
                {
                    return null;
                }

                string contentType = response.ContentType;
                if (contentType == null ||
                    !contentType.StartsWith("text/", System.StringComparison.OrdinalIgnoreCase))
                {
                    return null;
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, ResolveEncoding(_DataCode)))
                {
                    return reader.ReadToEnd();
                }
            }
        }
        catch
        {
            // Deliberate best effort: a malformed address, an unsupported scheme,
            // a timeout or any network error all yield "no content" rather than
            // an exception escaping from the constructor.
            return "";
        }
    }

    /// <summary>
    /// Maps an encoding name to an <see cref="Encoding"/>. The names "utf-8",
    /// "utf-7" and "unicode" are matched case-insensitively; any other non-empty
    /// name is looked up with <see cref="Encoding.GetEncoding(string)"/>.
    /// </summary>
    /// <returns>
    /// The matching encoding, or <see cref="Encoding.Default"/> when the name is
    /// null, empty or not recognised by the runtime.
    /// </returns>
    private static Encoding ResolveEncoding(string name)
    {
        if (string.IsNullOrEmpty(name))
        {
            return Encoding.Default;
        }

        // Ordinal comparison: ToLower() is culture-sensitive and would turn
        // "UNICODE" into a non-matching string under e.g. a Turkish culture.
        if (string.Equals(name, "utf-8", System.StringComparison.OrdinalIgnoreCase))
        {
            return Encoding.UTF8;
        }

        if (string.Equals(name, "utf-7", System.StringComparison.OrdinalIgnoreCase))
        {
            return Encoding.UTF7;
        }

        if (string.Equals(name, "unicode", System.StringComparison.OrdinalIgnoreCase))
        {
            return Encoding.Unicode;
        }

        try
        {
            return Encoding.GetEncoding(name);
        }
        catch (System.ArgumentException)
        {
            // Unknown or unsupported code page name: keep the historical fallback.
            return Encoding.Default;
        }
    }

    /// <summary>Removes every "&lt;...&gt;" tag, leaving only the text between tags.</summary>
    private string FilterAllHTML(string input)
    {
        return TagPattern.Replace(input, "");
    }

    /// <summary>Removes each &lt;script&gt; element together with the code it contains.</summary>
    private static string FilterScript(string input)
    {
        return ScriptPattern.Replace(input, "");
    }

    /// <summary>
    /// Removes each &lt;a&gt; element together with its link text.
    /// Not part of the <see cref="filteredContent"/> pipeline.
    /// </summary>
    private static string FilterLink(string input)
    {
        return LinkPattern.Replace(input, "");
    }

    /// <summary>
    /// Returns the first "&lt;body ...&gt;...&lt;/body&gt;" span of
    /// <paramref name="input"/>, tags included, or an empty string when none is found.
    /// </summary>
    private string getBodyContent(string input)
    {
        Match match = BodyPattern.Match(input);
        return match.Success ? match.Value : "";
    }

    /// <summary>Removes every carriage-return character.</summary>
    private string FilterNewLine(string input)
    {
        return input.Replace("\r", "");
    }

    /// <summary>Removes every line-feed character.</summary>
    private string FilterEnter(string input)
    {
        return input.Replace("\n", "");
    }
}
}
WebResponse response = request.GetResponse();
Stream resStream = response.GetResponseStream();
StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default);
页面内容 = sr.ReadToEnd();
在页面内容中取<html></html>部分就是文本操作了。