一个类:class1using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Web;
using System.Text;
using System;namespace WindowsFormsApplication3
{
public class GetHtml
{ public static string getHtml(string url, string charSet)//url是要访问的网站地址,charSet是目标网页的编码,如果传入的是null或者"",那就自动分析网页的编码
{
try
{
WebClient myWebClient = new WebClient(); //创建WebClient实例myWebClient
// 需要注意的:
//有的网页可能下不下来,有种种原因比如需要cookie,编码问题等等
//这是就要具体问题具体分析比如在头部加入cookie
// webclient.Headers.Add("Cookie", cookie);
//这样可能需要一些重载方法。根据需要写就可以了 //获取或设置用于对向 Internet 资源的请求进行身份验证的网络凭据。
myWebClient.Credentials = CredentialCache.DefaultCredentials;
//如果服务器要验证用户名,密码
//NetworkCredential mycred = new NetworkCredential(struser, strpassword);
//myWebClient.Credentials = mycred;
//从资源下载数据并返回字节数组。(加@是因为网址中间有"/"符号)
byte[] myDataBuffer = myWebClient.DownloadData(url);
string strWebData = Encoding.Default.GetString(myDataBuffer); //获取网页字符编码描述信息
Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
string webCharSet = charSetMatch.Groups[2].Value;
if (charSet == null || charSet == "")
charSet = webCharSet; if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
return strWebData;
}
catch(Exception e){return "";}
}
}
}
主窗体代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;namespace WindowsFormsApplication3
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
} private void button1_Click(object sender, EventArgs e)
{
GetHtml a=new GetHtml();
this.textBox1.Text =GetHtml.getHtml("http://www.baidu.com/", "UTF8");
}
}
}为什么运行无动静啊???我该怎么修改
using System.IO;
using System.Text.RegularExpressions;
using System.Web;
using System.Text;
using System;namespace WindowsFormsApplication3
{
public class GetHtml
{ public static string getHtml(string url, string charSet)//url是要访问的网站地址,charSet是目标网页的编码,如果传入的是null或者"",那就自动分析网页的编码
{
try
{
WebClient myWebClient = new WebClient(); //创建WebClient实例myWebClient
// 需要注意的:
//有的网页可能下不下来,有种种原因比如需要cookie,编码问题等等
//这是就要具体问题具体分析比如在头部加入cookie
// webclient.Headers.Add("Cookie", cookie);
//这样可能需要一些重载方法。根据需要写就可以了 //获取或设置用于对向 Internet 资源的请求进行身份验证的网络凭据。
myWebClient.Credentials = CredentialCache.DefaultCredentials;
//如果服务器要验证用户名,密码
//NetworkCredential mycred = new NetworkCredential(struser, strpassword);
//myWebClient.Credentials = mycred;
//从资源下载数据并返回字节数组。(加@是因为网址中间有"/"符号)
byte[] myDataBuffer = myWebClient.DownloadData(url);
string strWebData = Encoding.Default.GetString(myDataBuffer); //获取网页字符编码描述信息
Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
string webCharSet = charSetMatch.Groups[2].Value;
if (charSet == null || charSet == "")
charSet = webCharSet; if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
return strWebData;
}
catch(Exception e){return "";}
}
}
}
主窗体代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;namespace WindowsFormsApplication3
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
} private void button1_Click(object sender, EventArgs e)
{
GetHtml a=new GetHtml();
this.textBox1.Text =GetHtml.getHtml("http://www.baidu.com/", "UTF8");
}
}
}为什么运行无动静啊???我该怎么修改
http://topic.csdn.net/u/20110219/22/4191d9b6-6560-40d5-a29f-280b434acd7a.html
=>
catch(Exception e){return e.Message;}
看看输出什么?
函数中,按f9设置断点,单步跟踪
查看变量的值,判断问题即可看到具体问题,发上来,讨论
个人意见,高手多多指教
检查:会不会是因为编码不对,转成的string有乱码,然后造成后面的正则失败?