函数如下。。但是遇到UTF8的就不行了 。求解
private string GetWebPageEx(string url)
{
string contentType = "application/x-www-form-urlencoded";
string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)"; try
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = userAgent;
request.ContentType = contentType;
request.CookieContainer = cookie;
request.Accept = accept;
request.Method = "get"; WebResponse response = request.GetResponse();
Stream responseStream = response.GetResponseStream();
Encoding encoding = null;
for (int i = 0; i < response.Headers.Count; i++)
{
Match m = Regex.Match(response.Headers[i].ToString(), "(?i)(?<=charset=)[^ ]+");
if (!m.Success) continue;
encoding = Encoding.GetEncoding(m.Value);
break;
} if (encoding == null)
{
encoding = Encoding.GetEncoding(936);
//encoding = Encoding.GetEncoding(936);
}
StreamReader sr = new StreamReader(responseStream,encoding);
String html = sr.ReadToEnd(); Regex r = new Regex("content=\"text/html; charset=(?<v>\\S*?)\" />");
if(encoding.BodyName=="gb2312" && r.IsMatch(html))
{
Match m=r.Match(html);
if (m.Groups["v"].ToString().IndexOf("-8") > -1)
{
byte[] xx = encoding.GetBytes(html);
html = Encoding.UTF8.GetString(xx);
//这里转换后还是有乱码,大概在有标点的地方比如逗号 引号的部分 会有乱码 }
}
response.Close(); return html;
}
catch (Exception ex)
{ return "获取失败!"; }
private string GetWebPageEx(string url)
{
string contentType = "application/x-www-form-urlencoded";
string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)"; try
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = userAgent;
request.ContentType = contentType;
request.CookieContainer = cookie;
request.Accept = accept;
request.Method = "get"; WebResponse response = request.GetResponse();
Stream responseStream = response.GetResponseStream();
Encoding encoding = null;
for (int i = 0; i < response.Headers.Count; i++)
{
Match m = Regex.Match(response.Headers[i].ToString(), "(?i)(?<=charset=)[^ ]+");
if (!m.Success) continue;
encoding = Encoding.GetEncoding(m.Value);
break;
} if (encoding == null)
{
encoding = Encoding.GetEncoding(936);
//encoding = Encoding.GetEncoding(936);
}
StreamReader sr = new StreamReader(responseStream,encoding);
String html = sr.ReadToEnd(); Regex r = new Regex("content=\"text/html; charset=(?<v>\\S*?)\" />");
if(encoding.BodyName=="gb2312" && r.IsMatch(html))
{
Match m=r.Match(html);
if (m.Groups["v"].ToString().IndexOf("-8") > -1)
{
byte[] xx = encoding.GetBytes(html);
html = Encoding.UTF8.GetString(xx);
//这里转换后还是有乱码,大概在有标点的地方比如逗号 引号的部分 会有乱码 }
}
response.Close(); return html;
}
catch (Exception ex)
{ return "获取失败!"; }
解决方案 »
- C#与C++的类型问题
- asp页面怎样在运行.cs文件之前进行JS判断
- c#操作word文件
- 请问,page.Session["f"] 与 System.Web.HttpContext.Current.Session["f'] 的作用是一样的吗?
- 请问做测试员有何前途阿?
- owc如何copy Excel文件内容
- DLL 调用
- datagridview绑定list后,怎么更新数据库呢?
- 如何修改DATASET writexml方法生成的XML文件??我是说改第一行的那个encoding
- 读取客户端文件(如文本)数据并写入服务器端数据库,可行吗?应如何做?(在线等待)
- C#逻辑题 望高手解答
- sql 语句操作的问题
ContentEncoding好像返会值里有GB18030这个Encoding不知道能取到不,反正ContentEncoding返回值需要判断,但是charset的绝对可以。