asp.net使用正则表达式获取网页内容 其实是老问题,网上也有些例子,但是都看不太懂,希望大家能讲解下,让我理解原理,随便给出答案更好O(∩_∩)O~网页是http://www.weather.com.cn/weather/101200101.shtml要使用正则表达式获取比如地名,天气情况,风向,温度等信息,正则表达式如何写,往大家赐教。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 楼上给正则 我给个抓取的using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Net;using System.IO;using System.IO.Compression;using System.Text.RegularExpressions;namespace WikiPageCreater.Common{ public class PageHelper { /// <summary> /// 根据 url 获取网页编码 /// </summary> /// <param name="url"></param> /// <returns></returns> public static string GetEncoding(string url) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress)); else reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII); string html = reader.ReadToEnd(); Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)"); if (reg_charset.IsMatch(html)) { return reg_charset.Match(html).Groups["charset"].Value; } else if (response.CharacterSet != string.Empty) { return response.CharacterSet; } else return Encoding.Default.BodyName; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return Encoding.Default.BodyName; } /// <summary> /// 根据 url 和 encoding 获取当前url页面的 html 源代码 /// </summary> /// <param name="url"></param> /// <param name="encoding"></param> /// <returns></returns> public static string GetHtml(string url, Encoding encoding) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding); else reader = new StreamReader(response.GetResponseStream(), encoding); string html = reader.ReadToEnd(); return html; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return string.Empty; } }} 没必要去抓取这些信息太麻烦这些东西 通过开放的web services就能得到天气预报Web服务,数据来源于中国气象局Endpoint :http://www.webxml.com.cn/WebServices/WeatherWebService.asmxDisco :http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?discoWSDL :http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?wsdlhttp://blog.csdn.net/fengyarongaa/article/details/6541109http://www.cnblogs.com/wlb/archive/2009/04/18/1438019.html 就在这网站上搞个iframe的显示。反正你是一天一天的。。http://service.weather.com.cn/plugin/index.shtml 不要什么都用正则表达式,对于网页采集,可以先用正则对html进行预处理(去除script、style等标记),然后用xml处理,才是上策。 用xml,没听说过,您能解释下么? 如何给两个页面传递参数 datalist嵌套,出现隔行显示怎么办?? 谁搞过这样SQL数据库复制 一个简单的sql问题 怎样取treeview中值 怎么控制文本框输入的值符合以下标准 我想把连接数据库单独写在一个类文件中,这样可以方便其他文件调用,可我调试时总是出错,哪位能不能够提供一份源码让我参考一下?谢谢! 急!!在线等待!!解决马上给分!!! 一个简单的问题!送分!!!!!(在线等待!!!) 关于datagrid动态产生列的问题(火急!) 请教 怎么判断是否POST 提交? CKFinder.dll 如何重新生成
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;
using System.Text.RegularExpressions;namespace WikiPageCreater.Common
{
public class PageHelper
{
/// <summary>
/// 根据 url 获取网页编码
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
public static string GetEncoding(string url)
{
HttpWebRequest request = null;
HttpWebResponse response = null;
StreamReader reader = null;
try
{
request = (HttpWebRequest)WebRequest.Create(url);
request.Timeout = 20000;
request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
{
if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress));
else
reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII); string html = reader.ReadToEnd(); Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)");
if (reg_charset.IsMatch(html))
{
return reg_charset.Match(html).Groups["charset"].Value;
}
else if (response.CharacterSet != string.Empty)
{
return response.CharacterSet;
}
else
return Encoding.Default.BodyName;
}
}
catch
{
}
finally
{ if (response != null)
{
response.Close();
response = null;
}
if (reader != null)
reader.Close(); if (request != null)
request = null; } return Encoding.Default.BodyName;
} /// <summary>
/// 根据 url 和 encoding 获取当前url页面的 html 源代码
/// </summary>
/// <param name="url"></param>
/// <param name="encoding"></param>
/// <returns></returns>
public static string GetHtml(string url, Encoding encoding)
{
HttpWebRequest request = null;
HttpWebResponse response = null;
StreamReader reader = null;
try
{
request = (HttpWebRequest)WebRequest.Create(url);
request.Timeout = 20000;
request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
{
if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding);
else
reader = new StreamReader(response.GetResponseStream(), encoding);
string html = reader.ReadToEnd(); return html;
}
}
catch
{
}
finally
{ if (response != null)
{
response.Close();
response = null;
}
if (reader != null)
reader.Close(); if (request != null)
request = null; } return string.Empty;
}
}
}
太麻烦
这些东西 通过开放的web services就能得到
天气预报Web服务,数据来源于中国气象局Endpoint :http://www.webxml.com.cn/WebServices/WeatherWebService.asmxDisco :http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?discoWSDL :http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?wsdl
http://blog.csdn.net/fengyarongaa/article/details/6541109
http://www.cnblogs.com/wlb/archive/2009/04/18/1438019.html
就在这网站上搞个iframe的显示。反正你是一天一天的。。http://service.weather.com.cn/plugin/index.shtml
用xml,没听说过,您能解释下么?