while ( (line=sr.ReadLine()) != null ) { MatchCollection match=Regex.Matches(line,pattern,RegexOptions.IgnoreCase); foreach(Match m in match) { listBox2.Text+=m.ToString(); } }
line=sr.ReadLine() 把上面那个语句读出来字符串贴上来看看。你可以试一下,用 string pattern=@"<b class=f16>[^<>]*</b>"; string line = "<b class=f16>-27℃~-17℃</b>";
MatchCollection match=Regex.Matches(line,pattern,RegexOptions.IgnoreCase); foreach(Match m in match) { Console.WriteLine(m.ToString()); }是可以读到字符串的。 肯定是原始字符串和你给出的不一样,所以才没有数据。
'读取一个网页原文件函数 Public Function GetDate(ByVal strUrl As String) As String Dim html As String = "" Dim resp As HttpWebResponse Dim req As HttpWebRequest = CType(HttpWebRequest.Create(strUrl), HttpWebRequest) req.Timeout = 25000 resp = CType(req.GetResponse(), HttpWebResponse) Dim sr As StreamReader = New StreamReader(resp.GetResponseStream(), System.Text.Encoding.Default)'选择默认编码 html = sr.ReadToEnd() Return html End Function Sub Main() Dim s As String = GetDate("http://weather.news.sina.com.cn/cgi-bin/figureWeather/simpleSearch.cgi?city=%B1%B1%BE%A9") Dim re As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex("<b class=f16>(?<tem>[^<]*?)</b>", RegexOptions.IgnoreCase) For Each m As System.Text.RegularExpressions.Match In re.Matches(s) Console.WriteLine(m.Groups("tem").Value()) Next Console.ReadLine() End Sub 返回结果:2℃~-4℃
// Extract the temperature text from every <b class=f16>…</b> tag in yourString.
// Group 0 is the whole matched tag; group 1 is the text captured by the parentheses.
string regexString = "<b class=f16>([^<>]*)</b>";
MatchCollection mc = Regex.Matches(yourString, regexString);
foreach (Match m in mc)
{
    // Match exposes its captures through the Groups collection (there is no Group member).
    string temp = m.Groups[1].Value;   // -27℃~-17℃
    string temp2 = m.Groups[0].Value;  // <b class=f16>-27℃~-17℃</b>
}
// Scan the reader one line at a time and append every pattern match on that line to listBox2.
string line;
for (line = sr.ReadLine(); line != null; line = sr.ReadLine())
{
    MatchCollection hits = Regex.Matches(line, pattern, RegexOptions.IgnoreCase);
    foreach (Match hit in hits)
    {
        listBox2.Text = listBox2.Text + hit.ToString();
    }
}
把上面那个语句读出来的字符串贴上来看看。你可以试一下,用
// Self-contained check: run the pattern against a hard-coded sample line
// and print every match to the console.
// The character class [^<>]* matches any run of characters that are neither '<' nor '>'.
string pattern=@"<b class=f16>[^<>]*</b>";
string line = "<b class=f16>-27℃~-17℃</b>";
// Case-insensitive search over the single sample line.
MatchCollection match=Regex.Matches(line,pattern,RegexOptions.IgnoreCase);
foreach(Match m in match)
{
// m.ToString() is the full matched text, tags included.
Console.WriteLine(m.ToString());
}是可以读到字符串的。
肯定是原始字符串和你给出的不一样,所以才没有数据。
Templet source: /data1/www/htdocs/weather/figureWeather/htt/weatherHtt.html
Tables: 0
Errors: 0
-->
<html><head>
<title>天气预报_新浪网</title>
<meta http-equiv="Content-type" content="text/html; charset=gb2312">
<meta name="keywords" content="关键字1,关键字2">
<meta name="description" content="此页面的说明">
<style type="text/css">
td,p,li,select,input,textarea {font-size:12px;}.f10 {font-size: 10px;}
.f12 {font-size: 12px;}
.f14 {font-size: 14px;}
.f16 {font-size: 16px;}.l15 {line-height:150%;}A img{border:none;}
</style>
</head>
<body bgcolor=#DAE8FF topmargin=0 marginheight=0 leftmargin=0 marginwidth=0>
<table width=100% border=0 cellpadding=0 cellspacing=0>
<tr>
<td width=50% align=center style="color:#085AB8" class=l15>
<font face="黑体" style="font-size:18px;">今日天气</font>
<br>
<font class=f14><b>长春</b></font>
<br>
<img src="http://image2.sina.com.cn/dy/weather/images/figure/qing_big.gif" width="50" height="50" alt="晴">
<br>
<b>晴</b><br>
<b class=f16>-17℃~-23℃</b> </td>
<td style="color:#083887" class=l15>
2004年12月23日-24日<br>
风力:小于3级<br>
空气质量:较差<br>
紫外线强度:弱<br>
</td>
</tr>
</table>
</body>
</html>
// Original code from the question: open the Sina weather page for the city given in the
// query string, read it line by line, and append every <b class=f16>…</b> match to listBox2.
System.Net.WebClient Client = new WebClient();
// NOTE(review): the city name is passed as raw Chinese text; the reply further down uses a
// percent-encoded city name instead — confirm which form the server expects.
string t=@"http://weather.news.sina.com.cn/cgi-bin/figureWeather/simpleSearch.cgi?city=长春";
Stream strm = Client.OpenRead(t);
// NOTE(review): no Encoding is passed here although the page declares charset=gb2312 —
// presumably the cause of the garbled characters reported later; verify.
StreamReader sr = new StreamReader(strm);
string pattern=@"<b class=f16>[^<>]*</b>";
string line;
// The pattern is applied to each line separately, so a match can never span a line break.
while ( (line=sr.ReadLine()) != null )
{ MatchCollection match=Regex.Matches(line,pattern,RegexOptions.IgnoreCase);
foreach(Match m in match)
{
listBox2.Text+=m.ToString();
}
}
// Only the underlying stream is closed, and only when no exception was thrown above.
strm.Close();
string pattern=@"<t";这些语句的"<t";都应该被选
<table width=100% border=0 cellpadding=0 cellspacing=0>
<tr>
<td width=50% align=center style="color:#085AB8" class=l15>
但是我只找到一个结果!
// Download the Sina weather page for one city and append every <b class=f16>…</b> match
// (the temperature range) to listBox2.
System.Net.WebClient Client = new WebClient();
// Note: the city name is sent as percent-encoded GB2312 bytes rather than raw Chinese text,
// because the raw characters were apparently not parsed by the server.
// 北京 -> %B1%B1%BE%A9, 长春 -> %B3%A4%B4%BA.
string t=@"http://weather.news.sina.com.cn/cgi-bin/figureWeather/simpleSearch.cgi?city=%B1%B1%BE%A9";
string pattern=@"<b class=f16>[^<>]*</b>";
string line;
// using blocks release the stream and the reader even if the download or decoding throws.
using (Stream strm = Client.OpenRead(t))
// The page declares charset=gb2312. Without an explicit encoding StreamReader decodes the
// bytes as UTF-8, which garbles the Chinese text and the characters next to it (the "℃~" run).
// On .NET Core / .NET 5+ the code-page encodings provider must be registered first.
using (StreamReader sr = new StreamReader(strm, System.Text.Encoding.GetEncoding("gb2312")))
{
    // Note: read the whole page in one go so matching is not limited to single lines.
    // ReadToEnd never returns null, so no null check is needed.
    line = sr.ReadToEnd();
}
foreach (Match m in Regex.Matches(line, pattern, RegexOptions.IgnoreCase))
{
    listBox2.Text += m.ToString();
}
确切的说是所有的"<t"都被匹配了
这个网页中共有5个"<t",都可以匹配
你给我的编码是“北京”的,呵呵,请问长春的编码是什么啊,怎么弄?我得到的结果是
<b class=f16>-2桫-8</b>
中间的‘~’怎么变成‘桫’了?
所以说编码有问题,
[^<>]*
的含义,我怎么看不懂啊,书上没有这样的符号,难道不全??
尤其是^<>的含义
所以说编码有问题,
mobydick(敌伯威|我排著队拿著爱的号码牌)
能否帮我解决这个编码问题,就是输出为~,而不是‘桫’我将帖子加分到50!答谢!!
' Downloads the page at strUrl and returns its raw HTML source as a string.
' strUrl: absolute URL of the page (non-ASCII query values must already be percent-encoded).
' Returns: the complete response body decoded as GB2312 text.
Public Function GetDate(ByVal strUrl As String) As String
    Dim html As String = ""
    Dim req As HttpWebRequest = CType(HttpWebRequest.Create(strUrl), HttpWebRequest)
    req.Timeout = 25000 ' milliseconds
    Dim resp As HttpWebResponse = CType(req.GetResponse(), HttpWebResponse)
    Try
        ' The page declares charset=gb2312, so decode with that encoding explicitly instead of
        ' relying on the machine's default code page (which only works on a Chinese-locale system).
        ' On .NET Core / .NET 5+ the code-page encodings provider must be registered first.
        Dim sr As StreamReader = New StreamReader(resp.GetResponseStream(), System.Text.Encoding.GetEncoding("gb2312"))
        Try
            html = sr.ReadToEnd()
        Finally
            sr.Close()
        End Try
    Finally
        ' Always release the connection, even when reading fails.
        resp.Close()
    End Try
    Return html
End Function

' Fetches the Beijing weather page and prints the text inside every <b class=f16> tag.
Sub Main()
    Dim s As String = GetDate("http://weather.news.sina.com.cn/cgi-bin/figureWeather/simpleSearch.cgi?city=%B1%B1%BE%A9")
    ' The named group "tem" captures the text between the opening and closing tags.
    Dim re As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex("<b class=f16>(?<tem>[^<]*?)</b>", RegexOptions.IgnoreCase)
    For Each m As System.Text.RegularExpressions.Match In re.Matches(s)
        Console.WriteLine(m.Groups("tem").Value)
    Next
    ' Keep the console window open until Enter is pressed.
    Console.ReadLine()
End Sub
返回结果:2℃~-4℃
所以说编码有问题,
mobydick(敌伯威|我排著队拿著爱的号码牌)
能否帮我解决这个编码问题,就是输出为~,而不是‘桫’我将帖子加分到50!答谢!!
====================================楼主说的是真的!嘿嘿! :)
{
Console.WriteLine( "Max : {0} Min : {1}" , m.Groups[ "MinTemp" ].Value , m.Groups[ "MaxTemp" ].Value );
}