<table width="880" border="0" cellpadding="5" cellspacing="1" bgcolor="#EAEAEA">
<tr>
<th width="86" bgcolor="#EFEFEF">货币名称</th>
<th width="86" bgcolor="#EFEFEF">现汇买入价</th>
<th width="86" bgcolor="#EFEFEF">现钞买入价</th>
<th width="86" bgcolor="#EFEFEF">卖出价</th>
<th width="86" bgcolor="#EFEFEF">基准价</th>
<th width="86" bgcolor="#EFEFEF">中行折算价</th>
<th width="86" bgcolor="#EFEFEF">发布日期</th>
<th width="86" bgcolor="#EFEFEF">发布时间</th>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">英镑</td>
<td bgcolor="#FFFFFF">1033.91</td>
<td bgcolor="#FFFFFF">1001.99</td>
<td bgcolor="#FFFFFF">1042.22</td>
<td bgcolor="#FFFFFF">1041.77</td>
<td bgcolor="#FFFFFF">1041.77</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">港币</td>
<td bgcolor="#FFFFFF">81.65</td>
<td bgcolor="#FFFFFF">81</td>
<td bgcolor="#FFFFFF">81.96</td>
<td bgcolor="#FFFFFF">81.92</td>
<td bgcolor="#FFFFFF">81.92</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">美元</td>
<td bgcolor="#FFFFFF">636.35</td>
<td bgcolor="#FFFFFF">631.24</td>
<td bgcolor="#FFFFFF">638.9</td>
<td bgcolor="#FFFFFF">638.67</td>
<td bgcolor="#FFFFFF">638.67</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
</table>如上的 HTML,我如何用正则表达式提取出每个 <tr> 中的数据?
<tr>
<th width="86" bgcolor="#EFEFEF">货币名称</th>
<th width="86" bgcolor="#EFEFEF">现汇买入价</th>
<th width="86" bgcolor="#EFEFEF">现钞买入价</th>
<th width="86" bgcolor="#EFEFEF">卖出价</th>
<th width="86" bgcolor="#EFEFEF">基准价</th>
<th width="86" bgcolor="#EFEFEF">中行折算价</th>
<th width="86" bgcolor="#EFEFEF">发布日期</th>
<th width="86" bgcolor="#EFEFEF">发布时间</th>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">英镑</td>
<td bgcolor="#FFFFFF">1033.91</td>
<td bgcolor="#FFFFFF">1001.99</td>
<td bgcolor="#FFFFFF">1042.22</td>
<td bgcolor="#FFFFFF">1041.77</td>
<td bgcolor="#FFFFFF">1041.77</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">港币</td>
<td bgcolor="#FFFFFF">81.65</td>
<td bgcolor="#FFFFFF">81</td>
<td bgcolor="#FFFFFF">81.96</td>
<td bgcolor="#FFFFFF">81.92</td>
<td bgcolor="#FFFFFF">81.92</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
<tr align="center">
<td bgcolor="#FFFFFF">美元</td>
<td bgcolor="#FFFFFF">636.35</td>
<td bgcolor="#FFFFFF">631.24</td>
<td bgcolor="#FFFFFF">638.9</td>
<td bgcolor="#FFFFFF">638.67</td>
<td bgcolor="#FFFFFF">638.67</td>
<td bgcolor="#FFFFFF">2011-08-31</td>
<td bgcolor="#FFFFFF">17:08:55</td>
</tr>
</table>如上的 HTML,我如何用正则表达式提取出每个 <tr> 中的数据?
string source = @"<table width=""880"" border=""0"" cellpadding=""5"" cellspacing=""1"" bgcolor=""#EAEAEA"">
<tr>
<th width=""86"" bgcolor=""#EFEFEF"">货币名称</th>
<th width=""86"" bgcolor=""#EFEFEF"">现汇买入价</th>
<th width=""86"" bgcolor=""#EFEFEF"">现钞买入价</th>
<th width=""86"" bgcolor=""#EFEFEF"">卖出价</th>
<th width=""86"" bgcolor=""#EFEFEF"">基准价</th>
<th width=""86"" bgcolor=""#EFEFEF"">中行折算价</th>
<th width=""86"" bgcolor=""#EFEFEF"">发布日期</th>
<th width=""86"" bgcolor=""#EFEFEF"">发布时间</th>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">英镑</td>
<td bgcolor=""#FFFFFF"">1033.91</td>
<td bgcolor=""#FFFFFF"">1001.99</td>
<td bgcolor=""#FFFFFF"">1042.22</td>
<td bgcolor=""#FFFFFF"">1041.77</td>
<td bgcolor=""#FFFFFF"">1041.77</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">港币</td>
<td bgcolor=""#FFFFFF"">81.65</td>
<td bgcolor=""#FFFFFF"">81</td>
<td bgcolor=""#FFFFFF"">81.96</td>
<td bgcolor=""#FFFFFF"">81.92</td>
<td bgcolor=""#FFFFFF"">81.92</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">美元</td>
<td bgcolor=""#FFFFFF"">636.35</td>
<td bgcolor=""#FFFFFF"">631.24</td>
<td bgcolor=""#FFFFFF"">638.9</td>
<td bgcolor=""#FFFFFF"">638.67</td>
<td bgcolor=""#FFFFFF"">638.67</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
</table>";
// Collect the inner text of every <td ...> element found in `source`.
// The pattern requires at least one character between "<td" and ">" and
// captures the tag-free text of the cell in the named group "content".
const string cellPattern = @"(?is)<td[^>]+>(?<content>[^<]+)</td>";
MatchCollection cells = Regex.Matches(source, cellPattern);

// Report how many cells were found before listing them.
MessageBox.Show(cells.Count.ToString());

if (cells.Count == 0)
{
    MessageBox.Show("No Match");
}
else
{
    // One message box per cell, in document order.
    foreach (Match cell in cells)
    {
        MessageBox.Show(cell.Groups["content"].Value);
    }
}
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
<tr>
<th width=""86"" bgcolor=""#EFEFEF"">货币名称</th>
<th width=""86"" bgcolor=""#EFEFEF"">现汇买入价</th>
<th width=""86"" bgcolor=""#EFEFEF"">现钞买入价</th>
<th width=""86"" bgcolor=""#EFEFEF"">卖出价</th>
<th width=""86"" bgcolor=""#EFEFEF"">基准价</th>
<th width=""86"" bgcolor=""#EFEFEF"">中行折算价</th>
<th width=""86"" bgcolor=""#EFEFEF"">发布日期</th>
<th width=""86"" bgcolor=""#EFEFEF"">发布时间</th>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">英镑</td>
<td bgcolor=""#FFFFFF"">1033.91</td>
<td bgcolor=""#FFFFFF"">1001.99</td>
<td bgcolor=""#FFFFFF"">1042.22</td>
<td bgcolor=""#FFFFFF"">1041.77</td>
<td bgcolor=""#FFFFFF"">1041.77</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">港币</td>
<td bgcolor=""#FFFFFF"">81.65</td>
<td bgcolor=""#FFFFFF"">81</td>
<td bgcolor=""#FFFFFF"">81.96</td>
<td bgcolor=""#FFFFFF"">81.92</td>
<td bgcolor=""#FFFFFF"">81.92</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
<tr align=""center"">
<td bgcolor=""#FFFFFF"">美元</td>
<td bgcolor=""#FFFFFF"">636.35</td>
<td bgcolor=""#FFFFFF"">631.24</td>
<td bgcolor=""#FFFFFF"">638.9</td>
<td bgcolor=""#FFFFFF"">638.67</td>
<td bgcolor=""#FFFFFF"">638.67</td>
<td bgcolor=""#FFFFFF"">2011-08-31</td>
<td bgcolor=""#FFFFFF"">17:08:55</td>
</tr>
</table>";
// Each match covers the cells of one <tr> that contains only <td> elements.
// The leading <td> is matched but not captured; every following <td> is
// captured by group 1, so Groups[1].Captures holds one entry per captured cell.
const string rowPattern = @"(?is)(?<=<tr[^>]*?>)\s*?<td[^>]*?>.*?</td>(?:\s*?<td[^>]*?>(.*?)</td>)*\s*(?=</tr>)";
MatchCollection rows = Regex.Matches(str, rowPattern);
for (int rowIndex = 0; rowIndex < rows.Count; rowIndex++)
{
    CaptureCollection cells = rows[rowIndex].Groups[1].Captures;
    for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
    {
        Console.WriteLine(cells[cellIndex].Value);
    }

    // Blank line separates one row's cells from the next row's.
    Console.WriteLine();
}
http://www.boc.cn/sourcedb/whpj/index.html
这个网址上的各个货币的报价信息。
上面两位的代码在我给出的 HTML 片段上运行结果是正确的,但是如果套用到页面的全部源码上,
ustbwuyi 的结果前面会多出几个 &nbsp;,huangwenquan123 的则会多匹配出一些东西,请再帮我改改。谢谢。
// Same row matcher as before, except that the captured cells must be exactly
// <td bgcolor="#FFFFFF">. The leading <td> of each row is still matched without
// being captured, so it never appears in Groups[1].Captures.
Regex rowRegex = new Regex(@"(?is)(?<=<tr[^>]*?>)\s*?<td[^>]*?>.*?</td>(?:\s*?<td\s*?bgcolor=""#FFFFFF""\s*>(.*?)</td>)*\s*(?=</tr>)");

// Walk the matches one at a time and print every captured cell on its own line.
Match row = rowRegex.Match(str);
while (row.Success)
{
    foreach (Capture cell in row.Groups[1].Captures)
    {
        Console.WriteLine(cell.Value);
    }

    row = row.NextMatch();
}
我也用控制台实验了一下,确实和你的一样,但是货币名称怎么没有了?
/// <summary>
/// Downloads the currency-quote page at http://www.boc.cn/sourcedb/whpj/index.html and
/// writes every data row to the response: all cells of a row on one line, separated by
/// a space, with a &lt;br&gt; after each row.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string pageHtml = Http.DownLoadData(@"http://www.boc.cn/sourcedb/whpj/index.html", Encoding.GetEncoding("UTF-8"));
    //string strReg = @"(?is)<td[^>]+>(?<content>[^<]+)</td>";

    // One match per <tr> whose cells are all <td bgcolor="#FFFFFF">. Group 1 sits inside
    // the repeated (?:...)* so it is captured once per cell, INCLUDING the first cell
    // (the currency name), which the earlier pattern matched without capturing.
    // IgnoreCase/Singleline are supplied through RegexOptions, so no inline (?is) is needed.
    string rowPattern = @"(?<=<tr[^>]*?>)(?:\s*?<td\s*?bgcolor=""#FFFFFF""\s*>(.*?)</td>)*\s*(?=</tr>)";
    Regex rowRegex = new Regex(rowPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);

    foreach (Match row in rowRegex.Matches(pageHtml))
    {
        // Group.Value only exposes the LAST capture of a repeated group; the complete
        // list of cells lives in Group.Captures.
        CaptureCollection cells = row.Groups[1].Captures;
        if (cells.Count == 0)
        {
            // The pattern can also succeed on an empty <tr></tr>; nothing to print.
            continue;
        }

        foreach (Capture cell in cells)
        {
            // Decode entities such as &nbsp; so surrounding padding can be trimmed, then
            // re-encode because the text comes from a remote page and is written into HTML.
            string cellText = System.Web.HttpUtility.HtmlDecode(cell.Value).Trim();
            Response.Write(System.Web.HttpUtility.HtmlEncode(cellText));
            Response.Write(" ");
        }

        Response.Write("<br>");
    }

    Response.End();
}

/// <summary>
/// Downloads the resource at <paramref name="strURL"/> and decodes the bytes with
/// <paramref name="oDownLoadDataEncoding"/>.
/// </summary>
/// <param name="strURL">Address of the resource to download.</param>
/// <param name="oDownLoadDataEncoding">Encoding used to decode the downloaded bytes.</param>
/// <returns>The downloaded content as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="oDownLoadDataEncoding"/> is null.</exception>
public static string DownLoadData(string strURL, Encoding oDownLoadDataEncoding)
{
    if (oDownLoadDataEncoding == null)
    {
        throw new ArgumentNullException("oDownLoadDataEncoding");
    }

    // Use the encoding directly instead of round-tripping through a code-page string.
    return DownloadAndDecode(strURL, oDownLoadDataEncoding);
}

/// <summary>
/// Downloads the resource at <paramref name="strURL"/> and decodes the bytes with the
/// encoding identified by <paramref name="DownLoadDataEncodingName"/>.
/// </summary>
/// <param name="strURL">Address of the resource to download.</param>
/// <param name="DownLoadDataEncodingName">
/// Either a numeric code page written as text (e.g. "65001", the form this method has
/// always accepted) or an encoding name (e.g. "utf-8").
/// </param>
/// <returns>The downloaded content as a string.</returns>
/// <exception cref="ArgumentException">The value is neither a known code page nor a known encoding name.</exception>
public static string DownLoadData(string strURL, string DownLoadDataEncodingName)
{
    // Resolve the encoding before touching the network so a bad identifier fails fast.
    Encoding encoding;
    int codePage;
    if (int.TryParse(DownLoadDataEncodingName, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out codePage))
    {
        encoding = Encoding.GetEncoding(codePage);
    }
    else
    {
        encoding = Encoding.GetEncoding(DownLoadDataEncodingName);
    }

    return DownloadAndDecode(strURL, encoding);
}

// Fetches the raw bytes at `url` and decodes them; the using block guarantees the
// WebClient is disposed even when the download throws.
private static string DownloadAndDecode(string url, Encoding encoding)
{
    using (System.Net.WebClient client = new System.Net.WebClient())
    {
        byte[] pageData = client.DownloadData(url);
        return encoding.GetString(pageData);
    }
}
// NOTE(review): this brace opens a method body whose header is not visible in this
// excerpt; the body downloads a page and writes regex matches to the response.
{
// Fetch the page through Http.DownLoadData, asking for UTF-8 decoding.
string strTemp = Http.DownLoadData(@"http://www.boc.cn/sourcedb/whpj/index.html", Encoding.GetEncoding("UTF-8"));
//string strReg = @"(?is)<td[^>]+>(?<content>[^<]+)</td>";
// Pattern: positioned right after a <tr ...> tag, match one leading <td> (not captured),
// then zero or more <td bgcolor="#FFFFFF"> cells whose content is captured by group 1,
// up to (but not including) the closing </tr>.
string strReg = @"(?is)(?<=<tr[^>]*?>)\s*?<td[^>]*?>.*?</td>(?:\s*?<td\s*?bgcolor=""#FFFFFF""\s*>(.*?)</td>)*\s*(?=</tr>)";
// The inline (?is) already enables IgnoreCase and Singleline; the options repeat them.
Regex re = new Regex(strReg, RegexOptions.IgnoreCase | RegexOptions.Singleline);
foreach (Match m in re.Matches(strTemp))
{
// NOTE(review): group 1 is inside a repeated construct, and Group.Value returns only its
// last capture, so a single cell per row is written here; m.Groups[1].Captures holds all.
Response.Write(m.Groups[1].Value);
Response.Write("<br>");
// The next line closes the loop and then ends the response.
} Response.End();
// The next line closes the method body above and begins the Encoding overload of
// DownLoadData, which turns the encoding's CodePage number into a string and delegates
// to the string overload.
} public static string DownLoadData(string strURL, Encoding oDownLoadDataEncoding)
{
string _EncodingName = oDownLoadDataEncoding.CodePage.ToString(); return DownLoadData(strURL, _EncodingName);
// The next line closes the Encoding overload and begins the string overload, which
// downloads the bytes at strURL and decodes them into a string.
// NOTE(review): despite its name, DownLoadDataEncodingName is passed to int.Parse below,
// so it must be a numeric code page written as text, not an encoding name.
} public static string DownLoadData(string strURL, string DownLoadDataEncodingName)
{
byte[] _pageData = null;
string _pageHtml = ""; System.Net.WebClient _WebClient = new System.Net.WebClient();
_pageData = _WebClient.DownloadData(@strURL);
// NOTE(review): Dispose is only reached when DownloadData returns normally; there is no
// using/finally around the client.
_WebClient.Dispose();
// Decode the downloaded bytes with the encoding for the parsed code page.
_pageHtml = Encoding.GetEncoding(int.Parse(DownLoadDataEncodingName)).GetString(_pageData);
return _pageHtml;
}
现在弄上了
// Read the saved page as GB2312 text; replace this with your own string.
string str = File.ReadAllText(@"E:\1.txt", Encoding.GetEncoding("gb2312"));

// Each match is the run of <td bgcolor="#FFFFFF"> cells directly inside one <tr>.
// Group 1 is captured once per cell, so every cell of the row (the first included)
// is available through Groups[1].Captures.
MatchCollection rows = Regex.Matches(str, @"(?is)(?<=<tr[^>]*?>)(?:\s*?<td\s*?bgcolor=""#FFFFFF""\s*>(.*?)</td>)*\s*(?=</tr>)");
foreach (Match row in rows)
{
    CaptureCollection cells = row.Groups[1].Captures;
    for (int i = 0; i < cells.Count; i++)
    {
        Console.WriteLine(cells[i].Value);
    }
}