// Sample HTML fragment used by the regex demonstrations below.
string str = @"<td height=""25"" valign=""bottom"" colspan=""2"" align=""right""><div align=""center""><strong>24小时热线:18942238677</strong></div></td>
</tr>
<tr>
<td height=""25"" valign=""bottom"" width=""68"" align=""right""><li><a href=""123123.aspx"" title=""11111"">加盟代理</a></li></td>";

// Hotline number: the 11 digits that immediately follow the "24小时热线:" label
// (the look-behind keeps the label itself out of the match).
Regex reg = new Regex(@"(?<=24小时热线:)\d{11}");
Console.WriteLine(reg.Match(str).Value);

// Anchor targets: group 1 is the optional opening quote (or whitespace), which \1 requires
// again after the value; group 2 is the href value itself. The negative look-ahead skips
// values that start with "http://".
reg = new Regex(@"<a[^>]*?href=(['""\s]?)((?!http://)[^'""\s]+)\1[^>]*?>");
foreach (Match m in reg.Matches(str))
{
    Console.WriteLine(m.Groups[2].Value);
}
// Hotline number: everything between the "24小时热线:" label and the nearest closing </strong>.
// The capture is lazy so a second </strong> later on the same line is not swallowed.
Match mt = Regex.Match(str, @"<strong>24小时热线:(?<PhoneNum>.*?)</strong>", RegexOptions.IgnoreCase);
if (mt.Success)
{
    textBox1.Text = mt.Groups["PhoneNum"].Value;
}

// Link target: the first double-quoted href value in the text. The value is captured with
// [^"]* so it ends at its own closing quote, and nothing is required after that quote, so
// an href that is the last attribute of its tag is found as well.
mt = Regex.Match(str, @"href\s*=\s*""(?<url>[^""]*)""", RegexOptions.IgnoreCase);
if (mt.Success)
{
    textBox2.Text = mt.Groups["url"].Value;
}
// Print every match in mc, labelled by whichever named group took part in it.
// The "phone" group takes precedence; "url" is only looked at when "phone" did not match.
foreach (Match m in mc)
{
    Group phoneGroup = m.Groups["phone"];
    if (phoneGroup.Success)
    {
        Console.WriteLine("Phone:" + phoneGroup.Value);
        continue;
    }

    Group urlGroup = m.Groups["url"];
    if (urlGroup.Success)
    {
        Console.WriteLine("Url:" + urlGroup.Value);
    }
}
/// <summary>
/// Gets the inner HTML of every element in an HTML text that has the given tag
/// and/or carries the given attribute (optionally with the given value).
/// </summary>
/// <remarks>
/// Matching is regex-based and case-insensitive; it is not a real HTML parser.
/// The content of a match ends at the first closing tag with the same name, so
/// nested elements of the same tag are not balanced. <paramref name="tag"/>,
/// <paramref name="attr"/> and <paramref name="attrValue"/> are treated as literal
/// text, not as regular expressions.
/// </remarks>
/// <param name="html">The HTML text to parse.</param>
/// <param name="tag">Tag name of the elements to read; null or empty matches any tag.</param>
/// <param name="attr">Name of an attribute the element must carry; null or empty for no attribute filter.</param>
/// <param name="attrValue">Text the attribute value must start with; null or empty for no value filter.</param>
/// <returns>
/// The inner HTML of each matching element in document order, or an empty array
/// when <paramref name="html"/> is null or empty or nothing matches.
/// </returns>
public static string[] GetInnerHtmls(string html, string tag, string attr, string attrValue) {
    if (string.IsNullOrEmpty(html)) {
        return new string[] { };
    }

    StringBuilder pattern = new StringBuilder("<");
    if (!string.IsNullOrEmpty(tag)) {
        // The look-ahead stops a short tag name from opening on a longer one ("a" vs "abbr").
        pattern.Append("(?<tag>").Append(Regex.Escape(tag)).Append(@")(?![\w:-])[^>]*");
    }
    else {
        pattern.Append(@"(?<tag>\w+)(?![\w:-])[^>]*?");
    }
    if (!string.IsNullOrEmpty(attr)) {
        // Look-arounds keep the name from matching inside a longer attribute name ("id" vs "uid").
        pattern.Append(@"(?<![\w-])").Append(Regex.Escape(attr)).Append(@"(?![\w-])");
    }
    if (!string.IsNullOrEmpty(attrValue)) {
        pattern.Append(@"\s*=\s*(""|')?").Append(Regex.Escape(attrValue)).Append(@"(""|')?");
    }
    pattern.Append(".*?>");
    pattern.Append("(?<content>.*?)");
    // The back-reference forces the closing tag to have the same name as the opening one.
    pattern.Append(@"</\k<tag>>");

    // Singleline lets '.' cross line breaks so multi-line element content is captured.
    Regex reg = new Regex(pattern.ToString(), RegexOptions.IgnoreCase | RegexOptions.Singleline);
    List<string> list = new List<string>();
    foreach (Match m in reg.Matches(html)) {
        list.Add(m.Groups["content"].Value);
    }
    return list.ToArray();
}
/// <summary>
/// Gets the value of one attribute from every opening tag in an HTML text that
/// passes the tag / filter-attribute / filter-value criteria.
/// </summary>
/// <remarks>
/// Matching is regex-based and case-insensitive; it is not a real HTML parser.
/// All string arguments are treated as literal text, not as regular expressions.
/// Tags that pass the filter but do not carry <paramref name="attr"/> contribute
/// nothing to the result.
/// </remarks>
/// <param name="html">The HTML text to parse.</param>
/// <param name="tag">Tag name to look at; null or empty matches any tag.</param>
/// <param name="filterAttr">Name of an attribute the tag must carry; null or empty for no attribute filter.</param>
/// <param name="filterVal">Text the filter attribute's value must start with; null or empty for no value filter.</param>
/// <param name="attr">Name of the attribute whose value is returned.</param>
/// <returns>
/// The attribute values in document order, or an empty array when
/// <paramref name="html"/> or <paramref name="attr"/> is null or empty or nothing matches.
/// </returns>
public static string[] GetAttributeValues(string html, string tag, string filterAttr, string filterVal, string attr) {
    if (string.IsNullOrEmpty(html) || string.IsNullOrEmpty(attr)) {
        return new string[] { };
    }

    // Step 1: a pattern that finds the opening tags passing the filter.
    StringBuilder pattern = new StringBuilder("<");
    if (!string.IsNullOrEmpty(tag)) {
        // The look-ahead stops a short tag name from matching a longer one ("a" vs "abbr").
        pattern.Append(Regex.Escape(tag)).Append(@"(?![\w:-])[^>]*");
    }
    else {
        pattern.Append(@"\w+[^>]*?");
    }
    if (!string.IsNullOrEmpty(filterAttr)) {
        // Look-arounds keep the name from matching inside a longer attribute name.
        pattern.Append(@"(?<![\w-])").Append(Regex.Escape(filterAttr)).Append(@"(?![\w-])");
    }
    if (!string.IsNullOrEmpty(filterVal)) {
        pattern.Append(@"\s*=\s*(""|')?").Append(Regex.Escape(filterVal)).Append(@"(""|')?");
    }
    pattern.Append("[^>]*>");

    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;
    Regex tagRegex = new Regex(pattern.ToString(), options);

    // Step 2: a pattern that pulls the requested attribute out of one matched tag.
    // A quoted value ends at its own quote character; an unquoted value ends at whitespace or '>'.
    Regex valueRegex = new Regex(
        @"<[^>]*(?<![\w-])" + Regex.Escape(attr) + @"(?![\w-])\s*=\s*" +
        @"(?:""(?<value>[^""]*)""|'(?<value>[^']*)'|(?<value>[^\s>""']*))",
        options);

    List<string> list = new List<string>();
    foreach (Match match in tagRegex.Matches(html)) {
        Match m = valueRegex.Match(match.Value);
        if (m.Success) {
            list.Add(m.Groups["value"].Value);
        }
    }
    return list.ToArray();
}
{
m.Value 的值是一个冒号(:)
}