一个页面由很多个<div class=\"title\">标签构成
<div class=\"title\"><d1><dd class=\"floatl  f14\"><strong><a href=\"/a/sjhcycy\" target=\"_blank\">杨传英</a></strong></dd><dd class=\"floatl padl3\"><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /></dd><dd class=\"floatr \"><a href=\"/a/sjhcycy\" target=\"_blank\">进入网上店铺&gt;&gt;</a></dd><dd class=\"floatr marr13 f14\"><strong>13715290773</strong></dd><dd class=\"floatr\"><img alt=\"手机号码\" src=\"http://img1.soufun.com/secondhouse/image/agent/iconphone.gif\" /></dd><dd class=\"floatr marr13 wid70\"><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\"
 src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /></dd><dd class=\"floatr marr13 wid50\">刚刚来过</dd><dd class=\"floatr marr13\">中原地产</dd></dl></div><div class=\"title\"></div>(备注:内容基本同上一个)
<div class=\"title\"></div>(备注:内容基本同上一个)我想抓取 杨传英、13715290773、中原地产 这些信息,并以此类推,获取后面每个<div class=\"title\"></div>中的名字、号码、公司信息
正则表达式不太熟悉,希望高手赐教

解决方案 »

  1.   

    /// <summary>
    /// Demo: extracts the agent name, phone number and company name from every
    /// <c>&lt;div class="title"&gt;</c> block of a sample listing snippet and
    /// writes each value to the console on its own line (name, phone, company).
    /// </summary>
    /// <remarks>
    /// Each field is located by the markup that directly surrounds it rather than
    /// by counting the tags between fields. The number of rating-star and
    /// attendance icons in the sample is not a reliable anchor, so a pattern that
    /// skips a fixed number of tags only works for blocks with exactly the same
    /// icon counts as the sample.
    /// </remarks>
    private static void TestRegex05()
    {
        string html = "<div class=\"title\"><d1><dd class=\"floatl f14\"><strong><a href=\"/a/sjhcycy\" target=\"_blank\">杨传英</a></strong></dd><dd class=\"floatl padl3\"><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /><img src='http://img1.soufun.com/secondhouse/image/magent/star.jpg' height='14' /></dd><dd class=\"floatr \"><a href=\"/a/sjhcycy\" target=\"_blank\">进入网上店铺&gt;&gt;</a></dd><dd class=\"floatr marr13 f14\"><strong>13715290773</strong></dd><dd class=\"floatr\"><img alt=\"手机号码\" src=\"http://img1.soufun.com/secondhouse/image/agent/iconphone.gif\" /></dd><dd class=\"floatr marr13 wid70\"><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\"src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /><img alt=\"上周出勤7天\" src=\"http://img1.soufun.com/secondhouse/image/agent/ico.gif\" /></dd><dd class=\"floatr marr13 wid50\">刚刚来过</dd><dd class=\"floatr marr13\">中原地产</dd></dl></div>";

        // Every gap between fields is "(?:(?!</div>).)*?": lazily skip anything
        // except a closing </div>, so one match can never run past the end of
        // its own title block (e.g. an empty block is skipped, not merged with
        // the next one).
        //   name    - text of the first <a ...>...</a> link in the block
        //   tel     - all-digit content of a <strong> element
        //   company - content of the <dd> whose class is exactly "floatr marr13"
        string pattern =
            @"<div class=""title"">"
            + @"(?:(?!</div>).)*?<a\b[^>]*>(?<name>[^<]+)</a>"
            + @"(?:(?!</div>).)*?<strong>(?<tel>\d+)</strong>"
            + @"(?:(?!</div>).)*?<dd class=""floatr marr13"">(?<company>[^<]+)</dd>";

        // Singleline lets '.' match '\n', so blocks that wrap across lines in
        // the page source still match.
        MatchCollection agents = Regex.Matches(html, pattern, RegexOptions.Singleline);
        foreach (Match agent in agents)
        {
            Console.WriteLine(agent.Groups["name"].Value);
            Console.WriteLine(agent.Groups["tel"].Value);
            Console.WriteLine(agent.Groups["company"].Value);
        }
    }