Asp.net写采集正则问题

#region 执行正则提取出值
    /// <summary>
    /// 执行正则提取出值
    /// </summary>
    /// <param name="RegexString">正则表达式</param>
    /// <param name="pageStr">HtmlCode源代码</param>
    /// <returns></returns>
    public string GetRegValue(string RegexString, string pageStr)
    {
        string resString = "";
        Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase);
        MatchCollection matches = reg.Matches(pageStr);
        foreach (Match match in matches)
        {
            resString += match.Groups[1].Value;
        }
        return resString;
    }
    #endregion----------------------------------------------------------------
protected void Button1_Click(object sender, EventArgs e)
    {
        string UrlAddress = "http://www.baidu.com";
        string Urlcode = GetHttpPageCode(UrlAddress, System.Text.Encoding.Default);
        string fd = GetRegValue("<title>.+?</title>", Urlcode);
        Response.Write(fd+"2323");
    }
很奇怪，输出的只有2323,fd上没有值，正确的应该是取出"百度一下，你就知道"

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

string fd = GetRegValue("(?<=<title>)(.*?)(?=</title>)", Urlcode);
protected void Button1_Click(object sender, EventArgs e)
    {
        string htmlStr = _GetHtml("http://www.baidu.com");        Regex re = new Regex("(?<=<title>)[^<]+(?=</title>)", RegexOptions.None);
        MatchCollection mc = re.Matches(htmlStr);
        foreach (Match ma in mc)
        {
            //ma.Value; 这就是你要的值
        }
    }    #region##得到整个网页的源码
    /// <summary>
    /// 得到整个网页的源码
    /// </summary>
    /// <param name="Url"></param>
    /// <returns></returns>
    public static string _GetHtml(string Url)
    {
        Stream MyInStream = null;
        string Html = "";
        try
        {
            HttpWebRequest MyRequest = (HttpWebRequest)WebRequest.Create(Url);
            HttpWebResponse MyResponse = (HttpWebResponse)MyRequest.GetResponse();            MyInStream = MyResponse.GetResponseStream();            Encoding encode = System.Text.Encoding.Default;
            StreamReader sr = new StreamReader(MyInStream, encode);            Char[] read = new Char[256];
            int count = sr.Read(read, 0, 256);
            while (count > 0)
            {
                String str = new String(read, 0, count);
                Html += str;
                count = sr.Read(read, 0, 256);
            }
        }
        catch (Exception)
        {
            Html = "错误";
        }
        finally
        {
            if (MyInStream != null)
            {
                MyInStream.Close();
            }
        }
        return Html;
    }
    #endregion
Button1的CLick方法可以改一下
  protected void Button1_Click(object sender, EventArgs e)
    {
        string htmlStr = _GetHtml("http://www.baidu.com");        Regex re = new Regex("(?<=<title>)[^<]+(?=</title>)", RegexOptions.None);        Match mt = re.Match(htmlStr);
        string str = mt.Value;

        //str:百度一下，你就知道
    }
string UrlAddress = Url.Text.ToString().Trim();
        string Urlcode = GetHttpPageCode(UrlAddress, System.Text.Encoding.Default);
        string fd = GetRegValue("(?<=<title>)[^<]+(?=</title>)", Urlcode);
        Response.Write(fd+"2323");郁闷，还是不行，还是取不到<title></title>之间的内容
查看了一下，百度首页的源码。发现其<titile>百度一下，你就知道</title>并不是在一行。
所以
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase);
应改为
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase|RegexOptions.Multline);
正则不对，
改一下，支持回车就可以了"<title>.+?</title>"
改成
"<title>((?:(?!</title>)[\s\S])*)</title>"而且你之前的正则没有括号，而在你们的方法里，却是取第一个捕获组的内容，因此更不对了，
如果你想fd的内容要包含 title 标签时，则改成以下，
"(<title>(?:(?!</title>)[\s\S])*</title>)"把括号往外包。