关于 .NET 页面抓取的问题，跪求！

本帖最后由 a57397873 于 2010-08-03 10:14:23 编辑

// Matches <a> tags that carry target="_blank" and captures the link text in the "Content" group.
// In a verbatim (@"...") string a double quote is written as "" (a \" would end the literal),
// and the attribute has to be spelled out literally: [target="_blank"] is a character class
// that matches just one character from that set.
Regex htmlRegex = new Regex(@"<a\s[^>]*?target=""_blank""[^>]*>(?<Content>[^<]*)</a>");
你试下
/// <summary>
    /// Collects every substring of <paramref name="WebText"/> that matches the
    /// regular expression <paramref name="userInput"/>, ignoring case.
    /// Meant for pulling link markup (href, frame, iframe) out of a page; what is
    /// actually extracted is determined solely by the supplied pattern.
    /// </summary>
    /// <param name="userInput">Regular-expression pattern to search for.</param>
    /// <param name="WebText">Text to search.</param>
    /// <returns>The matched substrings in order of occurrence; an empty array when nothing matches.</returns>
    public string[] Get_url_Array(string userInput, string WebText)
    {
        MatchCollection hits = Regex.Matches(WebText, userInput, RegexOptions.IgnoreCase);

        // The match count is known up front, so fill a correctly sized array directly.
        string[] found = new string[hits.Count];
        for (int i = 0; i < found.Length; i++)
        {
            found[i] = hits[i].Value;
        }

        return found;
    }
  // Sample anchor tag; the regex below pulls out the value of its target attribute.
  string str = "===<a href=\"../p/20100621_4245.htm\" target=\"_blank\" title=\"“梦回1980” 专场演唱会\">“梦回1980” 专场演唱会</a>==";
  // Alternative input without a target attribute (the tag still matches, but "name" captures nothing):
  //string str = "===<a href=\"../p/20100621_4245.htm\"  title=\"“梦回1980” 专场演唱会\">“梦回1980” 专场演唱会</a>==";

  // The optional group lazily scans the tag's attributes for target="..." and captures its value;
  // [^<>]* is the link text (the earlier class [^<|^>] also rejected '|' and '^').
  Regex re = new Regex("<a\\b(?:[^>]*?\\btarget=\"(?<name>[^\"]*)\")?[^>]*>[^<>]*</a>");

  // Match() returns an unsuccessful Match instead of throwing, unlike indexing Matches(str)[0].
  Match m = re.Match(str);

  // Groups["name"] is never null; Success tells whether the group actually took part in the match.
  if (m.Success && m.Groups["name"].Success)
  {
      Console.WriteLine(m.Groups["name"].Value);
  }
仓促写的，不要嘲笑