#region 执行正则提取出值
/// <summary>
/// 执行正则提取出值
/// </summary>
/// <param name="RegexString">正则表达式</param>
/// <param name="pageStr">HtmlCode源代码</param>
/// <returns></returns>
public string GetRegValue(string RegexString, string pageStr)
{
string resString = "";
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase);
MatchCollection matches = reg.Matches(pageStr);
foreach (Match match in matches)
{
resString += match.Groups[1].Value;
}
return resString;
}
#endregion----------------------------------------------------------------
protected void Button1_Click(object sender, EventArgs e)
{
string UrlAddress = "http://www.baidu.com";
string Urlcode = GetHttpPageCode(UrlAddress, System.Text.Encoding.Default);
string fd = GetRegValue("<title>.+?</title>", Urlcode);
Response.Write(fd+"2323");
}
很奇怪,输出的只有2323,fd上没有值,正确的应该是取出"百度一下,你就知道"
/// <summary>
/// 执行正则提取出值
/// </summary>
/// <param name="RegexString">正则表达式</param>
/// <param name="pageStr">HtmlCode源代码</param>
/// <returns></returns>
public string GetRegValue(string RegexString, string pageStr)
{
string resString = "";
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase);
MatchCollection matches = reg.Matches(pageStr);
foreach (Match match in matches)
{
resString += match.Groups[1].Value;
}
return resString;
}
#endregion----------------------------------------------------------------
protected void Button1_Click(object sender, EventArgs e)
{
string UrlAddress = "http://www.baidu.com";
string Urlcode = GetHttpPageCode(UrlAddress, System.Text.Encoding.Default);
string fd = GetRegValue("<title>.+?</title>", Urlcode);
Response.Write(fd+"2323");
}
很奇怪,输出的只有2323,fd上没有值,正确的应该是取出"百度一下,你就知道"
protected void Button1_Click(object sender, EventArgs e)
{
string htmlStr = _GetHtml("http://www.baidu.com"); Regex re = new Regex("(?<=<title>)[^<]+(?=</title>)", RegexOptions.None);
MatchCollection mc = re.Matches(htmlStr);
foreach (Match ma in mc)
{
//ma.Value; 这就是你要的值
}
} #region##得到整个网页的源码
/// <summary>
/// 得到整个网页的源码
/// </summary>
/// <param name="Url"></param>
/// <returns></returns>
public static string _GetHtml(string Url)
{
Stream MyInStream = null;
string Html = "";
try
{
HttpWebRequest MyRequest = (HttpWebRequest)WebRequest.Create(Url);
HttpWebResponse MyResponse = (HttpWebResponse)MyRequest.GetResponse(); MyInStream = MyResponse.GetResponseStream(); Encoding encode = System.Text.Encoding.Default;
StreamReader sr = new StreamReader(MyInStream, encode); Char[] read = new Char[256];
int count = sr.Read(read, 0, 256);
while (count > 0)
{
String str = new String(read, 0, count);
Html += str;
count = sr.Read(read, 0, 256);
}
}
catch (Exception)
{
Html = "错误";
}
finally
{
if (MyInStream != null)
{
MyInStream.Close();
}
}
return Html;
}
#endregion
Button1的CLick方法可以改一下
protected void Button1_Click(object sender, EventArgs e)
{
string htmlStr = _GetHtml("http://www.baidu.com"); Regex re = new Regex("(?<=<title>)[^<]+(?=</title>)", RegexOptions.None); Match mt = re.Match(htmlStr);
string str = mt.Value;
//str:百度一下,你就知道
}
string Urlcode = GetHttpPageCode(UrlAddress, System.Text.Encoding.Default);
string fd = GetRegValue("(?<=<title>)[^<]+(?=</title>)", Urlcode);
Response.Write(fd+"2323");郁闷,还是不行,还是取不到<title></title>之间的内容
所以
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase);
应改为
Regex reg = new Regex(RegexString, RegexOptions.IgnoreCase|RegexOptions.Multline);
改一下,支持回车就可以了"<title>.+?</title>"
改成
"<title>((?:(?!</title>)[\s\S])*)</title>"而且你之前的正则没有括号,而在你们的方法里,却是取第一个捕获组的内容,因此更不对了,
如果你想fd的内容要包含 title 标签时,则改成以下,
"(<title>(?:(?!</title>)[\s\S])*</title>)"把括号往外包。