例如,字符串如下:
<p align="center"><img src="http://xjjxt.com/web/UploadFiles/20108298138512.jpg" border="0" alt="" /></p>
我只想截取其中的这一段:
<img src="http://xjjxt.com/web/UploadFiles/20108298138512.jpg" border="0" alt="" />
但我不知道这一段在字符串中的确切位置,因为它随时都会变化;而且其中的文件名“20108298138512.jpg”也可能是别的名字,长度同样不固定。
<p align="center"><img src="http://xjjxt.com/web/UploadFiles/20108298138512.jpg" border="0" alt="" /></p> 我只要截取
<img src="http://xjjxt.com/web/UploadFiles/20108298138512.jpg" border="0" alt="" />
而我又不知道这一段的正确位置,因为它是时刻改变的! 而且这一段字符串中“20108298138512.jpg” 可以是其他的,长度也是不固定的
{
    // Sample input: an <img> tag wrapped inside a paragraph element.
    string s = @"<p align=""center""><img src=""http://xjjxt.com/web/UploadFiles/20108298138512.jpg"" border=""0"" alt="""" /></p> ";

    // (?i) makes the match case-insensitive; the rest reads "<img", then any run of
    // characters other than '>', then the closing '>'.
    string imgPattern = @"(?i)<img[^>]*>";

    MatchCollection imgTags = Regex.Matches(s, imgPattern);
    for (int i = 0; i < imgTags.Count; i++)
    {
        Console.WriteLine(imgTags[i].Value);
    }

    /* Output:
    <img src="http://xjjxt.com/web/UploadFiles/20108298138512.jpg" border="0" alt="" />
    */
}
// Lazily match every "<img ... >" tag in s. Singleline lets '.' cross line breaks, and
// IgnoreCase accepts "<IMG" as well.
RegexOptions imgOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;
MatchCollection imgTags = Regex.Matches(s, "<img.*?>", imgOptions);

for (int i = 0; i < imgTags.Count; i++)
{
    // The tag is HTML-encoded before being written to the response.
    Response.Write(Server.HtmlEncode(imgTags[i].Value));
}
// Append the first capture group of every match of reg in str to TextBox2,
// one value per line.
MatchCollection hits = reg.Matches(str);
for (int i = 0; i < hits.Count; i++)
{
    string line = hits[i].Groups[1].Value + "\n";
    TextBox2.Text = TextBox2.Text + line;
}
获取图片标签可以使用下面的正则表达式:
"<img.*?>"
using System;
using System.Xml;
using System.Text;
using System.Net;
using System.IO;
using System.Collections;
using System.Text.RegularExpressions;

/// <summary>
/// Console utility that downloads a web page, extracts every <c>&lt;img&gt;</c> tag from
/// its HTML and appends the tags to a text file in the current directory.
/// </summary>
public class App
{
    /// <summary>
    /// Prompts for a page address, downloads the page, extracts its image tags and
    /// writes them to the output file.
    /// </summary>
    public static void Main()
    {
        string strCode;
        ArrayList alLinks;

        Console.Write("请输入一个网页地址:");
        string strURL = Console.ReadLine();

        // ReadLine returns null when standard input is closed; an empty address cannot
        // be downloaded either, so stop instead of failing later with an exception.
        if (strURL == null || strURL.Trim().Length == 0)
        {
            Console.WriteLine("未输入网页地址。");
            return;
        }
        strURL = strURL.Trim();

        // Only prepend a scheme when none is present. StartsWith is safe for inputs
        // shorter than the prefix, and "https://" addresses are left untouched.
        bool hasScheme =
            strURL.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            strURL.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        if (!hasScheme)
        {
            strURL = @"http://" + strURL;
        }

        Console.WriteLine("正在获取页面代码,请稍侯...");
        strCode = GetPageSource(strURL);

        Console.WriteLine("正在提取超链接,请稍侯...");
        alLinks = GetHyperLinks(strCode);

        Console.WriteLine("正在写入文件,请稍侯...");
        WriteToXml(strURL, alLinks);
    }

    /// <summary>
    /// Downloads the HTML source of the given page.
    /// </summary>
    /// <param name="URL">Absolute address of the page to download.</param>
    /// <returns>The response body decoded as GB2312 text.</returns>
    static string GetPageSource(string URL)
    {
        Uri uri = new Uri(URL);
        HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

        // Request options must be configured before GetResponse() sends the request.
        hwReq.Method = "GET";
        hwReq.KeepAlive = false;

        // Dispose the response and the reader so the connection is released even when
        // reading fails. The body is always decoded as GB2312, whatever charset the
        // server declares.
        using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
        using (StreamReader reader = new StreamReader(hwRes.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Extracts every <c>&lt;img&gt;</c> tag from a piece of HTML.
    /// </summary>
    /// <param name="htmlCode">HTML source to scan; null or empty yields an empty list.</param>
    /// <returns>
    /// The matched tags as strings, sorted with the default comparer. Each tag is also
    /// echoed to the console as it is found.
    /// </returns>
    static ArrayList GetHyperLinks(string htmlCode)
    {
        // Holds the extracted tags.
        ArrayList al = new ArrayList();
        if (string.IsNullOrEmpty(htmlCode))
        {
            return al;
        }

        // Only image tags are wanted. Match both "<img ...>" and self-closing
        // "<img ... />"; \b keeps longer tag names such as "<imgx>" from matching.
        string strRegex = @"<img\b[^>]*>";

        // Ignore case so "<IMG ...>" is found too.
        Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);
        foreach (Match m in r.Matches(htmlCode))
        {
            al.Add(m.Value);
            Console.WriteLine(m.Value);
        }

        al.Sort();
        return al;
    }

    /// <summary>
    /// Appends the extracted image tags to the UTF-8 text file "图像链接列表.txt".
    /// Despite the method name, the output is plain text, not XML.
    /// </summary>
    /// <param name="strURL">Address the tags were extracted from; written in the header line.</param>
    /// <param name="alHyperLinks">Tags to write, one per line; each element must be a string.</param>
    static void WriteToXml(string strURL, ArrayList alHyperLinks)
    {
        // append: true keeps the results of earlier runs. The using block flushes and
        // closes the file even if a write throws.
        using (StreamWriter writer = new StreamWriter("图像链接列表.txt", true, Encoding.UTF8))
        {
            writer.WriteLine("提取自" + strURL + "的图片链接");
            writer.WriteLine();
            writer.WriteLine();
            writer.WriteLine();
            foreach (string str in alHyperLinks)
            {
                writer.WriteLine(str);
            }
        }
    }
}