怎样从网页源代码中提取图片的路径？

嵌入图片的代码多种多样，如下：
<img src="...">
<img src='...'>
<img src=...>
<img ... src="...">
<img ... src='...'>
<img ... src=...>
<img ... src=... ...>
......
......
请问怎么样把src的值提取出来？
估计要用到正则表达式
哪位能不能给写出点源码？
我对正则表达式不是很熟

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

你看这样行不？？
import java.util.regex.*;
/**
 * Demo: extracts the {@code src} attribute value from a handful of sample
 * HTML tags and prints the results to standard output.
 *
 * <p>The program runs in two passes over a fixed sample array: pass 1 keeps
 * only entries that contain an {@code <img ...>} tag (printing each tag found
 * and blanking the others), pass 2 prints the {@code src} value of each entry
 * (an empty line for entries without one).
 */
public class test {

  /** Matches one complete {@code <img ...>} tag, in any letter case. */
  private static final Pattern IMG_TAG =
      Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

  /**
   * Matches a {@code src} attribute preceded by whitespace (so that
   * {@code data-src} and similar are not mistaken for it). Exactly one of
   * the three groups is set: 1 = double-quoted value, 2 = single-quoted
   * value, 3 = unquoted value.
   */
  private static final Pattern SRC_ATTR = Pattern.compile(
      "\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
      Pattern.CASE_INSENSITIVE);

  /**
   * Runs the demo on the built-in sample tags.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // Sample input; in real use these would be tags tokenized out of a page.
    String[] test = {"<img src=\"...\" >"
        , "<img src='...'>"
        , "<a id='a'>"
        , "<img src=...>"
        , "<img name src=\"...\" >"
        , "<img id src='...'>"
        , "<img ... src=...>"
        , "<img ... src=... ...>"
    };

    // Pass 1: keep only the <img> tag of each entry; blank out everything else.
    for (int i = 0; i < test.length; i++) {
      Matcher m = IMG_TAG.matcher(test[i]);
      if (m.find()) {
        System.out.println(m.group());
        test[i] = m.group();
      } else {
        test[i] = "";
      }
    }

    // Pass 2: print the src value of every entry (empty line when there is none).
    for (int i = 0; i < test.length; i++) {
      System.out.println(extractSrc(test[i]));
    }
  }

  /** Returns the src attribute value found in {@code tag}, or "" when there is none. */
  private static String extractSrc(String tag) {
    Matcher m = SRC_ATTR.matcher(tag);
    if (!m.find()) {
      return "";
    }
    // Only the alternative that matched has a non-null group.
    for (int g = 1; g <= 3; g++) {
      if (m.group(g) != null) {
        return m.group(g);
      }
    }
    return "";
  }
}
这种正则表达式不好写
可以考虑分步实现
先找<img ....>
再找src
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;public class LinkProcessor
    implements
        Serializable
{    private String baseUrl;    public LinkProcessor ()
    {
        baseUrl = null;
    }
    public String extract (String link, String base)
    {
        String ret;        try
        {
            if (null == link)
                link = "";
            else
                link = stripQuotes (link);
            if (null != getBaseUrl ())
                base = getBaseUrl ();
            if ((null == base) || ("".equals (link)))
                ret = link;
            else
            {
                URL url = constructUrl(link, base);
                ret = url.toExternalForm ();
            }
        }
        catch (MalformedURLException murle)
        {
            ret = link;
        }        return (Translate.decode (ret));
    }
    public String stripQuotes (String string)
    {
        // remove any double quotes from around string
        if (string.startsWith ("\"") && string.endsWith ("\"") && (1 < string.length ()))
            string = string.substring (1, string.length () - 1);        // remove any single quote from around string
        if (string.startsWith ("'") && string.endsWith ("'") && (1 < string.length ()))
            string = string.substring (1, string.length () - 1);        return (string);
    }
    public URL constructUrl(String link, String base)
        throws MalformedURLException {
        String path;
        boolean modified;
        boolean absolute;
        int index;
        URL url; // constructed URL combining relative link and base
        url = new URL (new URL (base), link);
        path = url.getFile ();
        modified = false;
        absolute = link.startsWith ("/");
        if (!absolute) {   // we prefer to fix incorrect relative links
            // this doesn't fix them all, just the ones at the start
            while (path.startsWith ("/.")) {
                if (path.startsWith ("/../")) {
                    path = path.substring (3);
                    modified = true;
                }
                else if (path.startsWith ("/./") || path.startsWith("/.")) {
                    path = path.substring (2);
                    modified = true;
                } else break;
            }
        }
        // fix backslashes
        while (-1 != (index = path.indexOf ("/\\"))) {
            path = path.substring (0, index + 1) + path.substring (index + 2);
            modified = true;
        }
        if (modified)
            url = new URL (url, path);
        return url;
    }
    public static String fixSpaces (String url)
    {
        int index;
        int length;
        char ch;
        StringBuffer returnURL;        index = url.indexOf (' ');
        if (-1 != index)
        {
            length = url.length ();
            returnURL = new StringBuffer (length * 3);
            returnURL.append (url.substring (0, index));
            for (int i = index; i < length; i++)
            {
                ch = url.charAt (i);
                if (ch==' ')
                    returnURL.append ("%20");
                else
                    returnURL.append (ch);
            }
            url = returnURL.toString ();
        }        return (url);
    }
    public static boolean isURL (String resourceLocn) {
        boolean ret;        try
        {
            new URL (resourceLocn);
            ret = true;
        }
        catch (MalformedURLException murle)
        {
            ret = false;
        }        return (ret);
    }
    public String getBaseUrl ()
    {
        return baseUrl;
    }    /**
     * Sets the baseUrl.
     * @param baseUrl The baseUrl to set
     */
    public void setBaseUrl (String baseUrl)
    {
        this.baseUrl = baseUrl;
    }
    public static String removeLastSlash(String baseUrl) {
      if(baseUrl.charAt(baseUrl.length()-1)=='/')
      {
         return baseUrl.substring(0,baseUrl.length()-1);
      }
      else
      {
         return baseUrl;
      }
    }
使用方法:LinkProcessor lp = new LinkProcessor();
String url = lp.extract("图片地址","当前页面地址");
// Capture only the src value itself: group 1 = double-quoted, group 2 =
// single-quoted, group 3 = unquoted. Requiring whitespace before "src" keeps
// attributes such as data-src from matching, and stopping at the closing
// quote (or at whitespace / '>') keeps later attributes out of the result.
Pattern p = Pattern.compile(
    "<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
    Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher("<img src=\"1.jsp\">");
while(m.find()){
  // Exactly one of the three groups is non-null for each match.
  String url = m.group(1) != null ? m.group(1)
             : m.group(2) != null ? m.group(2)
             : m.group(3);
  System.out.println(url);
}