嵌入图片的代码多种多样,如下:
<img src="...">
<img src='...'>
<img src=...>
<img ... src="...">
<img ... src='...'>
<img ... src=...>
<img ... src=... ...>
......
......
请问怎么样把src的值提取出来?
估计要用到正则表达式
哪位能不能给写出点源码?
我对正则表达式不是很熟
<img src="...">
<img src='...'>
<img src=...>
<img ... src="...">
<img ... src='...'>
<img ... src=...>
<img ... src=... ...>
......
......
请问怎么样把src的值提取出来?
估计要用到正则表达式
哪位能不能给写出点源码?
我对正则表达式不是很熟
import java.util.regex.*;
/**
 * Demo: pulls the value of the {@code src} attribute out of {@code <img>} tags,
 * whether the value is written with double quotes, single quotes or no quotes.
 *
 * <p>Known limitation: this is a regular-expression scan, not an HTML parser.
 * Text that looks like {@code  src=} inside another attribute's quoted value,
 * or a {@code >} inside a quoted value, can still confuse it.
 */
public class test {

    /** One whole {@code <img ...>} tag, up to the first {@code >}. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * An {@code <img>} tag up to and including its {@code src} value.
     * Group 1 = double-quoted value, group 2 = single-quoted value,
     * group 3 = unquoted value (ends at whitespace or {@code >}).
     * {@code src} must be preceded by whitespace so that attributes such as
     * {@code data-src} are not mistaken for it.
     */
    private static final Pattern IMG_SRC = Pattern.compile(
            "<img\\s+(?:[^>]*?\\s)?src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Returns the {@code src} value of the first {@code <img>} tag in {@code html}.
     *
     * @param html text that may contain an {@code <img>} tag
     * @return the attribute value without surrounding quotes, or {@code null}
     *         when no {@code <img>} tag with a {@code src} attribute is found
     */
    private static String extractSrc(String html) {
        Matcher m = IMG_SRC.matcher(html);
        if (!m.find()) {
            return null;
        }
        // Exactly one of the three alternatives took part in the match.
        for (int g = 1; g <= 3; g++) {
            if (m.group(g) != null) {
                return m.group(g);
            }
        }
        return null;
    }

    /**
     * Prints every {@code <img>} tag found in the sample data, then the
     * {@code src} value of each of those tags. Entries that are not
     * {@code <img>} tags are skipped in both passes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // In real use, first split the page into an array of tags like this one
        // (for example with StringTokenizer).
        String[] test = {"<img src=\"...\" >"
                , "<img src='...'>"
                , "<a id='a'>"
                , "<img src=...>"
                , "<img name src=\"...\" >"
                , "<img id src='...'>"
                , "<img ... src=...>"
                , "<img ... src=... ...>"
        };

        // Pass 1: keep only the <img> tags.
        for (int i = 0; i < test.length; i++) {
            Matcher m = IMG_TAG.matcher(test[i]);
            if (m.find()) {
                test[i] = m.group();
                System.out.println(test[i]);
            } else {
                test[i] = null; // not an image tag; ignored by pass 2
            }
        }

        // Pass 2: print the src value of each remaining tag.
        for (int i = 0; i < test.length; i++) {
            if (test[i] == null) {
                continue;
            }
            String src = extractSrc(test[i]);
            if (src != null) {
                System.out.println(src);
            }
        }
    }
}
可以考虑分步实现:先找出 <img ...> 标签,再从标签中提取 src 的值。
import java.net.MalformedURLException;
import java.net.URL;public class LinkProcessor
implements
Serializable
{ private String baseUrl; public LinkProcessor ()
{
baseUrl = null;
}
public String extract (String link, String base)
{
String ret; try
{
if (null == link)
link = "";
else
link = stripQuotes (link);
if (null != getBaseUrl ())
base = getBaseUrl ();
if ((null == base) || ("".equals (link)))
ret = link;
else
{
URL url = constructUrl(link, base);
ret = url.toExternalForm ();
}
}
catch (MalformedURLException murle)
{
ret = link;
} return (Translate.decode (ret));
}
public String stripQuotes (String string)
{
// remove any double quotes from around string
if (string.startsWith ("\"") && string.endsWith ("\"") && (1 < string.length ()))
string = string.substring (1, string.length () - 1); // remove any single quote from around string
if (string.startsWith ("'") && string.endsWith ("'") && (1 < string.length ()))
string = string.substring (1, string.length () - 1); return (string);
}
public URL constructUrl(String link, String base)
throws MalformedURLException {
String path;
boolean modified;
boolean absolute;
int index;
URL url; // constructed URL combining relative link and base
url = new URL (new URL (base), link);
path = url.getFile ();
modified = false;
absolute = link.startsWith ("/");
if (!absolute) { // we prefer to fix incorrect relative links
// this doesn't fix them all, just the ones at the start
while (path.startsWith ("/.")) {
if (path.startsWith ("/../")) {
path = path.substring (3);
modified = true;
}
else if (path.startsWith ("/./") || path.startsWith("/.")) {
path = path.substring (2);
modified = true;
} else break;
}
}
// fix backslashes
while (-1 != (index = path.indexOf ("/\\"))) {
path = path.substring (0, index + 1) + path.substring (index + 2);
modified = true;
}
if (modified)
url = new URL (url, path);
return url;
}
public static String fixSpaces (String url)
{
int index;
int length;
char ch;
StringBuffer returnURL; index = url.indexOf (' ');
if (-1 != index)
{
length = url.length ();
returnURL = new StringBuffer (length * 3);
returnURL.append (url.substring (0, index));
for (int i = index; i < length; i++)
{
ch = url.charAt (i);
if (ch==' ')
returnURL.append ("%20");
else
returnURL.append (ch);
}
url = returnURL.toString ();
} return (url);
}
public static boolean isURL (String resourceLocn) {
boolean ret; try
{
new URL (resourceLocn);
ret = true;
}
catch (MalformedURLException murle)
{
ret = false;
} return (ret);
}
public String getBaseUrl ()
{
return baseUrl;
} /**
* Sets the baseUrl.
* @param baseUrl The baseUrl to set
*/
public void setBaseUrl (String baseUrl)
{
this.baseUrl = baseUrl;
}
public static String removeLastSlash(String baseUrl) {
if(baseUrl.charAt(baseUrl.length()-1)=='/')
{
return baseUrl.substring(0,baseUrl.length()-1);
}
else
{
return baseUrl;
}
}
使用方法:LinkProcessor lp = new LinkProcessor();
String url = lp.extract("图片地址","当前页面地址");
// Scan the sample tag with the pattern p and print capture group 1 of every
// match after removing all single and double quote characters from it.
// p is not defined in this snippet; it has to be a compiled Pattern that
// declares at least one capturing group.
Matcher m = p.matcher("<img src=\"1.jsp\">");
for (boolean found = m.find(); found; found = m.find()) {
    String captured = m.group(1);
    String url = captured.replaceAll("\"|'", "");
    System.out.println(url);
}