/**
 * Collects the {@code src} value of every {@code <img src=...>} tag in an HTML file.
 *
 * <p>A tag is recognised only when {@code src} directly follows {@code <img} and is
 * the last thing before the closing {@code >} (optionally {@code />}). The value may
 * be quoted or bare and may contain only ASCII letters, digits, {@code /}, {@code _}
 * and {@code .}. The file is decoded with the platform default charset.
 *
 * @param htmlName path of the HTML file to scan
 * @return the captured src values in document order; empty when nothing matches
 * @throws IOException if the file cannot be opened or read
 */
public static ArrayList<String> getAllPicUrl(String htmlName) throws IOException {
    File f = new File(htmlName);
    byte[] bs = new byte[(int) f.length()];
    int filled = 0;
    InputStream is = new FileInputStream(f);
    try {
        // One read() call may deliver fewer bytes than requested, so loop until
        // the buffer is full or the stream ends.
        while (filled < bs.length) {
            int n = is.read(bs, filled, bs.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
    } finally {
        // Always release the file handle, even when reading fails.
        is.close();
    }
    String str = new String(bs, 0, filled);

    // The capture group is reluctant (+?) so that the '/' of an unquoted
    // self-closed tag such as <img src=6.gif/> is not swallowed into the value.
    Pattern p = Pattern.compile(".*?<img\\s* src=[\"|\']?([/0-9a-zA-Z_\\.]+?)[\"|\']?[/]?>");
    Matcher m = p.matcher(str);
    ArrayList<String> list = new ArrayList<String>();
    while (m.find()) {
        list.add(m.group(1));
    }
    return list;
}
用我原来写的改的,呵呵 import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; public class TestRegex2 { public static void main(String[] args) { ArrayList<String> al; al = getAllPicUrl("D://test.html"); for(String s:al) { System.out.println(s); } }
File f = new File(htmlName);
byte[] bs = new byte[(int) f.length()];
InputStream is = new FileInputStream(f);
is.read(bs);
String str = new String(bs);
Pattern p = Pattern.compile(".*?<img\\s* src=[\"|\']?([/0-9a-zA-Z_\\.]+)[\"|\']?[/]?>");
Matcher m = p.matcher(str);
ArrayList<String> list = new ArrayList<String>();
while (m.find()) {
list.add(m.group(1));
}
return list;
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that lists the self-closed {@code <img .../>} tags found in an HTML file.
 */
public class TestRegex2 {

    /**
     * Matches one self-closed img tag. {@code [^>]*} keeps the match inside a single
     * tag, so several tags on the same line are reported separately instead of being
     * merged into one span that runs up to the last {@code />} on the line.
     */
    private static final Pattern IMG_TAG = Pattern.compile("<img[^>]*/>");

    /**
     * Prints every tag found in the hard-coded sample file, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ArrayList<String> al = getAllPicUrl("D://test.html");
        for (String s : al) {
            System.out.println(s);
        }
    }

    /**
     * Scans the file line by line and collects each self-closed {@code <img .../>} tag.
     *
     * <p>Despite the method name, the returned strings are the complete tag text,
     * not just the value of the {@code src} attribute. A tag must start and end on the
     * same line to be found. The file is decoded with the platform default charset.
     *
     * <p>I/O failures are reported on standard error and do not propagate; whatever
     * was collected before the failure is returned.
     *
     * @param htmlName path of the HTML file to scan
     * @return the matched tags in document order; empty when nothing matches or the
     *         file cannot be read
     */
    public static ArrayList<String> getAllPicUrl(String htmlName) {
        ArrayList<String> al = new ArrayList<String>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(new File(htmlName)));
            String str;
            while ((str = br.readLine()) != null) {
                Matcher m = IMG_TAG.matcher(str);
                while (m.find()) {
                    al.add(m.group());
                }
            }
        } catch (IOException e) {
            // Best effort by design: report the problem and return what we have.
            e.printStackTrace();
        } finally {
            try {
                if (br != null) {
                    br.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return al;
    }
}
public static ArrayList<String> getAllPicUrl(String htmlName) throws IOException {
File f = new File(htmlName);
byte[] bs = new byte[(int) f.length()];
InputStream is = new FileInputStream(f);
is.read(bs);
String str = new String(bs);
Pattern p = Pattern.compile(".*?<img\\s* src=[\"|\']?([^\"']+?)[\"|\']?[/]?>");
Matcher m = p.matcher(str);
ArrayList<String> list = new ArrayList<String>();
while (m.find()) {
list.add(m.group(1));
}
return list;
}样例
<table>
<tr>
<td style="line-height:20px">1<br>
2<br style="font-size:24px">
2<br><img src="1/2/3/4/1.gif"/>
2<br><img src="1/2/3/4/2.gif"/>
2<br><img src="1/2/3/4/3.gif"/>
2<br><img src="1/2/3/4/4.gif"/>
2<br><img src="1/2/3/4/5_6.gif"/>
</td><img src=6.gif/>
</tr><img src=中文.gif/>
</table>
输出
1/2/3/4/1.gif
1/2/3/4/2.gif
1/2/3/4/3.gif
1/2/3/4/4.gif
1/2/3/4/5_6.gif
6.gif
中文.gif