查找relative path有问题,但是找不出正则表达式哪里有问题;高手帮忙import java.io.File;
import java.io.FileInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;public class LookupSrc { public static void main(String[] args) throws Exception {
String type = "gif"; FileInputStream input = new FileInputStream(new File("c:/a.txt"));
byte[] bytes = new byte[input.available()];
input.read(bytes);
String html = new String(bytes); System.out.println("relative path:");
Matcher matcher = Pattern.compile("src=\"?(?!http|/)\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
}
System.out.println("\nabsolute path:");
matcher = Pattern.compile("src=\"?[/]\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
}
System.out.println("\nurl:");
matcher = Pattern.compile("src=\"?http:\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
} }}
a.txt的内容是:#relative path
src="aaa.gif"
src=aaa.gif#absolute path
src="/aaa/aaa.gif"
src=/aaa/aaa.gif#url
src="http://www.site.com/aaa.gif"
src=http://www.site.com/aaa.gif
import java.io.FileInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;public class LookupSrc { public static void main(String[] args) throws Exception {
String type = "gif"; FileInputStream input = new FileInputStream(new File("c:/a.txt"));
byte[] bytes = new byte[input.available()];
input.read(bytes);
String html = new String(bytes); System.out.println("relative path:");
Matcher matcher = Pattern.compile("src=\"?(?!http|/)\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
}
System.out.println("\nabsolute path:");
matcher = Pattern.compile("src=\"?[/]\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
}
System.out.println("\nurl:");
matcher = Pattern.compile("src=\"?http:\\S+." + type + "\"?")
.matcher(html); while (matcher.find()) {
int posStart = matcher.start();
int posEnd = matcher.end();
String src = html.substring(posStart, posEnd);
System.out.println(src);
} }}
a.txt的内容是:#relative path
src="aaa.gif"
src=aaa.gif#absolute path
src="/aaa/aaa.gif"
src=/aaa/aaa.gif#url
src="http://www.site.com/aaa.gif"
src=http://www.site.com/aaa.gif
Pattern.compile("src=\"?(?!http|/)\\S+\\." + type + "\"?")
Pattern.compile("src=\"?[/]\\S+\\." + type + "\"?")
Pattern.compile("src=\"?http:\\S+\\." + type + "\"?")
第一个输出是:
relative path:
src="aaa.gif"
src=aaa.gif
src="/aaa/aaa.gif"
src="http://www.site.com/aaa.gif"但是我期望的是:
relative path:
src="aaa.gif"
src=aaa.gif
Matcher matcher2 =Pattern.compile("(src=\"?[/]\\S+\\." + type + "\"?)").matcher(str);
Matcher matcher3 =Pattern.compile("(src=\"?http:\\S+\\." + type + "\"?)").matcher(str);
java.util.List list1=new ArrayList();
java.util.List list2=new ArrayList();
java.util.List list3=new ArrayList();
while(matcher1.find()){
list1.add(matcher1.group(1));
}
while(matcher2.find()){
list2.add(matcher2.group(1));
}
while(matcher3.find()){
list3.add(matcher3.group(1));
} for(int i=0;i<list1.size();i++){
System.out.println(list1.get(i));
}
System.out.println("================");
for(int i=0;i<list2.size();i++){
System.out.println(list2.get(i));
}
System.out.println("================");
for(int i=0;i<list3.size();i++){
System.out.println(list3.get(i));
}