<!-- Sample markup: the "blogIndex" div holds a heading and a list whose two items link to catalog_2004.html and catalog_2005.html. -->
<div id="blogIndex" class="operation">
<h4>文章索引</h4>
<ul>
<li class="weblog"><a href="catalog_2004.html">2004年索引</a></li>
<li class="weblog"><a href="catalog_2005.html">2005年索引</a></li>
</ul>
</div>
<!-- NOTE(review): this repeats the heading and link list of the blogIndex div but has no opening <div>, so the closing </div> at the end is unmatched. -->
<h4>文章索引</h4>
<ul>
<li class="weblog"><a href="catalog_2004.html">2004年索引</a></li>
<li class="weblog"><a href="catalog_2005.html">2005年索引</a></li>
</ul>
</div>
// Captures everything inside the blogIndex div as group 1. DOTALL lets '.' cross line breaks and
// the reluctant (.*?) stops at the first </div>. MULTILINE is omitted: the pattern has no ^ or $.
Pattern p=Pattern.compile("<div id=\"blogIndex\" class=\"operation\">(.*?)</div>",Pattern.DOTALL);
// Captures the href value of one anchor as group 1. There is deliberately no leading ".*" and the
// link text is matched reluctantly, so repeated find() calls return every link in document order
// instead of only the last one. Attributes before or after href are tolerated via [^<>]*.
Pattern p1=Pattern.compile("<a [^<>]*href=\"([^<>\"]*)\"[^<>]*>.*?</a>",Pattern.DOTALL);
/**
 * Demonstrates pulling link targets out of a small HTML fragment with a regular expression.
 */
public class HelloSon {
    /**
     * Matches a line that begins with a {@code <li class="weblog">} item wrapping an anchor and
     * captures only the text between the quotes of its {@code href} attribute (group 1).
     * MULTILINE makes {@code ^} match at the start of every line of the input, not just the first.
     */
    private static final Pattern WEBLOG_LINK =
            Pattern.compile("^<li class=\"weblog\"><a href=\"([^\"]*)\">", Pattern.MULTILINE);

    /**
     * Prints the href value of every weblog list item in the built-in sample markup, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "<div id=\"blogIndex\" class=\"operation\">\n"
                + "<h4>文章索引</h4>\n"
                + "<ul>\n"
                + "<li class=\"weblog\"><a href=\"catalog_2004.html\">2004年索引</a></li>\n"
                + "<li class=\"weblog\"><a href=\"catalog_2005.html\">2005年索引</a></li>\n"
                + "</ul>\n"
                + "</div>\n";

        Matcher ma = WEBLOG_LINK.matcher(s);
        while (ma.find()) {
            // Group 1 excludes the quote characters, so the bare URL is printed.
            System.out.println(ma.group(1));
        }
    }
}
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads {@code link.txt} from the working directory and prints the value of every
 * {@code href="..."} attribute it contains, one value per output line.
 */
public class DrawOutLink
{
    /**
     * Matches one {@code href="..."} attribute; group 1 is the text between the quotes.
     * The negated class stops at the first closing quote, so later attributes on the same
     * line are not swallowed into the value.
     */
    private static final Pattern HREF = Pattern.compile("href=\"([^\"]*)\"");

    /**
     * Scans the file line by line and prints each href value in the order encountered.
     * An I/O failure (including a missing file) is reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // try-with-resources closes the file even when reading fails part-way through.
        // The one-argument InputStreamReader constructor decodes with the platform default charset.
        try (FileInputStream input = new FileInputStream("link.txt");
             BufferedReader lines = new BufferedReader(new InputStreamReader(input)))
        {
            String line;
            while ((line = lines.readLine()) != null)
            {
                Matcher m = HREF.matcher(line);
                // Loop rather than a single find() so several links on one line are all reported.
                while (m.find())
                {
                    System.out.println(m.group(1));
                }
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}