请问如何用htmlparser提取html中的文本呢？ - 调试易

请问如何用htmlparser提取html中的文本呢？

我看到里面有 StringExtractor，但是不知道怎么用。
htmlparser 里面的例子我找不到。
各位仁兄帮帮忙啦！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
public class HtmlUtils {
public static String extractText(String inputHtml) throws Exception{
  StringBuffer text = new StringBuffer();
  Parser parser = Parser.createParser(new String(inputHtml.getBytes(),"8859_1"));
  //遍历所有的节点
  NodeList nodes = parser.extractAllNodesThatMatch(new NodeFilter(){
   public boolean accept(Node node) {
    return true;
   }});
  Node node = nodes.elementAt(0);
  text.append(new String(node.toPlainTextString().getBytes("8859_1")));
  return text.toString();
}
public static void main(String[] args) throws Exception{
  String text = extractText("<td>点击<b><a href=index.jsp>这里</a></b>回到首页</td>");
  System.out.println(text);
}
}
http://www-900.ibm.com/developerWorks/cn/java/l-html-parser/