急！HtmlParser提取文本的问题，求教

我想在《开发自己的搜索引擎》一书示例的基础上，用HtmlParser提取
http://product.pconline.com.cn/notebook/lenovo/261638_detail.html 页面里的参数，把它们存到txt文件里，但那个类始终有问题，不知有没有牛人能够帮个忙？

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

不好意思，就一个方法：
/**
 * Extracts a title and a list of "name:value" attribute lines from the page
 * held by this object's parser and writes them to a text file.
 *
 * The work is done in two passes over the same parser:
 * 1. parse with the title filter (span tags), open the output file whose name
 *    is built from the title pieces plus a timestamp, and write the page URL
 *    and the first two title pieces;
 * 2. reset the parser, parse with the attribute filter (td tags with
 *    WIDTH="16%"), and append one "name:value" line per matching cell.
 *
 * Exceptions in either pass are caught and only printed via
 * printStackTrace(); nothing is rethrown to the caller.
 *
 * NOTE(review): the writer is never flushed or closed in the visible part of
 * this method, so buffered output may never reach the file - confirm whether
 * it is closed elsewhere.
 */
public void extract() {
//Shared by both passes; assigned only inside the first pass's loop
BufferedWriter bw = null;
//Create the attribute filter: td tags combined with a WIDTH="16%" attribute check
NodeFilter attributes_filter = new AndFilter(new TagNameFilter("td"),
new HasAttributeFilter("WIDTH", "16%"));
//Create the title filter: span tags combined with a class="" attribute check
//NOTE(review): presumably this only matches spans whose class value is empty - confirm this is intended
NodeFilter title_filter = new AndFilter(new TagNameFilter("span"),
new HasAttributeFilter("class", "")
);

//Extract the title information
try {
//The Parser returns all nodes that satisfy the filter
NodeList title_nodes = this.getParser().parse(title_filter);
//Iterate over all matching nodes
for (int i = 0; i < title_nodes.size(); i++) {
//NOTE(review): the filter selects span tags but the node is cast to TableColumn;
//presumably this throws ClassCastException for real span nodes, which the catch
//below swallows, leaving bw null - verify against the HTMLParser node classes
TableColumn node = (TableColumn) title_nodes.elementAt(i);
//Split the HTML text inside the node on single spaces
String[] names = node.getChildrenHTML().split(" ");
StringBuffer title = new StringBuffer();
//Build the name of the text file to generate: every piece followed by "-"
for (int k = 0; k < names.length; k++) {
title.append(names[k]).append("-");
}
//Append the current time in milliseconds to the file name
title.append((new Date()).getTime());
//Create the output file: output path + title + ".txt"
//NOTE(review): a new writer is opened on every iteration and the previous one is not closed
bw = new BufferedWriter(new FileWriter(new File(this
.getOutputPath()
+ title + ".txt")));
//Get the full URL of the page being extracted: take the part of the input
//file path after "mirror" (6 is the length of "mirror")
//NOTE(review): if "mirror" is absent indexOf returns -1 and startPos becomes 5
int startPos = getInuputFilePath().indexOf("mirror") + 6;
String url_seg = getInuputFilePath().substring(startPos);
//Replace every backslash with a forward slash
url_seg = url_seg.replaceAll("\\\\", "/");
//NOTE(review): presumably url_seg starts with "/" so the result reads "http://..." - confirm
String url = "http:/" + url_seg;
System.out.println(url);
//Write the full URL of the page being extracted
bw.write(url + NEWLINE);
//Write the first two pieces of the title
//NOTE(review): names[1] throws ArrayIndexOutOfBoundsException when the split yields a single piece
bw.write(names[0] + NEWLINE);
bw.write(names[1] + NEWLINE);
}
} catch (Exception e) {
//Any failure above is only printed; execution continues with the second pass
e.printStackTrace();
}
//Reset the Parser before parsing again with the other filter
this.getParser().reset();
try {
//The Parser returns all nodes that satisfy the filter
NodeList attributes_nodes = this.getParser().parse(attributes_filter);
for (int i = 0; i < attributes_nodes.size(); i++) {
//Current matching td node
TableColumn node = (TableColumn) attributes_nodes.elementAt(i);
//NOTE(review): text is assigned but not used in the visible code
String text = node.getChildrenHTML();
//Extract the attribute name
//NOTE(review): presumably getProp returns the given capture group of the pattern
//matched against the node's HTML - confirm against its definition
String result = getProp(
"<TD width=\"16%\" class=\"#FCFCFC\">(.*)</TD>",
node.toHtml(), 1);
//Case where the attribute name still contains markup (e.g. a link tag):
//retry with the second pattern and take capture group 2
if (result.indexOf("<") != -1)
result = getProp(
"<TD CLASS=btd WIDTH=198 BGCOLOR=\"#FCFCFC\"(.*)>(.*)</a></B></TD>",
node.toHtml(), 2);
//Extract the attribute value from the node two siblings after the name cell
//NOTE(review): assumes that sibling exists and is a TableColumn - TODO confirm
TableColumn nodeExt = (TableColumn) node.getNextSibling()
.getNextSibling();
//Write one "name:value" line
//NOTE(review): bw is still null here if the first pass never opened a file
bw.write(StringUtils.trim(result) + ":"
+ StringUtils.trim(nodeExt.getChildrenHTML()));
bw.newLine();
//Last statement of the loop body, so this continue has no effect
continue;
}
} catch (Exception e) {
//Any failure above is only printed
e.printStackTrace();
}
楼主，可以参考 http://blog.sina.com.cn/s/blog_3c6ecea90100iub1.html ，希望对你有帮助。