import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;public class ExtractTable { static String html = " <html> <head> </head> <body>"
+ " <table> <tr> <td>hello table </td> </tr> </table> <table> <tr> <td>hello table <a href=http://www.baidu.com>tt </a> </td> </tr> </table> "
+ " </body> </html>";//这里是html的内容//static String html = "http://www.baidu.com"; public static void test5(String resource) throws Exception {
Parser myParser = new Parser(resource);
// Parser parser = new Parser(content);
// 设置编码
myParser.setEncoding("utf-8");
//String filterStr = "table";//这里析取得是标签为table的元素
String filterStr="table";
NodeFilter filter = new TagNameFilter(filterStr);//过滤这个标签
NodeList nodeList = myParser.extractAllNodesThatMatch(filter);//抽取所有table列表
for (int i = 0; i < nodeList.size(); i++) {
TableTag tabletag = (TableTag) nodeList.elementAt(i);
System.out.println(tabletag.toHtml());//打印出来
} } /**
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
test5(html);//当然这里可以写成一个链接地址比如将html代替为"http://www.baidu.com"
}}
在 String filterStr="table"; 这里，我把 filterStr 改为 filterStr="a"; 为什么报错了啊？
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;public class ExtractTable { static String html = " <html> <head> </head> <body>"
+ " <table> <tr> <td>hello table </td> </tr> </table> <table> <tr> <td>hello table <a href=http://www.baidu.com>tt </a> </td> </tr> </table> "
+ " </body> </html>";//这里是html的内容//static String html = "http://www.baidu.com"; public static void test5(String resource) throws Exception {
Parser myParser = new Parser(resource);
// Parser parser = new Parser(content);
// 设置编码
myParser.setEncoding("utf-8");
//String filterStr = "table";//这里析取得是标签为table的元素
String filterStr="table";
NodeFilter filter = new TagNameFilter(filterStr);//过滤这个标签
NodeList nodeList = myParser.extractAllNodesThatMatch(filter);//抽取所有table列表
for (int i = 0; i < nodeList.size(); i++) {
TableTag tabletag = (TableTag) nodeList.elementAt(i);
System.out.println(tabletag.toHtml());//打印出来
} } /**
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
test5(html);//当然这里可以写成一个链接地址比如将html代替为"http://www.baidu.com"
}}
在 String filterStr="table"; 这里，我把 filterStr 改为 filterStr="a"; 为什么报错了啊？
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货