JAVA利用SAX如何解析html提取里面的超链接 JAVA利用SAX如何解析html提取里面的超链接,不知道该怎么提取,和<a href></a>标签有关吗? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 for(Element e : Jsoup.parse(url).select("a")){ System.out.println(e.attr("href"));} jsoup.jar 比 htmlparser更好。 sax就是要判断是什么标签,获取什么内容。 public class StrutsXMLParser { Map<String, Object> map = new HashMap<String, Object>(); class StrutsXMLHandler extends SAXHandler { private String cls; private boolean isUrl; private List<String> fileList = new ArrayList<String>(); @Override public void startDocument() throws SAXException { } @Override public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if("include".equals(qName)) { fileList.add(atts.getValue("file")); } if("constant".equals(qName)) { map.put(atts.getValue("name"), atts.getValue("value")); } if("action".equals(qName)) { cls = atts.getValue("class"); } if("result".equals(qName) && "error".equals(atts.getValue("name"))) { isUrl = true; } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if(cls != null && isUrl) { String url = new String(ch, start, length); if(!map.containsKey(cls)) { cls = cls.substring(cls.lastIndexOf(".") + 1); map.put(cls, url); } isUrl = false; cls = null; } } @Override public void endElement(String namespaceURI, String localName, String qName) throws SAXException { } @SuppressWarnings("unchecked") @Override public void endDocument() throws SAXException { if(fileList.size() > 0) { map.put("fileList", fileList); } } } @SuppressWarnings("unchecked") public Map<String, Object> parser(String xml) throws Exception { String path = xml.substring(0, xml.lastIndexOf("/") + 1); SAXParserFactory spf = SAXParserFactory.newInstance(); SAXParser sp = spf.newSAXParser(); XMLReader xr = sp.getXMLReader(); StrutsXMLHandler handler = new StrutsXMLHandler(); xr.setContentHandler(handler); xr.parse(xml); List<String> fileList = (List<String>) map.get("fileList"); if(fileList != null) { 
map.remove("fileList"); Iterator it = fileList.iterator(); while(it.hasNext()) { parser(path + it.next()); } } return map; } <constant name="devMode" value="true" /> <constant name="struts.multipart.saveDir" value="c:\HDM_TOOL\upload\"></constant> <constant name="user.data.dir" value="c:\HDM_TOOL\DATA\"></constant> 请教一个8进制和16进制的正则问题 我的SSH框架项目中获取不到SESSION(会话)的值,请帮解决下 我刚开始上java课.怎么把下面的Applet嵌入到网页啊? 关于类的问题 谁给我解释下 还是先看下面代码 为什么会出错 高手麻烦帮忙解释下,为什么这样!!! 请教一个关于正则的我认为“比较难”的问题 要做一个程序的启动画面,这段程序有错误嘛? 为什么组件没有加载 怎样在jbuilder中控制按钮的大小?恳请赐教 如何在Applet中做本地打印?(可以用Swing组件) 老事重提,String和 == 求问,这道java题的输出结果,和解析。
System.out.println(e.attr("href"));
}
jsoup.jar 比 htmlparser更好。
Map<String, Object> map = new HashMap<String, Object>(); class StrutsXMLHandler extends SAXHandler {
private String cls;
private boolean isUrl;
private List<String> fileList = new ArrayList<String>(); @Override
public void startDocument() throws SAXException {
} @Override
public void startElement(String namespaceURI, String localName,
String qName, Attributes atts) throws SAXException {
if("include".equals(qName)) {
fileList.add(atts.getValue("file"));
} if("constant".equals(qName)) {
map.put(atts.getValue("name"), atts.getValue("value"));
} if("action".equals(qName)) {
cls = atts.getValue("class");
} if("result".equals(qName) && "error".equals(atts.getValue("name"))) {
isUrl = true;
}
} @Override
public void characters(char[] ch, int start, int length) throws SAXException {
if(cls != null && isUrl) {
String url = new String(ch, start, length); if(!map.containsKey(cls)) {
cls = cls.substring(cls.lastIndexOf(".") + 1);
map.put(cls, url);
} isUrl = false;
cls = null;
}
} @Override
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException {
} @SuppressWarnings("unchecked")
@Override
public void endDocument() throws SAXException {
if(fileList.size() > 0) {
map.put("fileList", fileList);
}
}
} @SuppressWarnings("unchecked")
public Map<String, Object> parser(String xml) throws Exception {
String path = xml.substring(0, xml.lastIndexOf("/") + 1);
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
StrutsXMLHandler handler = new StrutsXMLHandler();
xr.setContentHandler(handler);
xr.parse(xml); List<String> fileList = (List<String>) map.get("fileList"); if(fileList != null) {
map.remove("fileList");
Iterator it = fileList.iterator(); while(it.hasNext()) {
parser(path + it.next());
}
} return map;
}
<constant name="devMode" value="true" />
<constant name="struts.multipart.saveDir" value="c:\HDM_TOOL\upload\"></constant>
<constant name="user.data.dir" value="c:\HDM_TOOL\DATA\"></constant>