使用neko分析网页出错,程序:import com.sun.org.apache.xpath.internal.XPathAPI;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.transform.TransformerException;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;public class testJcIndex {
public static void main(String[] args) {
DOMParser parser = new DOMParser();
String strURL = "http://www.sohu.com";
BufferedReader in;
try {
in = new BufferedReader(new InputStreamReader(new URL(strURL).openStream()));
parser.parse(new InputSource(in));
in.close();
Document doc = parser.getDocument();
String productsXpath = "/html/body"; NodeList products = XPathAPI.selectNodeList(doc, productsXpath);
System.out.println("found: " + products.getLength());
Node node = null;
for (int i = 1; i < products.getLength(); i++) {System.out.println("{{" + i + "}}");
node = products.item(i);
NodeList nl = node.getChildNodes();
System.out.println(nl.getLength());
}
} catch (MalformedURLException ex) {
System.err.println(ex.getMessage());
} catch (IOException e) {
System.err.println(e.getMessage());
} catch (TransformerException e) {
System.err.println(e.getMessage());
} catch (SAXException e) {
System.err.println(e.getMessage());
}
}错误:Exception in thread "main" org.w3c.dom.DOMException: HIERARCHY_REQUEST_ERR: An attempt was made to insert a node where it is not permitted.
at org.apache.xerces.dom.ParentNode.internalInsertBefore(Unknown Source)
at org.apache.xerces.dom.ParentNode.insertBefore(Unknown Source)
at org.apache.xerces.dom.NodeImpl.appendChild(Unknown Source)
at org.apache.html.dom.HTMLDocumentImpl.getDocumentElement(Unknown Source)
at com.sun.org.apache.xpath.internal.XPathAPI.eval(XPathAPI.java:227)
at com.sun.org.apache.xpath.internal.XPathAPI.selectNodeList(XPathAPI.java:165)
at com.sun.org.apache.xpath.internal.XPathAPI.selectNodeList(XPathAPI.java:145)
at org.zzd.parserHtml.ty.testJcIndex.main(testJcIndex.java:39)不知这是怎么回事?我以前在内网使用neko没见过这种问题。我用的neko是1.9.14,xerces是其中自带的2.9.1
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.transform.TransformerException;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;public class testJcIndex {
public static void main(String[] args) {
DOMParser parser = new DOMParser();
String strURL = "http://www.sohu.com";
BufferedReader in;
try {
in = new BufferedReader(new InputStreamReader(new URL(strURL).openStream()));
parser.parse(new InputSource(in));
in.close();
Document doc = parser.getDocument();
String productsXpath = "/html/body"; NodeList products = XPathAPI.selectNodeList(doc, productsXpath);
System.out.println("found: " + products.getLength());
Node node = null;
for (int i = 1; i < products.getLength(); i++) {System.out.println("{{" + i + "}}");
node = products.item(i);
NodeList nl = node.getChildNodes();
System.out.println(nl.getLength());
}
} catch (MalformedURLException ex) {
System.err.println(ex.getMessage());
} catch (IOException e) {
System.err.println(e.getMessage());
} catch (TransformerException e) {
System.err.println(e.getMessage());
} catch (SAXException e) {
System.err.println(e.getMessage());
}
}错误:Exception in thread "main" org.w3c.dom.DOMException: HIERARCHY_REQUEST_ERR: An attempt was made to insert a node where it is not permitted.
at org.apache.xerces.dom.ParentNode.internalInsertBefore(Unknown Source)
at org.apache.xerces.dom.ParentNode.insertBefore(Unknown Source)
at org.apache.xerces.dom.NodeImpl.appendChild(Unknown Source)
at org.apache.html.dom.HTMLDocumentImpl.getDocumentElement(Unknown Source)
at com.sun.org.apache.xpath.internal.XPathAPI.eval(XPathAPI.java:227)
at com.sun.org.apache.xpath.internal.XPathAPI.selectNodeList(XPathAPI.java:165)
at com.sun.org.apache.xpath.internal.XPathAPI.selectNodeList(XPathAPI.java:145)
at org.zzd.parserHtml.ty.testJcIndex.main(testJcIndex.java:39)不知这是怎么回事?我以前在内网使用neko没见过这种问题。我用的neko是1.9.14,xerces是其中自带的2.9.1
http://home.searchfull.net:8080/blog/2007/06/04/1180954618352.html