有下面这样的Java程序:
import java.io.File;
import java.io.IOException;
// JAXP
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
// DOM
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;public class ParseXml
{   public ParseXml()
   {
      // TODO Auto-generated constructor stub
   }   public static void main(String[] args)
   {
      try {
         // Get Document Builder Factory
         DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
         // Turn on validation, and turn off namespaces
         factory.setValidating(false);
         factory.setNamespaceAware(false);
         factory.setIgnoringElementContentWhitespace(true);
         DocumentBuilder builder = factory.newDocumentBuilder();
         Document doc = builder.parse(new File(".\\src\\input.xml"));
         // Print the document from the DOM tree and
         // feed it an initial indentation of nothing
         printNode(doc, "");
      } catch (ParserConfigurationException e)
      {
         System.out.print("ParserConfigurationException occurred: ");
         System.out.println("The underlying parser does not " + "support the requested features.");
      } catch (FactoryConfigurationError e)
      {
         System.out.print("FactoryConfigurationError occurred: ");
         System.out.println("Error occurred obtaining Document " + "Builder Factory.");
      } catch (IOException e)
      {
         System.out.print("IOException occurred: ");
         e.printStackTrace();
      } catch (SAXException e)
      {
         System.out.print("SAXException occurred: ");
         e.printStackTrace();
      }
   }   private static void printNode(Node node, String indent)
   {
      // print the DOM tree
      short nodeType = node.getNodeType();
      switch(nodeType)
      {
         case Node.DOCUMENT_NODE:
            break;
            
         case Node.ELEMENT_NODE:
            System.out.println(indent + node.getNodeName());
            break;
            
         case Node.TEXT_NODE:
            String nodeValue = node.getNodeValue();
            if (nodeValue != null)
               System.out.println(indent + node.getNodeName() + ": " + nodeValue);
            break;
            
         default:
            break;
      }
      
      NodeList nl = node.getChildNodes();
      for(int index = 0; index < nl.getLength(); index ++)
      {
         printNode(nl.item(index), indent + "   ");
      }
   }}这段程序解析下面的XML文件,并打印出各个节点的名称和值:<?xml version="1.0" encoding="gbk" standalone="yes"?>
<family>
   <fathor>
      <name>kingfox</name>
      <age>40</age>
   </fathor>
   <mothor>
      <name>apple</name>
      <age>32</age>
   </mothor>
   <daughter>
      <name>ivy</name>
      <age>8</age>
   </daughter>
   <brothor type="text">
      <name>biu</name>
      <age>36</age>
   </brothor>
</family>
输出却是这样的结果:
   family
      #text: 
   
      fathor
         #text: 
      
         name
            #text: kingfox
         #text: 
      
         age
            #text: 40
         #text: 
   
      #text: 
   
      mothor
         #text: 
      
         name
            #text: apple
         #text: 
      
         age
            #text: 32
         #text: 
   
      #text: 
   
      daughter
         #text: 
      
         name
            #text: ivy
         #text: 
      
         age
            #text: 8
         #text: 
   
      #text: 
   
      brothor
         #text: 
      
         name
            #text: biu
         #text: 
      
         age
            #text: 36
         #text: 
   
      #text: 
那些#text节点和其后的空行,其实就是对应了每个XML节点前面的空格。难道Java的XML处理器会将用于XML文件格式化的那些空格也当作text节点来处理?如何设置能够忽略这些空格?

解决方案 »

  1.   

    好像用JDOM或DOM4j来解析就不会有你说的问题了。
      

  2.   

    JDOM/DOM4J也一样,虽然在JDOM/DOM4J中调用node.getChildren()方法,不会返回Text信息;但如果需要节点的Text信息,可以调用node.getText(),这个方法也会返回\t\n等这些特殊字符。
      

  3.   

    节点之间的换行和空白符、起始标签和结束标签之间的值都是Text node,直接输出就可以了。
    对Document节点,取得它的第一个子节点,也就是根节点,进行递归输出。
    对Element节点,可以对它的下一个兄弟节点(NextSibling)进行递归输出。
    其他节点就返回。

    /**
     * Prints the subtree rooted at {@code node} to standard output as tag-like text.
     * <p>
     * A document node prints each of its children. An element prints
     * {@code <name>}, then its children, then {@code </name>}. A text node
     * prints its raw value, whitespace included. Any other node type prints
     * {@code --} followed by its numeric node type.
     * <p>
     * Every child is visited exactly once, in document order.
     *
     * @param node the node to print; {@code null} is ignored
     */
    private static void printNode(Node node)
    {
        if (node == null)
        {
            return;
        }

        switch (node.getNodeType())
        {
            case Node.DOCUMENT_NODE:
                // Walk all top-level children so the root element is reached
                // even when a comment or other node precedes it.
                printChildren(node);
                break;

            case Node.ELEMENT_NODE:
                System.out.print("<" + node.getNodeName() + ">");
                printChildren(node);
                System.out.print("</" + node.getNodeName() + ">");
                break;

            case Node.TEXT_NODE:
                System.out.print(node.getNodeValue());
                break;

            default:
                System.out.print("--" + node.getNodeType());
                break;
        }
    }

    /**
     * Prints every child of {@code parent} by following the first-child /
     * next-sibling chain, so no child is printed twice.
     */
    private static void printChildren(Node parent)
    {
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling())
        {
            printNode(child);
        }
    }

    <family>
       <fathor>
          <name>kingfox<name/>
          
          <age>40<age/>
       
       <fathor/>
       
       <mothor>
          <name>apple<name/>
          
          <age>32<age/>
       
       <mothor/>
       
       <daughter>
          <name>ivy<name/>
          
          <age>8<age/>
       
       <daughter/>
       
       <brothor>
          <name>biu<name/>
          
          <age>36<age/>
       
       <brothor/><family/>
      

  4.   

    谢谢5楼,不过这种方法还是没有解决那些空格和回车字符的问题。后来找到了一种办法,对于text节点,取出其nodeValue之后,调用trim函数即可去掉头尾的空格和回车换行符号。现在的代码如下:
    import java.io.File;
    import java.io.IOException;
    // JAXP
    import javax.xml.parsers.FactoryConfigurationError;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.DocumentBuilder;
    // DOM
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.Node;
    import org.w3c.dom.NodeList;
    import org.xml.sax.*;public class ParseXml
    {   public ParseXml()
       {
       }   public static void main(String[] args)
       {
          ParseXml px = new ParseXml();
          px.run();
       }
       
       public void run()
       {
          try {
             // Get Document Builder Factory
             DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
             // Turn on validation, and turn off namespaces
             factory.setValidating(false);
             factory.setNamespaceAware(false);
             factory.setIgnoringElementContentWhitespace(true);
             DocumentBuilder builder = factory.newDocumentBuilder();
             Document doc = builder.parse(new File(".\\src\\input.xml"));
             Element rootElement = doc.getDocumentElement();
             // Print the document from the DOM tree and
             // feed it an initial indentation of nothing
             printNode(rootElement, "");
          } catch (ParserConfigurationException e)
          {
             System.out.print("ParserConfigurationException occurred: ");
             System.out.println("The underlying parser does not " + "support the requested features.");
          } catch (FactoryConfigurationError e)
          {
             System.out.print("FactoryConfigurationError occurred: ");
             System.out.println("Error occurred obtaining Document " + "Builder Factory.");
          } catch (IOException e)
          {
             System.out.print("IOException occurred: ");
             e.printStackTrace();
          } catch (SAXException e)
          {
             System.out.print("SAXException occurred: ");
             e.printStackTrace();
          }
       }   private void printNode(Node node, String indent)
       {
          // print the DOM tree
          short nodeType = node.getNodeType();      switch (nodeType)
          {
             case Node.DOCUMENT_NODE:
                break;         case Node.ELEMENT_NODE:
                String nodeName = node.getNodeName();
                System.out.println(indent + "<" + nodeName + ">");
                if (node.hasChildNodes())
                {
                   Node childNode = node.getFirstChild();
                   do {
                      printNode(childNode, indent + "   ");
                      childNode = childNode.getNextSibling();
                   } while(childNode != null);
                }
                System.out.println(indent + "</" + nodeName + ">");
                break;         case Node.TEXT_NODE:
                String nodeValue = node.getNodeValue();
                if (!nodeValue.trim().equals(""))
                {
                   System.out.println(indent + "   " + nodeValue);
                }
                break;         default:
                break;
          }
       }}输出如下:<family>
       <fathor>
          <name>
                kingfox
          </name>
          <age>
                40
          </age>
          <favourite>
          </favourite>
       </fathor>
       <mothor>
          <name>
                apple
          </name>
          <age>
                32
          </age>
       </mothor>
       <daughter>
          <name>
                ivy
          </name>
          <age>
                8
          </age>
       </daughter>
       <brothor>
          <name>
                biu
          </name>
          <age>
                36
          </age>
       </brothor>
    </family>