java解析XML遇到有汉字的XML不能解析,各位大虾给点思路
<?xml version="1.0" encoding="utf-8"?><SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:comm3="http://www.chinatelecom.com.cn/schema/ctcc/common/v2_1" xmlns:sms7="http://www.chinatelecom.com.cn/schema/ctcc/sms/notification/v2_1/local"><SOAP-ENV:Header><comm3:NotifySOAPHeader><SAN>10628888</SAN><spId>88888888</spId><spRevId>333</spRevId><spRevpassword>123</spRevpassword><timeStamp>1216171414</timeStamp><productId>188800888080000008888</productId><linkId>434242</linkId><multicastMessaging>false</multicastMessaging></comm3:NotifySOAPHeader></SOAP-ENV:Header><SOAP-ENV:Body><sms7:notifySmsReception><sms7:registrationIdentifier>10628888</sms7:registrationIdentifier><sms7:message><message>中国</message><senderAddress>tel:18608513826</senderAddress><smsServiceActivationNumber>tel:10628888</smsServiceActivationNumber></sms7:message></sms7:notifySmsReception></SOAP-ENV:Body></SOAP-ENV:Envelope>com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 2 of 2-byte UTF-8 sequence.代码如下:
package com.guiyangyaxun.isagsms.util;import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;public class XMLReader {
public String strXML;
public int nXmlState;// 0表示上行,1表示状态报告 private ByteArrayInputStream stream;
private DocumentBuilderFactory factory;
private DocumentBuilder builder;
private Document doc; public XMLReader(String sXML){
this.strXML = sXML;
try {
stream = new ByteArrayInputStream(strXML.getBytes());
factory = DocumentBuilderFactory.newInstance();
builder = factory.newDocumentBuilder();
doc = builder.parse(stream);
NodeList nl = doc.getElementsByTagName("sms7:notifySmsDeliveryReceipt");
if (nl.getLength() > 0) {
nXmlState = 1;
} else {
nXmlState = 0;
}
} catch (Exception e) {
e.printStackTrace();
}
} /**
* 根据形参返回节点值
*
* @param strXmlNodeName
* @return
*/
public String getNodeValue(String strXmlNodeName) {
String strXmlNodeValue = ""; strXmlNodeValue = doc.getElementsByTagName(strXmlNodeName).item(0)
.getFirstChild().getNodeValue(); return strXmlNodeValue;
}
}
<?xml version="1.0" encoding="utf-8"?><SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:comm3="http://www.chinatelecom.com.cn/schema/ctcc/common/v2_1" xmlns:sms7="http://www.chinatelecom.com.cn/schema/ctcc/sms/notification/v2_1/local"><SOAP-ENV:Header><comm3:NotifySOAPHeader><SAN>10628888</SAN><spId>88888888</spId><spRevId>333</spRevId><spRevpassword>123</spRevpassword><timeStamp>1216171414</timeStamp><productId>188800888080000008888</productId><linkId>434242</linkId><multicastMessaging>false</multicastMessaging></comm3:NotifySOAPHeader></SOAP-ENV:Header><SOAP-ENV:Body><sms7:notifySmsReception><sms7:registrationIdentifier>10628888</sms7:registrationIdentifier><sms7:message><message>中国</message><senderAddress>tel:18608513826</senderAddress><smsServiceActivationNumber>tel:10628888</smsServiceActivationNumber></sms7:message></sms7:notifySmsReception></SOAP-ENV:Body></SOAP-ENV:Envelope>com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 2 of 2-byte UTF-8 sequence.代码如下:
package com.guiyangyaxun.isagsms.util;import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;public class XMLReader {
public String strXML;
public int nXmlState;// 0表示上行,1表示状态报告 private ByteArrayInputStream stream;
private DocumentBuilderFactory factory;
private DocumentBuilder builder;
private Document doc; public XMLReader(String sXML){
this.strXML = sXML;
try {
stream = new ByteArrayInputStream(strXML.getBytes());
factory = DocumentBuilderFactory.newInstance();
builder = factory.newDocumentBuilder();
doc = builder.parse(stream);
NodeList nl = doc.getElementsByTagName("sms7:notifySmsDeliveryReceipt");
if (nl.getLength() > 0) {
nXmlState = 1;
} else {
nXmlState = 0;
}
} catch (Exception e) {
e.printStackTrace();
}
} /**
* 根据形参返回节点值
*
* @param strXmlNodeName
* @return
*/
public String getNodeValue(String strXmlNodeName) {
String strXmlNodeValue = ""; strXmlNodeValue = doc.getElementsByTagName(strXmlNodeName).item(0)
.getFirstChild().getNodeValue(); return strXmlNodeValue;
}
}
package com.guiyangyaxun.isagsms.util;import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;public class XMLReader {
public String strXML;
public int nXmlState;// 0表示上行,1表示状态报告private ByteArrayInputStream stream;
private DocumentBuilderFactory factory;
private DocumentBuilder builder;
private Document doc;public XMLReader(String sXML){
this.strXML = sXML;
try {
stream = new ByteArrayInputStream(strXML.getBytes());
factory = DocumentBuilderFactory.newInstance();
builder = factory.newDocumentBuilder();
doc = builder.parse(stream);
NodeList nl = doc.getElementsByTagName("sms7:notifySmsDeliveryReceipt");
if (nl.getLength() > 0) {
nXmlState = 1;
} else {
nXmlState = 0;
}
} catch (Exception e) {
e.printStackTrace();
}
}/**
* 根据形参返回节点值
*
* @param strXmlNodeName
* @return
*/
public String getNodeValue(String strXmlNodeName) {
String strXmlNodeValue = "";strXmlNodeValue = doc.getElementsByTagName(strXmlNodeName).item(0)
.getFirstChild().getNodeValue();return strXmlNodeValue;
}
}
ByteArrayInputStream bais = new ByteArrayInputStream(source.getBytes());Document doc = builder.parse(bais);在这一行报错改为ByteArrayInputStream bais = new ByteArrayInputStream(source.getBytes("UTF-8"));Document doc = builder.parse(bais);
一般国内的操作系统默认都是GBK或者GB18030等,所以会乱码