我的代码里面已经做过网上好多人说的中文处理了,但是我发现还是有个别字转换成字符串的时候出错。我用的是dom4j.
我的程序如下:
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Small reproduction program: parses an XML file with dom4j and prints the
 * re-serialized document to standard error.
 */
public class Dom4jTest {

    public static void main(String[] args) throws Exception {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File("e:\\str2file_gb2312.xml"));
        String s = doc2String(document);
        System.err.println("s=" + s);
    }

    /**
     * Converts the content of an XML document to a string.
     *
     * <p>The document is written to an in-memory byte stream using the
     * gb2312 encoding and the bytes are decoded back with the same encoding.
     * Any character the encoding cannot represent is therefore lost in the
     * round trip.
     *
     * @param document the document to serialize
     * @return the serialized XML text
     * @throws IOException if the encoding is not supported or writing fails;
     *         failures are reported to the caller instead of being printed
     *         and replaced by an empty string
     */
    public static String doc2String(Document document) throws IOException {
        // Serialize through an in-memory output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Output is encoded as GB2312.
        OutputFormat format = new OutputFormat(" ", true, "gb2312");
        XMLWriter writer = new XMLWriter(out, format);
        try {
            writer.write(document);
        } finally {
            // Closing flushes any buffered characters into the byte stream.
            writer.close();
        }
        return out.toString("gb2312");
    }
}
xml文件如下:
str2file_gb2312.xml
<?xml version="1.0" encoding="GB2312" ?>
<a>abc123一二三人口手猋犇囧</a>输出结果为:
s=<?xml version="1.0" encoding="gb2312"?><a>abc123一二三人口手???</a>这几个字“猋犇囧”怎么不能转换,好着急,有高手能帮我一下吗?类似不能转换的汉字还有好多,是代码有问题还是我的字库里面缺少这些字啊,有人帮帮我吗?
------------------------------
帮我测测吧,jar包可以到这里下
dom4j-1.6.1.jar下载地址:
http://sourceforge.net/projects/dom4j/files/dom4j/1.6.1/dom4j-1.6.1.jar/download
jaxen-1.1-beta-6.jar下载地址:
http://download.csdn.net/source/1171179
我的程序如下:
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Small reproduction program: parses an XML file with dom4j and prints the
 * re-serialized document to standard error.
 */
public class Dom4jTest {

    public static void main(String[] args) throws Exception {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File("e:\\str2file_gb2312.xml"));
        String s = doc2String(document);
        System.err.println("s=" + s);
    }

    /**
     * Converts the content of an XML document to a string.
     *
     * <p>The document is written to an in-memory byte stream using the
     * gb2312 encoding and the bytes are decoded back with the same encoding.
     * Any character the encoding cannot represent is therefore lost in the
     * round trip.
     *
     * @param document the document to serialize
     * @return the serialized XML text
     * @throws IOException if the encoding is not supported or writing fails;
     *         failures are reported to the caller instead of being printed
     *         and replaced by an empty string
     */
    public static String doc2String(Document document) throws IOException {
        // Serialize through an in-memory output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Output is encoded as GB2312.
        OutputFormat format = new OutputFormat(" ", true, "gb2312");
        XMLWriter writer = new XMLWriter(out, format);
        try {
            writer.write(document);
        } finally {
            // Closing flushes any buffered characters into the byte stream.
            writer.close();
        }
        return out.toString("gb2312");
    }
}
xml文件如下:
str2file_gb2312.xml
<?xml version="1.0" encoding="GB2312" ?>
<a>abc123一二三人口手猋犇囧</a>输出结果为:
s=<?xml version="1.0" encoding="gb2312"?><a>abc123一二三人口手???</a>这几个字“猋犇囧”怎么不能转换,好着急,有高手能帮我一下吗?类似不能转换的汉字还有好多,是代码有问题还是我的字库里面缺少这些字啊,有人帮帮我吗?
------------------------------
帮我测测吧,jar包可以到这里下
dom4j-1.6.1.jar下载地址:
http://sourceforge.net/projects/dom4j/files/dom4j/1.6.1/dom4j-1.6.1.jar/download
jaxen-1.1-beta-6.jar下载地址:
http://download.csdn.net/source/1171179
package test;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Small test program: parses a UTF-8 XML file with dom4j and prints the
 * re-serialized document to standard error.
 */
public class Dom4jTest {

    public static void main(String[] args) throws Exception {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File("E:\\workspace\\EnterpriseInfoPortal\\noname1.xml"));
        String s = doc2String(document);
        System.err.println("s=" + s);
    }

    /**
     * Converts the content of an XML document to a string.
     *
     * <p>The document is written to an in-memory byte stream as UTF-8 and the
     * bytes are decoded back with the same encoding.
     *
     * @param document the document to serialize
     * @return the serialized XML text
     * @throws IOException if the encoding is not supported or writing fails;
     *         failures are reported to the caller instead of being printed
     *         and replaced by an empty string
     */
    public static String doc2String(Document document) throws IOException {
        // Serialize through an in-memory output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Output is encoded as UTF-8.
        OutputFormat format = new OutputFormat(" ", true, "UTF-8");
        XMLWriter writer = new XMLWriter(out, format);
        try {
            writer.write(document);
        } finally {
            // Closing flushes any buffered characters into the byte stream.
            writer.close();
        }
        return out.toString("UTF-8");
    }
}
xml:(此 xml 是在 JBuilder 里,把整个工程的 Properties -> Info -> Text file encoding 项设置为 UTF-8 时生成的;用 EditPlus 编辑保存的文件解析时会报错)
<?xml version="1.0" encoding="UTF-8" ?>
<a>abc123一二三人口手猋犇囧</a>输出:
IWAV0055I Java Bean test.Dom4jTest started with the main method
s=<?xml version="1.0" encoding="UTF-8"?><a>abc123一二三人口手猋犇囧</a>
sorry,这我就不知道了。GB2312 字符集收录的汉字有限,很可能不包含“猋犇囧”这几个字的编码,所以按 gb2312 输出时它们变成了“?”。可以试试改用 GBK 或 UTF-8 输出;如果必须保留 gb2312 的声明,可以试试下面的办法:
1.把xml以文本方式读到一个字符串里面。
2.把字符串第一个encoding="GB2312" 替换成 encoding="UTF-8"
3.再把这个字符串交给dom去以UTF-8的方式解析输出。具体我没试过,不知道能不能行哈。等下有空给你试下。
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Workaround program: re-encodes a GB2312-declared XML file as UTF-8, parses
 * the UTF-8 copy with dom4j and prints the serialized document.
 */
public class Dom4jTest {

    /** Charset used to decode the source file when none is given. */
    private static final String SOURCE_CHARSET = "GBK";

    public static void main(String[] args) throws Exception {
        SAXReader saxReader = new SAXReader();
        String fileName = "E:\\noname1.xml";
        String saveFileName = "E:\\noname1_bak.xml";

        String original = Dom4jTest.readFile(fileName);
        if (original.length() == 0) {
            // readFile reports failure as an empty string; stop here instead
            // of writing and parsing an empty backup file.
            throw new IOException("nothing could be read from " + fileName);
        }

        // Rewrite the encoding declaration. The source file must contain an
        // XML declaration with encoding="GB2312" written without spaces
        // around '='. The match ignores case because the declaration is
        // commonly written in upper case.
        String fileStr = original.replaceFirst("(?i)encoding=\"gb2312\"", "encoding=\"UTF-8\"");
        byte[] save = fileStr.getBytes("UTF-8");

        // Write the UTF-8 copy to a separate backup file. Whether the
        // original and the backup are deleted afterwards is up to the caller.
        FileOutputStream file = new FileOutputStream(saveFileName);
        try {
            file.write(save);
        } finally {
            file.close();
        }

        // Parse the UTF-8 copy.
        Document document = saxReader.read(new File(saveFileName));
        String s = doc2String(document);
        System.err.println("s=" + s);
    }

    /**
     * Converts the content of an XML document to a string.
     *
     * <p>The document is written to an in-memory byte stream as UTF-8 and the
     * bytes are decoded back with the same encoding.
     *
     * @param document the document to serialize
     * @return the serialized XML text
     * @throws IOException if the encoding is not supported or writing fails;
     *         failures are reported to the caller instead of being printed
     *         and replaced by an empty string
     */
    public static String doc2String(Document document) throws IOException {
        // Serialize through an in-memory output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Output is encoded as UTF-8.
        OutputFormat format = new OutputFormat(" ", true, "UTF-8");
        XMLWriter writer = new XMLWriter(out, format);
        try {
            writer.write(document);
        } finally {
            // Closing flushes any buffered characters into the byte stream.
            writer.close();
        }
        return out.toString("UTF-8");
    }

    /**
     * Reads a whole text file, decoding it as GBK.
     *
     * <p>GBK is used rather than the platform default so the result does not
     * depend on the machine's locale. It is assumed here that the input is a
     * GB2312/GBK encoded file, as in {@link #main(String[])}.
     *
     * @param fileName path of the file to read
     * @return the file content with line terminators preserved, or an empty
     *         string if the file could not be read
     */
    public static String readFile(String fileName) {
        return readFile(fileName, SOURCE_CHARSET);
    }

    /**
     * Reads a whole text file using the given charset.
     *
     * @param fileName    path of the file to read
     * @param charsetName name of the charset used to decode the bytes
     * @return the file content with line terminators preserved, or an empty
     *         string if the file could not be read (the cause is printed to
     *         standard error)
     */
    public static String readFile(String fileName, String charsetName) {
        File f = new File(fileName);
        StringBuilder content = new StringBuilder();
        InputStreamReader reader = null;
        try {
            reader = new InputStreamReader(new FileInputStream(f), charsetName);
            // Copy raw characters so line breaks inside the XML are kept.
            char[] buffer = new char[4096];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
        } catch (IOException e) {
            System.err.println("readFile failed for " + fileName + ": " + e);
            return "";
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a reader fails.
                }
            }
        }
        return content.toString();
    }
}
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Workaround program: re-encodes a GB2312-declared XML file as UTF-8, parses
 * the UTF-8 copy with dom4j, then saves the serialized document with its
 * declaration switched back to gb2312.
 */
public class Dom4jTest {

    /** Charset used for the GB-encoded input and output files. */
    private static final String GB_CHARSET = "GBK";

    public static void main(String[] args) throws Exception {
        SAXReader saxReader = new SAXReader();
        String fileName = "E:\\noname1.xml";
        String bakFileName = "E:\\noname1_bak.xml";
        String saveFileName = "E:\\aaa.xml";

        String original = Dom4jTest.readFile(fileName);
        if (original.length() == 0) {
            // readFile reports failure as an empty string; stop here instead
            // of writing and parsing an empty backup file.
            throw new IOException("nothing could be read from " + fileName);
        }

        // The match ignores case because the declaration is commonly written
        // as encoding="GB2312".
        String fileStr = original.replaceFirst(
                "(?i)encoding=\"gb2312\"", "encoding=\"UTF-8\"");
        byte[] save = fileStr.getBytes("UTF-8");
        FileOutputStream file = new FileOutputStream(bakFileName);
        try {
            file.write(save);
        } finally {
            file.close();
        }

        Document document = saxReader.read(new File(bakFileName));
        String s = doc2String(document);
        Dom4jTest.saveFile(saveFileName, s.replaceFirst(
                "encoding=\"UTF-8\"", "encoding=\"gb2312\""));
        System.err.println("s=" + s);
    }

    /**
     * Writes text to a new file, encoded as GBK.
     *
     * <p>GBK is used rather than the platform default so the bytes written do
     * not depend on the machine's locale.
     *
     * @param fileName path of the file to create
     * @param contents text to write
     * @throws IOException if the file already exists, cannot be created, or
     *         cannot be written
     */
    public static void saveFile(String fileName, String contents)
            throws IOException {
        File f = new File(fileName);
        // createNewFile returns false when the file already exists, so an
        // existing file is never overwritten.
        if (!f.createNewFile()) {
            throw new IOException("file create failure...");
        }
        FileOutputStream output = new FileOutputStream(f);
        try {
            output.write(contents.getBytes(GB_CHARSET));
        } finally {
            output.close();
        }
    }

    /**
     * Converts the content of an XML document to a string.
     *
     * <p>The document is written to an in-memory byte stream as UTF-8 and the
     * bytes are decoded back with the same encoding.
     *
     * @param document the document to serialize
     * @return the serialized XML text
     * @throws IOException if the encoding is not supported or writing fails;
     *         failures are reported to the caller instead of being printed
     *         and replaced by an empty string
     */
    public static String doc2String(Document document) throws IOException {
        // Serialize through an in-memory output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Output is encoded as UTF-8.
        OutputFormat format = new OutputFormat(" ", true, "UTF-8");
        XMLWriter writer = new XMLWriter(out, format);
        try {
            writer.write(document);
        } finally {
            // Closing flushes any buffered characters into the byte stream.
            writer.close();
        }
        return out.toString("UTF-8");
    }

    /**
     * Reads a whole text file, decoding it as GBK.
     *
     * <p>It is assumed here that the input is a GB2312/GBK encoded file, as
     * in {@link #main(String[])}.
     *
     * @param fileName path of the file to read
     * @return the file content with line terminators preserved, or an empty
     *         string if the file could not be read (the cause is printed to
     *         standard error)
     */
    public static String readFile(String fileName) {
        File f = new File(fileName);
        StringBuilder content = new StringBuilder();
        InputStreamReader reader = null;
        try {
            reader = new InputStreamReader(new FileInputStream(f), GB_CHARSET);
            // Copy raw characters so line breaks inside the XML are kept.
            char[] buffer = new char[4096];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
        } catch (IOException e) {
            System.err.println("readFile failed for " + fileName + ": " + e);
            return "";
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a reader fails.
                }
            }
        }
        return content.toString();
    }
}