将GB2312的Html文件内容以UTF-8的方式输出时,中文字符是"????????????????" /**
* 获取GB2312编码文件内容
* @param filename 编码为GB2312的Html文件名
* @return
*/
public String getDocument(String filename){
StringBuffer htmlBuffer = new StringBuffer(); //字符缓冲区
String line = null; //一行的内容
//读取文件内容
try {
//打开文件
BufferedReader reader = new BufferedReader(new FileReader(filename));
//一行一行的读取文件内容
while ((line = reader.readLine()) != null){
// line = GBKToUTF(line);
htmlBuffer.append(line + "\n");//加行结束
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return htmlBuffer.toString();
}
/**
* 将GBK编码转换成UTF8编码
* @param source_gb 源GBK编码
* @return UTF8编码
*/
public String GBKToUTF(String source_gb){
String target_utf8 = null; //目标类型字符串
byte []bytesGBK = null; //源编码字节数组
//GBK转UTF8
try {
bytesGBK = source_gb.getBytes("GB2312"); //取源GB2312字符编码
target_utf8 = new String(bytesGBK, "UTF-8");//生成UTF8编码
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return target_utf8;
} String htmlText = getDocument(filename);
System.out.println(htmlText);
* 获取GB2312编码文件内容
* @param filename 编码为GB2312的Html文件名
* @return
*/
public String getDocument(String filename){
StringBuffer htmlBuffer = new StringBuffer(); //字符缓冲区
String line = null; //一行的内容
//读取文件内容
try {
//打开文件
BufferedReader reader = new BufferedReader(new FileReader(filename));
//一行一行的读取文件内容
while ((line = reader.readLine()) != null){
// line = GBKToUTF(line);
htmlBuffer.append(line + "\n");//加行结束
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return htmlBuffer.toString();
}
/**
* 将GBK编码转换成UTF8编码
* @param source_gb 源GBK编码
* @return UTF8编码
*/
public String GBKToUTF(String source_gb){
String target_utf8 = null; //目标类型字符串
byte []bytesGBK = null; //源编码字节数组
//GBK转UTF8
try {
bytesGBK = source_gb.getBytes("GB2312"); //取源GB2312字符编码
target_utf8 = new String(bytesGBK, "UTF-8");//生成UTF8编码
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return target_utf8;
} String htmlText = getDocument(filename);
System.out.println(htmlText);
Java内部的数据都是 Unicode 编码这里写错啦// 这一步是将unicode转换为"GB2312"的byte数组
bytesGBK = source_gb.getBytes("GB2312"); // 而你这里写的是将“gb2312”编码的byte数组,按照UTF-8解释当然错啦。
target_utf8 = new String(bytesGBK, "UTF-8");
对String或是char类型 只理解成 Unicode而你的
BufferedReader reader = new BufferedReader(new FileReader(filename));
是按照本地编码(GBK)去读的,并没有指定编码。line = reader.readLine())是读取行,并给line,这里就转成unicode了。所以Java的String只能有一种类型,那就是unicode记住Java对其内部的String,char是按照Unicode去理解的。
target_utf8 = new String(line.getBytes("GB2312"), "UTF-8");