gbk 转 utf8 问题 http://topic.csdn.net/u/20080807/15/f7cec80d-c6a2-4cca-9d42-17290b937831.html?seed=627059027参考了 这个文章 觉得最后那个的思路是对的,但结果还是一样.....求救 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 用java程序将GBK字符转成UTF-8编码格式的代码2008-07-11 17:40package test; import java.io.UnsupportedEncodingException; /** * 用java程序将GBK字符转成UTF-8编码格式的代码 * 对字符串要一个字符一个字符的转. * 下面以第一个字符为例讲解程序的运行过程: * 1.从字符串中获得单个字符; * 2.将字符转成以二进制形式表示的字符串(可以理解成将字符转成整形,再转成二进制,再转成字符串表现.) * 3.new一个StringBuffer,并把上一步得到的字符串赋于该StringBuffer,如果长度不到16位,则左边用"0"补齐. * 4.在得到的StringBuffer的第0个下标的位置插入"1110"; * 5.再在经过第四步之后得到的StringBuffer的第8个下标的位置插入"10"; * 6.然后在经过第五步之后得到的StringBuffer的第16个下标的位置插入"10"; * 7.将经过第六步之后得到的结果转换成String型的,并按8位的长度成字三个字符串,存入一个字符数组中. * 8.把字符数组转换成字节数组; * 9.用String的构造方法转成UTF-8编码 */ public class GBK2UTF8 { public static void main(String args[]) { String mystr="要转换的字符";int length = mystr.length(); String[] s = new String[length * 3]; byte[] b = new byte[length * 3]; for (int i = 0; i < length; i++) { char c = mystr.charAt(i); String str = Integer.toBinaryString(c); int l = 16 - str.length(); StringBuffer sb = new StringBuffer(); for (int j = 0; j < l; j++) { sb.append("0"); } sb.append(str); sb.insert(0, "1110"); sb.insert(8, "10"); sb.insert(16, "10"); String str2 = sb.toString(); s[i * 3] = str2.substring(0, 8); s[i * 3 + 1] = str2.substring(8, 16); s[i * 3 + 2] = str2.substring(16); } for (int i = 0; i < s.length; i++) { b[i] = Integer.valueOf(s[i], 2).byteValue(); } try { String string = new String(b, "utf-8"); System.out.println(string); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } 网上还看到了 这个另外那个是 public String convertString(String gbk){ String utf8 = ""; try { utf8 = new String(gbk2utf8(gbk),"UTF-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } return utf8; } public byte[] gbk2utf8(String chenese) { char c[] = chenese.toCharArray(); byte[] fullByte = new byte[3 * c.length]; for (int i = 0; i < c.length; i++) { int m = (int) c[i]; 
String word = Integer.toBinaryString(m); StringBuffer sb = new StringBuffer(); int len = 16 - word.length(); for (int j = 0; j < len; j++) { sb.append("0"); } sb.append(word); sb.insert(0, "1110"); sb.insert(8, "10"); sb.insert(16, "10"); String s1 = sb.substring(0, 8); String s2 = sb.substring(8, 16); String s3 = sb.substring(16); byte b0 = Integer.valueOf(s1, 2).byteValue(); byte b1 = Integer.valueOf(s2, 2).byteValue(); byte b2 = Integer.valueOf(s3, 2).byteValue(); byte[] bf = new byte[3]; bf[0] = b0; fullByte[i * 3] = bf[0]; bf[1] = b1; fullByte[i * 3 + 1] = bf[1]; bf[2] = b2; fullByte[i * 3 + 2] = bf[2]; } return fullByte; }结果都是不行.................. 是啊 为什么不写个filter 呢? 总比这样一次次的转换要强的多 恩,上述说写个filter是正确的,网上有很多这样的源码! <!-- 字符集过滤器 --> <filter> <filter-name>CharacterEncodingFilter</filter-name> <filter-class> org.springframework.web.filter.CharacterEncodingFilter </filter-class> <init-param> <param-name>encoding</param-name> <param-value>GBK</param-value> </init-param> <init-param> <param-name>forceEncoding</param-name> <param-value>true</param-value> </init-param> </filter> <filter-mapping> <filter-name>CharacterEncodingFilter</filter-name> <url-pattern>*.do</url-pattern> </filter-mapping> <filter-mapping> <filter-name>CharacterEncodingFilter</filter-name> <url-pattern>*.jsp</url-pattern> </filter-mapping>看了一下项目的配置文件,貌似 配置了 filter获取到的 应该是 GBK 编码.在debug 模式下,"String b = getParameter("name");" 看到的是乱码 ,应该是java编码是utf-8 问题.然后"b = new String(dish_id.getBytes("GBK"), "UTF-8");" 转换后 能看到一部分,但是每串中文,如果是奇数个字,那最后那个字就是 乱码.我想吧 b 的内容写到 excel 里面. 用直接获取的b 来写入excel,打开是乱码...... 
转换后又有几个字是乱码,郁闷 用java.nio.charset来解决这种问题,先查一下getbytes("GBK")后的是不是一个汉字三个字节 10楼的兄弟那个是spring集成的CharacterEncodingFilter,如果不用spring的话就不行了我这倒是有一个characterEncodingFilter的类,看看成不成import javax.servlet.*;import javax.servlet.http.HttpServletRequest;import java.io.IOException;/** * Encoding Character to UTF-8================= */ public class CharacterEncodingFilter implements Filter { private FilterConfig config = null; // default to UTF-8 private String targetEncoding = "UTF-8"; public void init(FilterConfig config) throws ServletException { this.config = config; this.targetEncoding = config.getInitParameter("encoding"); } public void destroy() { config = null; targetEncoding = null; } public void doFilter(ServletRequest srequest, ServletResponse sresponse, FilterChain chain) throws IOException, ServletException { HttpServletRequest request = (HttpServletRequest) srequest; request.setCharacterEncoding(targetEncoding); chain.doFilter(srequest, sresponse); }} 你可以在传递之前就先讲字符转换成utf8的形式啊,然后在serlvet里面再转回来,就保证不回出错了。转换的方式跟b = new String(dish_id.getBytes("GBK"), "UTF-8"); 差不多,只是转为byte型,就可以了 1.加了过滤器2.你的MySQL里面的字符编码设置正确定义SetCharacterEncodingFilter类,实现Filter接口,代码如下:import java.io.IOException;import javax.servlet.Filter;import javax.servlet.FilterChain;import javax.servlet.FilterConfig;import javax.servlet.ServletException;import javax.servlet.ServletRequest;import javax.servlet.ServletResponse;public class SetCharacterEncodingFilter implements Filter {protected String encoding = null;protected FilterConfig filterConfig = null;protected boolean ignore = true;//destroy方法public void destroy() { this.encoding = null; this.filterConfig = null;}//选择设置使用的字符编码public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { // Conditionally select and set the character encoding to be used if (ignore || (request.getCharacterEncoding() == null)) { String encoding = selectEncoding(request); if (encoding != null) 
request.setCharacterEncoding(encoding); } // Pass control on to the next filter chain.doFilter(request, response);}//将这个filter放置在服务器中public void init(FilterConfig filterConfig) throws ServletException { this.filterConfig = filterConfig; this.encoding = filterConfig.getInitParameter("encoding"); String value = filterConfig.getInitParameter("ignore"); if (value == null) this.ignore = true; else if (value.equalsIgnoreCase("true")) this.ignore = true; else if (value.equalsIgnoreCase("yes")) this.ignore = true; else this.ignore = false;}//选择适当的字符编码protected String selectEncoding(ServletRequest request) { return (this.encoding);}}配置这个Filter,只需在web.xml文件中添加如下代码:<filter> <filter-name>Set Character Encoding</filter-name> <filter-class>com.wanglei.SetCharacterEncodingFilter</filter-class> <init-param> <param-name>encoding</param-name> <param-value>UTF-8</param-value> </init-param></filter> <filter-mapping> <filter-name>Set Character Encoding</filter-name> <url-pattern>/*</url-pattern></filter-mapping> 一个Filter搞定,不想写就用Spring的那个吧,呵呵 ldap 用户验证失败,在线等急急急!!! Struts action相应的问题 iReport与JasperReport出PDF问题 日期数据从JSP到PO影射时,发生的错误 jtable如何控制某列的数据精度,比如只保留两位小数 一个STRUTS的问题。。。请大家一定进来看看 请问JDO和JDBC之间有什么关系? 请大家帮忙看一下吧,小问题,关于jdk1.4.2的安装 关于ejb的配置问题 重金寻求答案 问一个java的问题 现在java用什么技术较多?
import java.io.UnsupportedEncodingException;
/**
 * Demonstrates encoding a Java string as UTF-8 bytes and decoding those bytes back.
 *
 * <p>A Java {@code String} holds UTF-16 code units in memory, so it is never "in GBK";
 * GBK and UTF-8 only exist at the byte level. To convert GBK-encoded bytes to UTF-8
 * bytes, decode them with {@code new String(gbkBytes, "GBK")} and pass the result to
 * {@link #toUtf8Bytes(String)}.
 *
 * <p>Why the JDK encoder is used instead of hand-building bit strings: emitting a fixed
 * three-byte pattern ({@code 1110xxxx 10xxxxxx 10xxxxxx}) for every {@code char} is only
 * valid for U+0800..U+FFFF outside the surrogate range. ASCII and other characters below
 * U+0800 would become overlong sequences, and supplementary characters (surrogate pairs)
 * would become two invalid three-byte sequences; a UTF-8 decoder rejects both.
 */
public class GBK2UTF8 {

    /** Sample text used when no command-line argument is supplied. */
    private static final String DEFAULT_TEXT = "要转换的字符";

    /**
     * Encodes a string as UTF-8, decodes it again, and prints the result.
     *
     * @param args optional; when present, {@code args[0]} is the text to round-trip,
     *     otherwise the built-in sample text is used
     */
    public static void main(String[] args) {
        String mystr = (args != null && args.length > 0) ? args[0] : DEFAULT_TEXT;
        byte[] b = toUtf8Bytes(mystr);
        // Decoding with an explicit charset constant avoids both the platform default
        // charset and the checked UnsupportedEncodingException of the String-name overload.
        String string = new String(b, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(string);
    }

    /**
     * Returns the UTF-8 encoding of {@code text}.
     *
     * <p>The result uses one to four bytes per code point, as UTF-8 requires. Unpaired
     * surrogates cannot be encoded and are replaced by the charset's replacement bytes.
     *
     * @param text the text to encode; must not be {@code null}
     * @return a new byte array containing the UTF-8 bytes of {@code text}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static byte[] toUtf8Bytes(String text) {
        return text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }
}
网上还看到了 这个另外那个是 public String convertString(String gbk){
String utf8 = "";
try {
utf8 = new String(gbk2utf8(gbk),"UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return utf8;
} public byte[] gbk2utf8(String chenese) {
char c[] = chenese.toCharArray();
byte[] fullByte = new byte[3 * c.length];
for (int i = 0; i < c.length; i++) {
int m = (int) c[i];
String word = Integer.toBinaryString(m); StringBuffer sb = new StringBuffer();
int len = 16 - word.length();
for (int j = 0; j < len; j++) {
sb.append("0");
}
sb.append(word);
sb.insert(0, "1110");
sb.insert(8, "10");
sb.insert(16, "10"); String s1 = sb.substring(0, 8);
String s2 = sb.substring(8, 16);
String s3 = sb.substring(16); byte b0 = Integer.valueOf(s1, 2).byteValue();
byte b1 = Integer.valueOf(s2, 2).byteValue();
byte b2 = Integer.valueOf(s3, 2).byteValue();
byte[] bf = new byte[3];
bf[0] = b0;
fullByte[i * 3] = bf[0];
bf[1] = b1;
fullByte[i * 3 + 1] = bf[1];
bf[2] = b2;
fullByte[i * 3 + 2] = bf[2]; }
return fullByte;
}
结果都是不行..................
<!-- Declares a filter named CharacterEncodingFilter backed by Spring's
     org.springframework.web.filter.CharacterEncodingFilter, passing it the
     init-params encoding=GBK and forceEncoding=true. -->
<filter>
<filter-name>CharacterEncodingFilter</filter-name>
<filter-class>
org.springframework.web.filter.CharacterEncodingFilter
</filter-class>
<init-param>
<param-name>encoding</param-name>
<param-value>GBK</param-value>
</init-param>
<init-param>
<param-name>forceEncoding</param-name>
<param-value>true</param-value>
</init-param>
</filter>
<!-- Applies the filter to URLs matching *.do -->
<filter-mapping>
<filter-name>CharacterEncodingFilter</filter-name>
<url-pattern>*.do</url-pattern>
</filter-mapping>
<!-- Applies the same filter to URLs matching *.jsp -->
<filter-mapping>
<filter-name>CharacterEncodingFilter</filter-name>
<url-pattern>*.jsp</url-pattern>
</filter-mapping>看了一下项目的配置文件,貌似 配置了 filter获取到的 应该是 GBK 编码.在debug 模式下,"String b = getParameter("name");" 看到的是乱码 ,应该是java编码是utf-8 问题.
然后"b = new String(dish_id.getBytes("GBK"), "UTF-8");" 转换后 能看到一部分,但是每串中文,如果是奇数个字,那最后那个字就是 乱码.我想把 b 的内容写到 excel 里面. 用直接获取的b 来写入excel,打开是乱码...... 转换后又有几个字是乱码,郁闷
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;

/**
 * Servlet filter that sets the request character encoding before passing the request on.
 *
 * <p>The encoding is read from the {@code encoding} init-param. When that parameter is
 * missing or blank, UTF-8 is used.
 */
public class CharacterEncodingFilter implements Filter {

    /** Encoding applied when the filter configuration does not name one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    private FilterConfig config = null;

    // default to UTF-8
    private String targetEncoding = DEFAULT_ENCODING;

    /**
     * Stores the configuration and resolves the encoding to apply.
     *
     * @param config filter configuration; its {@code encoding} init-param is optional
     * @throws ServletException declared by the {@code Filter} contract; not thrown here
     */
    public void init(FilterConfig config) throws ServletException {
        this.config = config;
        String configured = config.getInitParameter("encoding");
        // Keep the default when the parameter is absent or blank; assigning the raw
        // value would replace the default with null.
        if (configured != null && configured.trim().length() > 0) {
            this.targetEncoding = configured.trim();
        } else {
            this.targetEncoding = DEFAULT_ENCODING;
        }
    }

    /** Drops the references held by this filter. */
    public void destroy() {
        config = null;
        targetEncoding = null;
    }

    /**
     * Sets the request character encoding, then continues the filter chain.
     *
     * @param srequest the incoming request
     * @param sresponse the outgoing response
     * @param chain the remaining filter chain
     * @throws IOException if setting the encoding or the downstream chain fails with an I/O error
     * @throws ServletException if the downstream chain fails
     */
    public void doFilter(ServletRequest srequest, ServletResponse sresponse, FilterChain chain)
            throws IOException, ServletException {
        // setCharacterEncoding is called on ServletRequest directly, so no cast to
        // HttpServletRequest is needed.
        if (targetEncoding != null) {
            srequest.setCharacterEncoding(targetEncoding);
        }
        chain.doFilter(srequest, sresponse);
    }
}
转换的方式跟b = new String(dish_id.getBytes("GBK"), "UTF-8");
差不多,只是转为byte型,就可以了
2.你的MySQL里面的字符编码设置正确。定义SetCharacterEncodingFilter类,实现Filter接口,代码如下:
import java.io.IOException;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;

/**
 * Filter that applies a configured character encoding to incoming requests.
 *
 * <p>Init-params read by {@link #init(FilterConfig)}: {@code encoding} (the encoding to
 * apply) and {@code ignore} (whether to override an encoding the request already reports).
 */
public class SetCharacterEncodingFilter implements Filter {

    /** Encoding taken from the {@code encoding} init-param; may be {@code null}. */
    protected String encoding = null;

    /** Configuration handed to {@link #init(FilterConfig)}. */
    protected FilterConfig filterConfig = null;

    /** When {@code true}, the request's own reported encoding is overridden. */
    protected boolean ignore = true;

    /** Clears the state held by this filter. */
    public void destroy() {
        this.encoding = null;
        this.filterConfig = null;
    }

    /**
     * Selects and sets the character encoding for the request, then continues the chain.
     *
     * @param request the incoming request
     * @param response the outgoing response
     * @param chain the remaining filter chain
     * @throws IOException if an I/O error occurs
     * @throws ServletException if a servlet error occurs
     */
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
            throws IOException, ServletException {
        // Only touch the request when overriding is enabled or it reports no encoding.
        boolean shouldApply = ignore || request.getCharacterEncoding() == null;
        if (shouldApply) {
            String selected = selectEncoding(request);
            if (selected != null) {
                request.setCharacterEncoding(selected);
            }
        }
        // Hand over to the next filter in the chain.
        chain.doFilter(request, response);
    }

    /**
     * Reads the {@code encoding} and {@code ignore} init-params.
     *
     * <p>{@code ignore} is true when the parameter is absent or equals "true" or "yes"
     * (case-insensitive), and false for any other value.
     *
     * @param filterConfig the filter configuration
     * @throws ServletException declared by the {@code Filter} contract; not thrown here
     */
    public void init(FilterConfig filterConfig) throws ServletException {
        this.filterConfig = filterConfig;
        this.encoding = filterConfig.getInitParameter("encoding");
        String flag = filterConfig.getInitParameter("ignore");
        this.ignore = flag == null
                || flag.equalsIgnoreCase("true")
                || flag.equalsIgnoreCase("yes");
    }

    /**
     * Chooses the encoding to apply to the given request.
     *
     * @param request the request being processed (not inspected by this implementation)
     * @return the configured encoding, possibly {@code null}
     */
    protected String selectEncoding(ServletRequest request) {
        return this.encoding;
    }
}
配置这个Filter,只需在web.xml文件中添加如下代码:<filter>
<!-- Declares the filter "Set Character Encoding", implemented by
     com.wanglei.SetCharacterEncodingFilter, with the init-param encoding=UTF-8.
     No "ignore" init-param is supplied here. -->
<filter-name>Set Character Encoding</filter-name>
<filter-class>com.wanglei.SetCharacterEncodingFilter</filter-class>
<init-param>
<param-name>encoding</param-name>
<param-value>UTF-8</param-value>
</init-param>
</filter>
<!-- Maps the filter to the URL pattern /* -->
<filter-mapping>
<filter-name>Set Character Encoding</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>