gbk 转 utf8 问题

用Java程序将GBK字符转成UTF-8编码格式的代码（2008-07-11 17:40）
package test;
import java.io.UnsupportedEncodingException;
/**
 * Demonstrates encoding a Java string as UTF-8 bytes and decoding it back.
 *
 * <p>A Java {@code String} holds UTF-16 code units and has no "GBK" or
 * "UTF-8" encoding of its own; an encoding only comes into play when the
 * string is converted to or from bytes. Building the bytes by hand with a
 * fixed three bytes per {@code char} is wrong for every character below
 * U+0800 (it yields an invalid overlong sequence, e.g. {@code 'a'} becomes
 * {@code E0 81 A1}) and for surrogate pairs, so the JDK's own UTF-8 encoder
 * is used instead.
 */
public class GBK2UTF8 {

    /**
     * Encodes {@code text} as UTF-8.
     *
     * @param text the string to encode; must not be {@code null}
     * @return the UTF-8 bytes of {@code text}; an empty array for an empty string
     * @throws NullPointerException if {@code text} is {@code null}
     * @throws IllegalStateException if the JVM reports UTF-8 as unsupported
     */
    public static byte[] toUtf8Bytes(String text) {
        try {
            return text.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Not expected in practice: UTF-8 is a charset every Java platform must provide.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }

    /**
     * Encodes a sample string to UTF-8, decodes the bytes again and prints
     * the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String mystr = "要转换的字符";
        byte[] b = toUtf8Bytes(mystr);
        try {
            String string = new String(b, "utf-8");
            System.out.println(string);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
}

网上还看到了这个另外那个是   public String convertString(String gbk){
        String utf8 = "";
        try {
            utf8 = new String(gbk2utf8(gbk),"UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return utf8;
    }    public byte[] gbk2utf8(String chenese) {
        char c[] = chenese.toCharArray();
        byte[] fullByte = new byte[3 * c.length];
        for (int i = 0; i < c.length; i++) {
            int m = (int) c[i];
            String word = Integer.toBinaryString(m);            StringBuffer sb = new StringBuffer();
            int len = 16 - word.length();
            for (int j = 0; j < len; j++) {
                sb.append("0");
            }
            sb.append(word);
            sb.insert(0, "1110");
            sb.insert(8, "10");
            sb.insert(16, "10");            String s1 = sb.substring(0, 8);
            String s2 = sb.substring(8, 16);
            String s3 = sb.substring(16);            byte b0 = Integer.valueOf(s1, 2).byteValue();
            byte b1 = Integer.valueOf(s2, 2).byteValue();
            byte b2 = Integer.valueOf(s3, 2).byteValue();
            byte[] bf = new byte[3];
            bf[0] = b0;
            fullByte[i * 3] = bf[0];
            bf[1] = b1;
            fullByte[i * 3 + 1] = bf[1];
            bf[2] = b2;
            fullByte[i * 3 + 2] = bf[2];        }
        return fullByte;
    }
结果都是不行..................

是啊为什么不写个filter 呢？总比这样一次次的转换要强的多

恩，上述说写个filter是正确的，网上有很多这样的源码！

<!-- Declares a servlet filter named "CharacterEncodingFilter" backed by Spring's CharacterEncodingFilter class. -->
<filter>
<filter-name>CharacterEncodingFilter</filter-name>
<filter-class>
org.springframework.web.filter.CharacterEncodingFilter
</filter-class>
<!-- Encoding handed to the filter: GBK. -->
<init-param>
<param-name>encoding</param-name>
<param-value>GBK</param-value>
</init-param>
<!-- forceEncoding is switched on. NOTE(review): presumably this makes the filter apply the encoding even when the request already declares one; confirm against the Spring version in use. -->
<init-param>
<param-name>forceEncoding</param-name>
<param-value>true</param-value>
</init-param>
</filter>
<!-- Runs the filter for URLs matching *.do; a second mapping for *.jsp follows. -->
<filter-mapping>
<filter-name>CharacterEncodingFilter</filter-name>
<url-pattern>*.do</url-pattern>
</filter-mapping>
<filter-mapping>
<filter-name>CharacterEncodingFilter</filter-name>
<url-pattern>*.jsp</url-pattern>
</filter-mapping>
看了一下项目的配置文件，貌似已经配置了 filter，获取到的应该是 GBK 编码。在 debug 模式下，"String b = getParameter("name");" 看到的是乱码，应该是 Java 编码是 UTF-8 的问题。
然后 "b = new String(dish_id.getBytes("GBK"), "UTF-8");" 转换后能看到一部分，但是每串中文如果是奇数个字，那最后那个字就是乱码。我想把 b 的内容写到 Excel 里面：用直接获取的 b 来写入 Excel，打开是乱码……转换后又有几个字是乱码，郁闷。

用java.nio.charset来解决这种问题，先查一下getbytes("GBK")后的是不是一个汉字三个字节

10楼的兄弟，那个是 Spring 集成的 CharacterEncodingFilter，如果不用 Spring 的话就不行了。我这倒是有一个 CharacterEncodingFilter 的类，看看成不成：
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;

/**
 * Servlet filter that sets the character encoding of every incoming request
 * before handing it to the rest of the chain.
 *
 * <p>The encoding comes from the {@code encoding} init parameter; when that
 * parameter is missing or blank the filter falls back to UTF-8.
 */
public class CharacterEncodingFilter implements Filter {

    /** Encoding used when no usable {@code encoding} init parameter is configured. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    private FilterConfig config = null;

    /** Encoding applied to each request; starts at the default. */
    private String targetEncoding = DEFAULT_ENCODING;

    /**
     * Stores the configuration and resolves the target encoding.
     *
     * @param config filter configuration supplied by the container
     * @throws ServletException declared by the {@code Filter} contract; not thrown here
     */
    public void init(FilterConfig config) throws ServletException {
        this.config = config;
        String configured = config.getInitParameter("encoding");
        // Keep the UTF-8 default instead of overwriting it with null or blank
        // when the init parameter is not configured.
        boolean usable = configured != null && configured.trim().length() > 0;
        this.targetEncoding = usable ? configured.trim() : DEFAULT_ENCODING;
    }

    /** Releases the references held by this filter. */
    public void destroy() {
        config = null;
        targetEncoding = null;
    }

    /**
     * Sets the request character encoding, then continues the chain.
     *
     * @param srequest  the incoming request
     * @param sresponse the outgoing response, passed through unchanged
     * @param chain     the remaining filter chain
     * @throws IOException      if the encoding call or a downstream component fails with an I/O error
     * @throws ServletException if a downstream component fails
     */
    public void doFilter(ServletRequest srequest, ServletResponse sresponse,
                         FilterChain chain) throws IOException, ServletException {
        // setCharacterEncoding is declared on ServletRequest, so no cast to
        // HttpServletRequest (and no ClassCastException risk) is needed.
        srequest.setCharacterEncoding(targetEncoding);
        chain.doFilter(srequest, sresponse);
    }
}

你可以在传递之前就先将字符转换成 UTF-8 的形式啊，然后在 servlet 里面再转回来，就保证不会出错了。
转换的方式跟b = new String(dish_id.getBytes("GBK"), "UTF-8");
差不多，只是转为byte型，就可以了

1.加了过滤器
2.你的MySQL里面的字符编码设置正确
定义SetCharacterEncodingFilter类，实现Filter接口，代码如下：
import java.io.IOException;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;

/**
 * Servlet filter that assigns a character encoding to incoming requests.
 *
 * <p>Init parameters read by {@link #init(FilterConfig)}:
 * <ul>
 *   <li>{@code encoding}: the encoding name returned by
 *       {@link #selectEncoding(ServletRequest)}.</li>
 *   <li>{@code ignore}: when absent, {@code "true"} or {@code "yes"}
 *       (case-insensitive), the encoding is set on every request; any other
 *       value means it is set only when the request reports no encoding.</li>
 * </ul>
 */
public class SetCharacterEncodingFilter implements Filter {

    /** Encoding read from the {@code encoding} init parameter. */
    protected String encoding = null;

    /** Configuration object handed over by the container in {@code init}. */
    protected FilterConfig filterConfig = null;

    /** Whether an encoding already reported by the request is overridden. */
    protected boolean ignore = true;

    /**
     * Reads the {@code encoding} and {@code ignore} init parameters.
     *
     * @param filterConfig configuration supplied by the container
     * @throws ServletException declared by the {@code Filter} contract; not thrown here
     */
    public void init(FilterConfig filterConfig) throws ServletException {
        this.filterConfig = filterConfig;
        this.encoding = filterConfig.getInitParameter("encoding");

        String flag = filterConfig.getInitParameter("ignore");
        this.ignore = flag == null
                || flag.equalsIgnoreCase("true")
                || flag.equalsIgnoreCase("yes");
    }

    /**
     * Chooses and applies the character encoding, then continues the chain.
     *
     * @param request  the incoming request
     * @param response the outgoing response, passed through unchanged
     * @param chain    the remaining filter chain
     * @throws IOException      if the encoding call or a downstream component fails with an I/O error
     * @throws ServletException if a downstream component fails
     */
    public void doFilter(ServletRequest request, ServletResponse response,
            FilterChain chain) throws IOException, ServletException {
        // Apply the encoding unconditionally when 'ignore' is set, otherwise
        // only when the request does not report one itself.
        boolean shouldApply = ignore || request.getCharacterEncoding() == null;
        if (shouldApply) {
            String selected = selectEncoding(request);
            if (selected != null) {
                request.setCharacterEncoding(selected);
            }
        }

        // Hand the request on to the next element of the chain.
        chain.doFilter(request, response);
    }

    /** Drops the references captured in {@code init}. */
    public void destroy() {
        this.filterConfig = null;
        this.encoding = null;
    }

    /**
     * Picks the encoding to use for {@code request}.
     *
     * @param request the request being processed (not consulted by this implementation)
     * @return the configured {@link #encoding}, possibly {@code null}
     */
    protected String selectEncoding(ServletRequest request) {
        return this.encoding;
    }
}
配置这个Filter，只需在web.xml文件中添加如下代码：
<filter>
   <filter-name>Set Character Encoding</filter-name>
   <filter-class>com.wanglei.SetCharacterEncodingFilter</filter-class>
   <init-param>
    <param-name>encoding</param-name>
    <param-value>UTF-8</param-value>
   </init-param>
</filter>
   <!-- Maps the "Set Character Encoding" filter to the URL pattern /*. -->
   <filter-mapping>
   <filter-name>Set Character Encoding</filter-name>
   <url-pattern>/*</url-pattern>
</filter-mapping>

一个Filter搞定，不想写就用Spring的那个吧，呵呵

调试易

gbk 转 utf8 问题

解决方案 »