要把 GB2312 或 Big5 编码的文本转换成 Unicode 字符串，可以用：
unicodeString = new String(myString.getBytes(), "GB2312");
或
unicodeString = new String(myString.getBytes(), "Big5");
unicodeString = new String(myString.getBytes(), "GB2312");
或
unicodeString = new String(myString.getBytes(), "Big5");
/**
 * Decodes UTF-8 data that is carried in a String, one byte per char.
 *
 * <p>Only the low 8 bits of each char of {@code instr} are used; each is
 * treated as one UTF-8 byte. Sequences of one to four bytes are accepted.
 * Code points above U+FFFF are appended as a surrogate pair.
 *
 * @param instr the UTF-8 bytes, one per char
 * @return the decoded text
 * @throws IOException if a byte is not a valid lead byte, a continuation
 *         byte is missing or malformed, or the decoded value is above
 *         U+10FFFF
 */
public static String convertUTF8String2Unicode(String instr)
        throws IOException {
    int length = instr.length();
    StringBuilder decoded = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        int lead = instr.charAt(i++) & 0xff;
        int codePoint;
        int continuationCount;
        if ((lead & 0x80) == 0) {
            codePoint = lead;
            continuationCount = 0;
        } else if ((lead & 0xE0) == 0xC0) {
            codePoint = lead & 0x1F;
            continuationCount = 1;
        } else if ((lead & 0xF0) == 0xE0) {
            codePoint = lead & 0x0F;
            continuationCount = 2;
        } else if ((lead & 0xF8) == 0xF0) {
            // A 4-byte lead carries only three payload bits.
            codePoint = lead & 0x07;
            continuationCount = 3;
        } else {
            // Stray continuation byte or a lead byte outside UTF-8.
            throw new IOException("Invalid UTF-8 format");
        }
        for (int k = 0; k < continuationCount; k++) {
            if (i >= length) {
                // Input ends in the middle of a multi-byte sequence.
                throw new IOException("Invalid UTF-8 format");
            }
            int next = instr.charAt(i++) & 0xff;
            if ((next & 0xC0) != 0x80) {
                throw new IOException("Invalid UTF-8 format");
            }
            codePoint = (codePoint << 6) | (next & 0x3F);
        }
        if (codePoint > Character.MAX_CODE_POINT) {
            throw new IOException("Invalid UTF-8 format");
        }
        // appendCodePoint emits a surrogate pair for supplementary characters.
        decoded.appendCodePoint(codePoint);
    }
    return decoded.toString();
}

/**
 * Encodes a String as UTF-8 bytes.
 *
 * <p>The string is read code point by code point, so a valid surrogate pair
 * becomes a single four-byte sequence. An unpaired surrogate is written as a
 * three-byte sequence of its own value.
 *
 * @param instr the text to encode
 * @return a new array holding exactly the encoded bytes
 */
public static byte[] convertUnicode2UTF8Byte(String instr) {
    int len = instr.length();
    // Worst case is three bytes per char; a surrogate pair (two chars)
    // needs only four bytes.
    byte[] buffer = new byte[len * 3];
    int j = 0;
    for (int i = 0; i < len; ) {
        int cp = instr.codePointAt(i);
        i += Character.charCount(cp);
        if (cp < 0x80) {
            buffer[j++] = (byte) cp;
        } else if (cp < 0x0800) {
            buffer[j++] = (byte) (((cp >> 6) & 0x1F) | 0xC0);
            buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
        } else if (cp < 0x010000) {
            buffer[j++] = (byte) (((cp >> 12) & 0x0F) | 0xE0);
            buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
            buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
        } else {
            // Four-byte sequences start with the 11110xxx marker (0xF0).
            buffer[j++] = (byte) (((cp >> 18) & 0x07) | 0xF0);
            buffer[j++] = (byte) (((cp >> 12) & 0x3F) | 0x80);
            buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
            buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
        }
    }
    byte[] retbyte = new byte[j];
    System.arraycopy(buffer, 0, retbyte, 0, j);
    return retbyte;
}
/**
 * Builds a String from 16-bit code units stored high byte first.
 *
 * <p>Each pair {@code myByte[i], myByte[i + 1]} becomes one char, with
 * {@code myByte[i]} as the upper eight bits. If the array is {@code null}
 * or has an odd length, "Encoding Error" is printed to standard output and
 * the empty string is returned.
 *
 * @param myByte the code units, two bytes each, high byte first
 * @return the decoded text, or {@code ""} when the input is unusable
 */
public static String ISO106462Unicode(byte[] myByte) {
    if (myByte == null || (myByte.length & 1) != 0) {
        System.out.println("Encoding Error");
        return "";
    }
    StringBuilder sb = new StringBuilder(myByte.length / 2);
    for (int i = 0; i < myByte.length; i += 2) {
        // Mask both bytes so negative values do not sign-extend into the char.
        int hi = myByte[i] & 0xff;
        int lo = myByte[i + 1] & 0xff;
        sb.append((char) ((hi << 8) | lo));
    }
    return sb.toString();
}

/**
 * Splits every char of a String into two bytes, low byte first.
 *
 * <p>Note that this byte order is the opposite of the one read by
 * {@code ISO106462Unicode}, which expects the high byte first; the output of
 * this method cannot be passed to it without swapping each pair.
 *
 * @param s the text to convert
 * @return an array of {@code 2 * s.length()} bytes
 */
public static byte[] Unicode2Byte(String s) {
    int len = s.length();
    byte[] abyte = new byte[len << 1];
    for (int i = 0; i < len; i++) {
        char c = s.charAt(i);
        abyte[2 * i] = (byte) (c & 0xff);
        abyte[2 * i + 1] = (byte) (c >> 8);
    }
    return abyte;
}
}
* UnicodeTest.java
*
* Created on July 29, 2003, 12:59 PM
*//**
*
* @author abc
* @version
*/public class UnicodeTest
{ public static void main(String args[])
{
UnicodeTest UT = new UnicodeTest();
UT.test1();
} public void test1()
{
String str = "测试信息abc123";
try
{
byte[] b = str.getBytes("GBK");
System.out.println(str + " -(GBK)编码: " + bytesToHexStr(b));
System.out.println(""); str = new String(b, "GBK");
System.out.println("从GBK编码 " + bytesToHexStr(b) + " 重新转换为字串: " + str);
System.out.println(""); b = str.getBytes("UnicodeBigUned");
System.out.println(str + " -(UCS2)编码: " + bytesToHexStr(b));
System.out.println(""); str = new String(b, "UnicodeBigUned");
System.out.println("从(UCS2)编码 " + bytesToHexStr(b) + " 重新转换为字串: " + str);
System.out.println(""); b = str.getBytes("ASCII");
System.out.println(str + " -(ASCII)编码: " + bytesToHexStr(b));
System.out.println("");
}
catch(Exception e){}
} private String bytesToHexStr(byte[] b)
{
if (b == null) return "";
StringBuffer strBuffer = new StringBuffer(b.length * 3);
for(int i = 0; i < b.length; i++)
{
strBuffer.append(Integer.toHexString(b[i] & 0xff));
strBuffer.append(" ");
}
return strBuffer.toString();
}}运行此小程序的输出结果是:
测试信息abc123 -(GBK)编码: b2 e2 ca d4 d0 c5 cf a2 61 62 63 31 32 33
从GBK编码 b2 e2 ca d4 d0 c5 cf a2 61 62 63 31 32 33 重新转换为字串: 测试信息abc123
测试信息abc123 -(UCS2)编码: 6d 4b 8b d5 4f e1 60 6f 0 61 0 62 0 63 0 31 0 32 0 33
从(UCS2)编码 6d 4b 8b d5 4f e1 60 6f 0 61 0 62 0 63 0 31 0 32 0 33 重新转换为字串: 测试信息abc123
测试信息abc123 -(ASCII)编码: 3f 3f 3f 3f 61 62 63 31 32 33