Java 怎样读取一个字符的 Unicode 值？

要把 GB2312 或 Big5 编码的字节转换成 Unicode 字符串，可以用：
unicodeString = new String(myString.getBytes(), "GB2312");
或
unicodeString = new String(myString.getBytes(), "Big5");

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

/**
 * Hand-written conversions between Java strings and UTF-8 / 16-bit byte forms.
 *
 * <p>All methods are static and keep no state.
 */
class transCN {

    /**
     * Decodes UTF-8 data that is carried one byte per {@code char} in a string.
     *
     * <p>Only the low 8 bits of every char of {@code instr} are examined; each
     * char is treated as one UTF-8 byte. Sequences of one to four bytes are
     * accepted. Code points above U+FFFF are appended as a surrogate pair.
     *
     * @param instr string whose chars hold the UTF-8 bytes (must not be null)
     * @return the decoded text
     * @throws java.io.IOException if a lead byte is not a valid UTF-8 lead byte,
     *         a continuation byte is missing or malformed, or the decoded value
     *         is above U+10FFFF
     */
    static public String convertUTF8String2Unicode(String instr)
            throws java.io.IOException {
        int length = instr.length();
        StringBuilder decoded = new StringBuilder(length);

        int pos = 0;
        while (pos < length) {
            int lead = instr.charAt(pos++) & 0xff;

            int codePoint;
            int continuationCount;
            if ((lead & 0x80) == 0) {            // 0xxxxxxx
                codePoint = lead;
                continuationCount = 0;
            } else if ((lead & 0xE0) == 0xC0) {  // 110xxxxx
                codePoint = lead & 0x1F;
                continuationCount = 1;
            } else if ((lead & 0xF0) == 0xE0) {  // 1110xxxx
                codePoint = lead & 0x0F;
                continuationCount = 2;
            } else if ((lead & 0xF8) == 0xF0) {  // 11110xxx
                // Only three payload bits live in a 4-byte lead byte; a wider
                // mask would let the marker bit leak into the value.
                codePoint = lead & 0x07;
                continuationCount = 3;
            } else {
                // Stray continuation byte (10xxxxxx) or 0xF8..0xFF.
                throw new java.io.IOException("Invalid UTF-8 format");
            }

            for (int k = 0; k < continuationCount; k++) {
                // A sequence cut off by the end of input is a format error,
                // not an index error.
                if (pos >= length) {
                    throw new java.io.IOException("Invalid UTF-8 format");
                }
                int next = instr.charAt(pos++) & 0xff;
                if ((next & 0xC0) != 0x80) {
                    throw new java.io.IOException("Invalid UTF-8 format");
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            // appendCodePoint rejects values beyond the Unicode range with an
            // unchecked exception; report them as a format error instead.
            if (codePoint > Character.MAX_CODE_POINT) {
                throw new java.io.IOException("Invalid UTF-8 format");
            }
            decoded.appendCodePoint(codePoint);
        }
        return decoded.toString();
    }

    /**
     * Encodes a string as UTF-8 bytes.
     *
     * <p>A valid surrogate pair is encoded as one 4-byte sequence. An unpaired
     * surrogate is encoded as a 3-byte sequence of its own value.
     *
     * @param instr the text to encode (must not be null)
     * @return a new array holding exactly the encoded bytes
     */
    public static byte[] convertUnicode2UTF8Byte(String instr) {
        int len = instr.length();
        // Worst case is 3 bytes per char: a BMP char needs at most 3 bytes and
        // a surrogate pair (2 chars) needs 4.
        byte[] buffer = new byte[len * 3];
        int used = 0;

        int i = 0;
        while (i < len) {
            int cp = instr.codePointAt(i);
            i += Character.charCount(cp);

            if (cp < 0x80) {
                buffer[used++] = (byte) cp;
            } else if (cp < 0x0800) {
                buffer[used++] = (byte) (((cp >> 6) & 0x1F) | 0xC0);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            } else if (cp < 0x010000) {
                buffer[used++] = (byte) (((cp >> 12) & 0x0F) | 0xE0);
                buffer[used++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            } else {
                // 4-byte form: lead byte marker is 11110xxx (0xF0).
                buffer[used++] = (byte) (((cp >> 18) & 0x07) | 0xF0);
                buffer[used++] = (byte) (((cp >> 12) & 0x3F) | 0x80);
                buffer[used++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[used++] = (byte) ((cp & 0x3F) | 0x80);
            }
        }

        byte[] result = new byte[used];
        System.arraycopy(buffer, 0, result, 0, used);
        return result;
    }

    /**
     * Builds a string from 16-bit units stored high byte first.
     *
     * <p>Every two bytes {@code (hi, lo)} become the char {@code (hi << 8) | lo}.
     * If {@code myByte} is null or has an odd length, "Encoding Error" is
     * printed to standard output and the empty string is returned.
     *
     * <p>NOTE(review): {@link #Unicode2Byte(String)} writes the low byte first,
     * so the two methods are not inverses of each other. Confirm which byte
     * order callers expect before relying on a round trip.
     *
     * @param myByte bytes to decode, two per char, high byte first
     * @return the decoded string, or "" when the input is null or odd-length
     */
    public static String ISO106462Unicode(byte[] myByte) {
        if (myByte == null || (myByte.length & 1) != 0) {
            System.out.println("Encoding Error");
            return "";
        }

        StringBuilder sb = new StringBuilder(myByte.length / 2);
        for (int i = 0; i < myByte.length; i += 2) {
            int ch = ((myByte[i] & 0xff) << 8) | (myByte[i + 1] & 0xff);
            sb.append((char) ch);
        }
        return sb.toString();
    }

    /**
     * Splits every char of a string into two bytes, low byte first.
     *
     * @param s the text to convert (must not be null)
     * @return an array of {@code 2 * s.length()} bytes; for each char the low
     *         8 bits come first, followed by the high 8 bits
     */
    public static byte[] Unicode2Byte(String s) {
        int len = s.length();
        byte[] out = new byte[len << 1];
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            out[2 * i] = (byte) (c & 0xff);
            out[2 * i + 1] = (byte) (c >> 8);
        }
        return out;
    }
}
/*
 * UnicodeTest.java
 *
 * Created on July 29, 2003, 12:59 PM
 */

/**
 * Small demo that prints the byte encoding of a mixed Chinese/ASCII string in
 * GBK, big-endian UTF-16 without a byte-order mark (labelled "UCS2" in the
 * output) and ASCII, and decodes the bytes back into a string.
 *
 * @author  abc
 */
public class UnicodeTest {

    /** Runs the demo; command-line arguments are ignored. */
    public static void main(String args[]) {
        UnicodeTest UT = new UnicodeTest();
        UT.test1();
    }

    /**
     * Encodes the sample string with each charset in turn, printing the bytes
     * as hex and the string decoded back from those bytes.
     *
     * <p>If the runtime does not support one of the charsets, a message is
     * written to standard error and the remaining steps are skipped.
     */
    public void test1() {
        String str = "测试信息abc123";
        try {
            byte[] b = str.getBytes("GBK");
            System.out.println(str + " -(GBK)编码: " + bytesToHexStr(b));
            System.out.println("");

            str = new String(b, "GBK");
            System.out.println("从GBK编码 " + bytesToHexStr(b) + " 重新转换为字串: " + str);
            System.out.println("");

            // "UTF-16BE" is the standard charset name for big-endian UTF-16
            // without a byte-order mark.
            b = str.getBytes("UTF-16BE");
            System.out.println(str + " -(UCS2)编码: " + bytesToHexStr(b));
            System.out.println("");

            str = new String(b, "UTF-16BE");
            System.out.println("从(UCS2)编码 " + bytesToHexStr(b) + " 重新转换为字串: " + str);
            System.out.println("");

            // Characters that ASCII cannot represent come out as '?' (0x3f).
            b = str.getBytes("ASCII");
            System.out.println(str + " -(ASCII)编码: " + bytesToHexStr(b));
            System.out.println("");
        } catch (java.io.UnsupportedEncodingException e) {
            // Report the failure instead of silently printing nothing.
            System.err.println("Unsupported encoding: " + e.getMessage());
        }
    }

    /**
     * Formats bytes as lower-case hex values, each followed by one space.
     *
     * <p>Values below 0x10 are printed as a single digit (no zero padding).
     *
     * @param b the bytes to format; may be null
     * @return the hex text, or "" when {@code b} is null
     */
    private String bytesToHexStr(byte[] b) {
        if (b == null) {
            return "";
        }
        StringBuilder hex = new StringBuilder(b.length * 3);
        for (int i = 0; i < b.length; i++) {
            hex.append(Integer.toHexString(b[i] & 0xff));
            hex.append(" ");
        }
        return hex.toString();
    }
}
// Output of running this small program:
测试信息abc123 -(GBK)编码: b2 e2 ca d4 d0 c5 cf a2 61 62 63 31 32 33

从GBK编码 b2 e2 ca d4 d0 c5 cf a2 61 62 63 31 32 33  重新转换为字串: 测试信息abc123

测试信息abc123 -(UCS2)编码: 6d 4b 8b d5 4f e1 60 6f 0 61 0 62 0 63 0 31 0 32 0 33

从(UCS2)编码 6d 4b 8b d5 4f e1 60 6f 0 61 0 62 0 63 0 31 0 32 0 33  重新转换为字串: 测试信息abc123

测试信息abc123 -(ASCII)编码: 3f 3f 3f 3f 61 62 63 31 32 33