/**
 * Converts GBK text into ASCII plus HTML hexadecimal character references.
 *
 * <p>ASCII characters are passed through unchanged; every other character is
 * written as {@code &#xHHHH;} where {@code HHHH} is its UTF-16 code unit in
 * uppercase hexadecimal.
 */
public class UnicodeTest implements java.io.Serializable {

    // Both constants are compile-time String constants on purpose: they need no
    // static initializer, so the class's default serialVersionUID is unaffected.

    /** Returned by the {@code getUnicode} overloads when the conversion cannot be done. */
    private static final String FAILURE_MESSAGE = "Sorry Convert2Unicode method fail!";

    /** Uppercase hexadecimal digits, indexed by nibble value. */
    private static final String HEX_DIGITS = "0123456789ABCDEF";

    public UnicodeTest() {
    }

    /**
     * Formats a byte as two uppercase hexadecimal digits, high nibble first.
     *
     * @param b the byte to format
     * @return a two-character string such as {@code "0A"} or {@code "FF"}
     */
    public static String toHEXString(byte b) {
        // b is sign-extended by >>, so mask after shifting to keep only the high nibble.
        return "" + HEX_DIGITS.charAt((b >> 4) & 0xf) + HEX_DIGITS.charAt(b & 0xf);
    }

    /**
     * Escapes a GBK string whose raw bytes are carried one-per-char in an
     * ISO-8859-1 string.
     *
     * <p>The input is turned back into bytes with ISO-8859-1, those bytes are
     * decoded as GBK, and the decoded text is escaped: characters below U+0080
     * are copied as-is, all others become {@code &#xHHHH;}.
     *
     * @param setStr the byte-carrying string; characters above U+00FF cannot be
     *               represented in ISO-8859-1 and are replaced by the encoder
     * @return the escaped text, or {@code "Sorry Convert2Unicode method fail!"}
     *         when {@code setStr} is {@code null} or a required charset is missing
     */
    public static String getUnicode(String setStr) {
        if (setStr == null) {
            return FAILURE_MESSAGE;
        }

        final String decoded;
        try {
            decoded = new String(setStr.getBytes("ISO-8859-1"), "GBK");
        } catch (java.io.UnsupportedEncodingException ex) {
            // No logger is available in this class; keep the diagnostic on stderr.
            ex.printStackTrace();
            return FAILURE_MESSAGE;
        }

        StringBuilder escaped = new StringBuilder(decoded.length());
        for (int i = 0; i < decoded.length(); i++) {
            char c = decoded.charAt(i);
            if (c < 0x80) {
                escaped.append(c);
            } else {
                // Includes U+0080..U+00FF (e.g. the multiplication sign), whose high
                // byte is zero but which are not ASCII and must not be emitted raw.
                escaped.append("&#x")
                        .append(toHEXString((byte) (c >> 8)))
                        .append(toHEXString((byte) c))
                        .append(';');
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes an ordinary Java string by first encoding it with {@code encoding}
     * and handing the resulting bytes to {@link #getUnicode(String)}.
     *
     * <p>The bytes are wrapped in an ISO-8859-1 string (one char per byte) only
     * as a transport format; {@link #getUnicode(String)} always decodes them as
     * GBK, so the result is meaningful only when {@code encoding} yields
     * GBK-compatible bytes.
     *
     * @param setStr   the text to escape
     * @param encoding the charset name used to encode {@code setStr}
     * @return the escaped text, or {@code "Sorry Convert2Unicode method fail!"}
     *         when an argument is {@code null} or {@code encoding} is unsupported
     */
    public static String getUnicode(String setStr, String encoding) {
        if (setStr == null || encoding == null) {
            return FAILURE_MESSAGE;
        }
        try {
            return getUnicode(new String(setStr.getBytes(encoding), "ISO-8859-1"));
        } catch (java.io.UnsupportedEncodingException ex) {
            // No logger is available in this class; keep the diagnostic on stderr.
            ex.printStackTrace();
            return FAILURE_MESSAGE;
        }
    }
}
// Original poster's note: I found this code for getting the Unicode of characters online and do not fully understand it, so I am asking about it here.
在 getUnicode(String setStr, String encoding) 方法里,为什么要先把按 encoding 编码得到的字节转换成 ISO-8859-1 字符串,
而在 getUnicode(String setStr) 里又要把 ISO-8859-1 的字节重新按 GBK 解码呢?
在这里
i++;
if (bb[i - 1] != 0)
szRetTmp = "&#x" + toHEXString(bb[i - 1])
+ toHEXString(bb[i]) + ";";
else
szRetTmp = new String(bb, i, 1, "GBK");
为什么循环要从 2 开始,并且一进入循环体就先自加 1 呢?另外,这里的 if 和 else 分支分别代表什么情况?
// toHEXString(byte): returns two characters taken from "0123456789ABCDEF" --
// first the one at index (b >> 4) & 0xf (high nibble), then the one at b & 0xf (low nibble).
} public static String toHEXString(byte b) {
return ("" + "0123456789ABCDEF".charAt(0xf & b >> 4) + "0123456789ABCDEF"
.charAt(b & 0xf));
// getUnicode(String): turns setStr back into bytes with ISO-8859-1, decodes those bytes
// as GBK, then walks the UTF-16 bytes of the result two at a time. A pair whose first
// byte is non-zero is written as "&#x" + hex(first) + hex(second) + ";"; otherwise the
// second byte alone is decoded as GBK and appended.
// Returns "Sorry Convert2Unicode method fail!" if anything in the outer try throws.
} public static String getUnicode(String setStr) {
String szRet = "";
String szRetTmp = null;
String str = null;
try {
try {
str = new String(setStr.getBytes("ISO-8859-1"), "GBK");
} catch (Exception ex) {
ex.printStackTrace();
}
// If the conversion above threw, str is still null here; the resulting exception is handled by the outer catch.
byte[] bb = str.getBytes("UTF16");
// Starts at index 2 to skip the two-byte byte-order mark that Java's UTF-16 encoder writes first (see Charset docs).
for (int i = 2; i < bb.length; i++) {
// Combined with the loop's own i++, this advances two bytes per pass: bb[i - 1] is the first byte of the pair, bb[i] the second.
i++;
// NOTE(review): when the first byte is 0 but the second is >= 0x80, that single byte is
// presumably not a complete GBK sequence, so the else branch may not reproduce the character -- confirm.
if (bb[i - 1] != 0)
szRetTmp = "&#x" + toHEXString(bb[i - 1])
+ toHEXString(bb[i]) + ";";
else
szRetTmp = new String(bb, i, 1, "GBK");
szRet = szRet + szRetTmp;
}
} catch (Exception ex) {
szRet = "Sorry Convert2Unicode method fail!";
ex.printStackTrace();
}
return szRet;
// getUnicode(String, String): encodes setStr with the named encoding, wraps those bytes
// in an ISO-8859-1 string (one char per byte) and passes it to getUnicode(String).
// If the encoding step throws, str stays null and null is passed on.
} public static String getUnicode(String setStr, String encoding) {
String str = null;
try {
str = new String(setStr.getBytes(encoding), "ISO-8859-1");
} catch (Exception ex) {
ex.printStackTrace();
}
return getUnicode(str);
}
}在网上找的一段得到字符的unicode代码,有点看不懂,特意问一下
在 getUnicode(String setStr, String encoding) 方法里,为什么要先把按 encoding 编码得到的字节转换成 ISO-8859-1 字符串,
而在 getUnicode(String setStr) 里又要把 ISO-8859-1 的字节重新按 GBK 解码呢?
在这里
i++;
if (bb[i - 1] != 0)
szRetTmp = "&#x" + toHEXString(bb[i - 1])
+ toHEXString(bb[i]) + ";";
else
szRetTmp = new String(bb, i, 1, "GBK");
为什么循环要从 2 开始,并且一进入循环体就先自加 1 呢?另外,这里的 if 和 else 分支分别代表什么情况?
try {这种样子的。像:
public static String toHEXString(byte b) {
return ("" + "0123456789ABCDEF".charAt(0xf & b > > 4) + "0123456789ABCDEF"
.charAt(b & 0xf));
}
// Suggested revision: the sixteen hex digits are held once in the hexChar array and
// toHEXString looks the two digits up by index instead of calling charAt on a literal.
这段代码,改为:private final static char[] hexChar = "0123456789ABCDEF".toCharArray();public static String toHEXString(byte b) {
// High nibble first: b >> 4 binds tighter than &, so the index is (b >> 4) & 0xf; then the low nibble b & 0xf.
return ("" + hexChar[0xf & b >> 4] + hexChar[b & 0xf]);
} 这样更好一些的。