中文转ASCII?? String s = "abc啊"; byte[] b = s.getBytes(); char[] c = new char[b.length]; for(int i = 0 ; i < c.length ; i++) { c[i] = (char)b[i]; } 有什么意义啊,看都看不懂??
#这是gb2312.txt 文件的部分内容 #(0x[a-fA-F0-9]+)=(0x[a-fA-F0-9]+)(\-[a-zA-Z ]+) # Format: Three tab-separated columns # Column #1 is the GB2312 code (in hex as 0xXXXX) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 the Unicode name (follows a comment sign, '#') # The official names for Unicode characters U+4E00 # to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX", # The following algorithms can be used to change the hex form# of GB2312 to other standard forms: # # To change hex to EUC form, add 0x8080 # To change hex to kuten form, first subtract 0x2020. Then # the high and low bytes correspond to the ku and ten of # the kuten form. For example, 0x2121 -> 0x0101 -> 0101; # 0x777E -> 0x575E -> 8794 0x2121=0x3000 0x2122=0x3001 0x2123=0x3002 0x2124=0x30FB 0x2125=0x02C9 0x2126=0x02C7 0x2127=0x00A8 0x2128=0x3003 0x2129=0x3005 0x212A=0x2015 0x212B=0xFF5E 0x212C=0x2016 0x212D=0x2026 0x212E=0x2018 0x212F=0x2019 #....
private static byte[] encode(String s) { char[] c = s.toCharArray(); int len = c.length; // Count the number of encoded bytes... int count = 0; for (int i = 0; i < len; i++) { int ch = c[i]; if (ch <= 0x7f) { count++; } else if (ch <= 0x7ff) { count += 2; } else { count += 3; } } // Now return the encoded bytes... byte[] b = new byte[count]; int off = 0; for (int i = 0; i < len; i++) { int ch = c[i]; if (ch <= 0x7f) { b[off++] = (byte)ch; } else if (ch <= 0x7ff) { b[off++] = (byte)((ch >> 6) | 0xc0); b[off++] = (byte)((ch & 0x3f) | 0x80); } else { b[off++] = (byte)((ch >> 12) | 0xe0); b[off++] = (byte)(((ch >> 6) & 0x3f) | 0x80); b[off++] = (byte)((ch & 0x3f) | 0x80); } } return b; } private static String decode(byte[] b, int off, int len) { // First, count the number of characters in the sequence int count = 0; int max = off + len; int i = off; while (i < max) { int c = b[i++] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: // 0xxxxxxx count++; break; case 12: case 13: // 110xxxxx 10xxxxxx if ((int)(b[i++] & 0xc0) != 0x80) { throw new IllegalArgumentException(); } count++; break; case 14: // 1110xxxx 10xxxxxx 10xxxxxx if (((int)(b[i++] & 0xc0) != 0x80) || ((int)(b[i++] & 0xc0) != 0x80)) { throw new IllegalArgumentException(); } count++; break; default: // 10xxxxxx, 1111xxxx throw new IllegalArgumentException(); } } if (i != max) { throw new IllegalArgumentException(); } // Now decode the characters... 
char[] cs = new char[count]; i = 0; while (off < max) { int c = b[off++] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: // 0xxxxxxx cs[i++] = (char)c; break; case 12: case 13: // 110xxxxx 10xxxxxx cs[i++] = (char)(((c & 0x1f) << 6) | (b[off++] & 0x3f)); break; case 14: // 1110xxxx 10xxxxxx 10xxxxxx int t = (b[off++] & 0x3f) << 6; cs[i++] = (char)(((c & 0x0f) << 12) | t | (b[off++] & 0x3f)); break; default: // 10xxxxxx, 1111xxxx throw new IllegalArgumentException(); } } return new String(cs, 0, count); }
to netramper(过山风)
是不是要解决这样的问题:类似“测试中文test”的字符串没办法存入Mysql, 因为不是UTF8格式。 == >我解决的问题是不同wap手机的(charset) form post 到服务器的数据统一转换为utf8 ,存储为mysql。从数据库取数据用 String s = rs.getString("name"); String name = new String(s.getBytes("ISO8859-1", "UTF-8"); 可以取到UTF-8 编码,在页面上可以正确显示问题。页面设置charset = "UTF-8";TO Acylas(Acylas) 感谢你的代码。对我很有帮助,但仍不能解决问题。是否有完整的代码请出示。
自己搞定。 =============== 我需要解决的问题是不同wap手机的(charset) form post 到服务器的数据统一转换为utf8 ,存储为mysql。从数据库取数据用 String s = rs.getString("name"); String name = new String(s.getBytes("ISO8859-1", "UTF-8"); 可以取到UTF-8 编码,在页面上可以正确显示问题。页面设置charset = "UTF-8"; //example of gb2312 //CHARSET = UTF-8 String HTTP_ACCEPT_CHARSET = request.getHeader("Accept-Charset"); String str = request.getParameter("gbstr"); if(HTTP_ACCEPT_CHARSET.startsWith("GB2312")){ str = toCharset(str, "ISO8859-1", "GB2312"); //client ->iso8859-1->gb2312->UTF-8 str = toCharset(str, "UTF-8", "ISO8859-1");//utf-8->ISO8859-1 }else if(HTTP_ACCEPT_CHARSET.startsWith("UTF-8")){ ;//DONNOT CONVERT }String HTTP_ACCEPT_CHARSET = request.getHeader("Accept-Charset"); String str = request.getParameter("gbstr"); if(HTTP_ACCEPT_CHARSET.startsWith("GB2312")){} public String toCharset(String str, String fromCharset , String toCharset){ String s = ""; try{ s = new String(str.getBytes(fromCharset), toCharset); }catch(UnsupportedEncodingException er){ System.out.println(er.toString()); return ""; } return s; }//大家新年快乐!
to zhoucm990(枫树) !
==============
给我的好像不是我需要的。我需要的是一个函数或算法。
php4 中有这个。
utf8_encode(String s);
utf8_decode(String s);
// Copies the encoded bytes of a string one-for-one into a char array.
String s = "abc啊";
// getBytes() with no argument encodes with the platform's default charset,
// so the length and content of b depend on the environment the JVM runs in.
byte[] b = s.getBytes();
char[] c = new char[b.length];
for(int i = 0 ; i < c.length ; i++)
{
// Each byte becomes one char. A negative byte (0x80-0xFF when read as
// unsigned) is sign-extended by the cast, giving a char in 0xFF80-0xFFFF.
c[i] = (char)b[i];
}
有什么意义啊,看都看不懂??
那是JDBC的问题,和系统环境也有关系,跟ASCII有啥干系?
String ts=new String(ss.getBytes(),"UTF-8");
mysql - utf8
clients ->GBK/big5/iso8859-1 -> utf8
select * from tbl where q = '"+ utf8_encode(request.getParameter("name"));
标题 Java中文处理学习笔记——Hello Unicode 建议楼主去看
ascii2utf(String s)
String ss = "abc没找到";
String ts = ascii2utf(ss); //ts ="abc娌℃壘鍒?"
功能相同!TO relive(六道轮回,无想转生)
感谢参与!
数据库读出的没有问题!
php4中也有此函数。
utf8_encode(String s);
我要求的功能同此方法功能。
我在网上也搜索到一个老外的c/s版的功能。但是仅对个人版免费,叫 ParallelGraphics
所有up者也有分。To CoolAbu(绿茶的白开水(★★★★)) (
String ss = "abc没找到";
String ts=new String(ss.getBytes(),"UTF-8");
###
=>这种方法早测试过!行不通!
/**
 * Converts GB2312 (EUC form) byte strings to UTF-8 with the help of a
 * mapping table read from a text file.
 *
 * Each data line of the table is expected to look like "0x2121=0x3000":
 * a four-digit GB2312 code, one separator character, and a four-digit
 * Unicode code point. Lines that do not start with "0x" are ignored.
 */
class StringConvert{
    // Path of the mapping table. Initialised here as well as in the PHP 4
    // style constructor below, because a method named after the class is no
    // longer run as a constructor on PHP 8.
    var $gb2312txt = "gb2312.txt";

    // PHP 4 style constructor, kept for callers running on old interpreters.
    function StringConvert(){
        $this->gb2312txt = "gb2312.txt";
    }

    /**
     * Converts a GB2312 (EUC form) string to UTF-8.
     *
     * Bytes up to 127 are copied through unchanged. A byte above 127 is
     * read together with the following byte as one GB2312 character and
     * looked up in the table (key = two-byte value minus 0x8080). A pair
     * with no table entry, or a lead byte with nothing after it, is
     * emitted as "?".
     *
     * @param string $gb GB2312 encoded input
     * @return string UTF-8 encoded output; $gb itself when trim($gb) is empty
     */
    function gb2utf8($gb){
        if(!trim($gb))
            return $gb;

        // file() returns false when the table cannot be read; fall back to
        // an empty table so the loops below still run.
        $tmp=file($this->gb2312txt);
        if(!is_array($tmp))
            $tmp=array();

        $codetable=array();
        foreach($tmp as $value){
            // Skip comment lines, blank lines and anything else that is
            // not a "0xXXXX?0xXXXX" data line.
            if(substr($value,0,2)!=="0x")
                continue;
            $codetable[hexdec(substr($value,2,4))]=substr($value,9,4);
        }

        $outmsg="";
        // Loop on length, not on truthiness: the string "0" is falsy in
        // PHP and would end the loop one character early.
        while(strlen($gb)>0){
            $first=substr($gb,0,1);
            if (ord($first)>127){
                $thisv=substr($gb,0,2);
                $gb=(string)substr($gb,2);
                $key=hexdec(bin2hex($thisv))-0x8080;
                if(strlen($thisv)==2 && isset($codetable[$key])){
                    // u2utf8() returns three decimal digits per byte for
                    // code points of 0x80 and above; turn each group back
                    // into a raw byte.
                    $temp="".$this->u2utf8(hexdec($codetable[$key]));
                    for($i=0;$i<strlen($temp);$i+=3)
                        $outmsg.=chr((int)substr($temp,$i,3));
                }else{
                    $outmsg.="?";
                }
            }else{
                // A byte up to 127 is already valid UTF-8. Appending it
                // directly avoids comparing a one-character string with
                // an integer inside u2utf8().
                $outmsg.=$first;
                $gb=(string)substr($gb,1);
            }
        }
        return $outmsg;
    }

    /**
     * Builds the UTF-8 encoding of code point $c as a string of
     * concatenated DECIMAL byte values, not raw bytes. For example 0x554A
     * yields "229149138". A value below 0x80 is appended as-is.
     *
     * @param int $c Unicode code point
     * @return string decimal byte values, or "" when $c is 0x200000 or more
     */
    function u2utf8($c){
        $str="";
        if ($c < 0x80)
            $str.=$c;
        else if ($c < 0x800){
            $str.=(0xC0 | $c>>6);
            $str.=(0x80 | $c & 0x3F);
        }else if ($c < 0x10000) {
            $str.=(0xE0 | $c>>12);
            $str.=(0x80 | $c>>6 & 0x3F);
            $str.=(0x80 | $c & 0x3F);
        }else if ($c < 0x200000) {
            $str.=(0xF0 | $c>>18);
            $str.=(0x80 | $c>>12 & 0x3F);
            $str.=(0x80 | $c>>6 & 0x3F);
            $str.=(0x80 | $c & 0x3F);
        }
        return $str;
    }
}
?>
#(0x[a-fA-F0-9]+)=(0x[a-fA-F0-9]+)(\-[a-zA-Z ]+)
# Format: Three tab-separated columns
# Column #1 is the GB2312 code (in hex as 0xXXXX)
# Column #2 is the Unicode (in hex as 0xXXXX)
# Column #3 the Unicode name (follows a comment sign, '#')
# The official names for Unicode characters U+4E00
# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX",
# The following algorithms can be used to change the hex form of GB2312 to other standard forms:
#
# To change hex to EUC form, add 0x8080
# To change hex to kuten form, first subtract 0x2020. Then
# the high and low bytes correspond to the ku and ten of
# the kuten form. For example, 0x2121 -> 0x0101 -> 0101;
# 0x777E -> 0x575E -> 8794
0x2121=0x3000
0x2122=0x3001
0x2123=0x3002
0x2124=0x30FB
0x2125=0x02C9
0x2126=0x02C7
0x2127=0x00A8
0x2128=0x3003
0x2129=0x3005
0x212A=0x2015
0x212B=0xFF5E
0x212C=0x2016
0x212D=0x2026
0x212E=0x2018
0x212F=0x2019
#....
找到Base64的编码、解码包,问题就解决了。
(不好意思,一时手头活多,没办法帮你去搜.)
http://liangmi2.just2u.net/java/script/259.htm
只是一个加密解密的东西。i:测试中文test
encode:5rWL6K+V5Lit5paHdGVzdA==
decode:测试中文test
char[] c = s.toCharArray();
int len = c.length;
// Count the number of encoded bytes...
int count = 0;
for (int i = 0; i < len; i++) {
int ch = c[i];
if (ch <= 0x7f) {
count++;
} else if (ch <= 0x7ff) {
count += 2;
} else {
count += 3;
}
}
// Now return the encoded bytes...
byte[] b = new byte[count];
int off = 0;
for (int i = 0; i < len; i++) {
int ch = c[i];
if (ch <= 0x7f) {
b[off++] = (byte)ch;
} else if (ch <= 0x7ff) {
b[off++] = (byte)((ch >> 6) | 0xc0);
b[off++] = (byte)((ch & 0x3f) | 0x80);
} else {
b[off++] = (byte)((ch >> 12) | 0xe0);
b[off++] = (byte)(((ch >> 6) & 0x3f) | 0x80);
b[off++] = (byte)((ch & 0x3f) | 0x80);
}
}
return b;
}
/**
 * Decodes {@code len} bytes of {@code b}, starting at {@code off}, from
 * one- to three-byte UTF-8 sequences into a string.
 *
 * <p>Four-byte sequences and stray continuation bytes are rejected.
 * Overlong forms are not checked for.
 *
 * @param b   the byte array to read from
 * @param off index of the first byte to decode
 * @param len number of bytes to decode
 * @return the decoded string
 * @throws IllegalArgumentException if the range contains an invalid lead
 *         byte, an invalid continuation byte, a sequence that is cut off
 *         at the end of the range, or if {@code len} is negative
 */
private static String decode(byte[] b, int off, int len) {
    int max = off + len;

    // Pass 1: validate the sequence and count the chars it will produce.
    int count = 0;
    int i = off;
    while (i < max) {
        int lead = b[i++] & 0xff;
        int trailing;
        switch (lead >> 4) {
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            // 0xxxxxxx
            trailing = 0;
            break;
        case 12: case 13:
            // 110xxxxx 10xxxxxx
            trailing = 1;
            break;
        case 14:
            // 1110xxxx 10xxxxxx 10xxxxxx
            trailing = 2;
            break;
        default:
            // 10xxxxxx, 1111xxxx
            throw new IllegalArgumentException(
                    "invalid lead byte at offset " + (i - 1));
        }
        // Check the remaining length before reading continuation bytes so
        // that a sequence cut off at the end of the range is reported as
        // IllegalArgumentException instead of surfacing as
        // ArrayIndexOutOfBoundsException.
        if (trailing > max - i) {
            throw new IllegalArgumentException(
                    "truncated sequence at offset " + (i - 1));
        }
        for (int k = 0; k < trailing; k++) {
            if ((b[i++] & 0xc0) != 0x80) {
                throw new IllegalArgumentException(
                        "invalid continuation byte at offset " + (i - 1));
            }
        }
        count++;
    }
    if (i != max) {
        // The loop above never steps past max, so this only happens when
        // len is negative.
        throw new IllegalArgumentException("negative length: " + len);
    }

    // Pass 2: decode. Lead bytes were validated above, so only the ranges
    // 0x00-0x7f, 0xc0-0xdf and 0xe0-0xef can occur here.
    char[] cs = new char[count];
    int n = 0;
    int p = off;
    while (p < max) {
        int lead = b[p++] & 0xff;
        if (lead < 0x80) {
            cs[n++] = (char) lead;
        } else if (lead < 0xe0) {
            cs[n++] = (char) (((lead & 0x1f) << 6) | (b[p++] & 0x3f));
        } else {
            int mid = (b[p++] & 0x3f) << 6;
            cs[n++] = (char) (((lead & 0x0f) << 12) | mid | (b[p++] & 0x3f));
        }
    }
    return new String(cs, 0, count);
}
是不是要解决这样的问题:类似“测试中文test”的字符串没办法存入Mysql, 因为不是UTF8格式。
== >我解决的问题是不同wap手机的(charset) form post 到服务器的数据统一转换为utf8 ,存储为mysql。从数据库取数据用
String s = rs.getString("name");
String name = new String(s.getBytes("ISO8859-1", "UTF-8");
可以取到UTF-8 编码,在页面上可以正确显示问题。页面设置charset = "UTF-8";TO Acylas(Acylas)
感谢你的代码。对我很有帮助,但仍不能解决问题。是否有完整的代码请出示。
===============
我需要解决的问题是不同wap手机的(charset) form post 到服务器的数据统一转换为utf8 ,存储为mysql。从数据库取数据用
String s = rs.getString("name");
String name = new String(s.getBytes("ISO8859-1", "UTF-8");
可以取到UTF-8 编码,在页面上可以正确显示问题。页面设置charset = "UTF-8";
//example of gb2312
//CHARSET = UTF-8
// Re-encode the posted "gbstr" field when the client announces GB2312.
String HTTP_ACCEPT_CHARSET = request.getHeader("Accept-Charset");
String str = request.getParameter("gbstr");
// Both the header and the parameter may be missing, so guard against null
// before touching them. The charset token is matched case-insensitively.
if (HTTP_ACCEPT_CHARSET != null && str != null
        && HTTP_ACCEPT_CHARSET.regionMatches(true, 0, "GB2312", 0, "GB2312".length())) {
    // Take the ISO8859-1 bytes of str and decode them as GB2312.
    // NOTE(review): presumably the container decoded the raw form bytes as
    // ISO8859-1 - confirm against the server's request-encoding settings.
    str = toCharset(str, "ISO8859-1", "GB2312");
    // Then take the UTF-8 bytes of the result and view them as ISO8859-1 chars.
    str = toCharset(str, "UTF-8", "ISO8859-1");
}
// A client that announces UTF-8 (or anything else) is left unconverted.
/**
 * Re-decodes a string: encodes {@code str} to bytes with
 * {@code fromCharset}, then decodes those bytes with {@code toCharset}.
 *
 * @param str         the text to convert; {@code null} yields {@code ""}
 * @param fromCharset name of the charset used to turn {@code str} into bytes
 * @param toCharset   name of the charset used to decode those bytes
 * @return the re-decoded text, or {@code ""} when {@code str} is null or
 *         either charset name is not supported (the exception is printed
 *         to standard output in that case)
 */
public String toCharset(String str, String fromCharset , String toCharset){
    // A missing request parameter arrives here as null; answer with the
    // same empty value the failure path uses instead of throwing.
    if (str == null) {
        return "";
    }
    try {
        return new String(str.getBytes(fromCharset), toCharset);
    } catch (UnsupportedEncodingException er) {
        System.out.println(er.toString());
        return "";
    }
}// Happy New Year, everyone!