求助,转码问题 google中搜索“中国”取得的url中对应的编码“%E4%B8%AD%E5%9B%BD”请问这种转码怎样处理,谢谢 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 因为在做链接来源统计的时候需要把 http://www.baidu.com/baidu?word= ... B%F7&tn=myie2dg 这类的URL编码还原成明文字串,一般大部分的网站都是用普通的URL编码形式,如上面链接中的badu,这种很容易转换和还原,Java包里提供了两个类的不同方法URLEncode.encode()和URLDecode.decode()可以很方便实现,但也有特别一点的就是Google了,http://www.google.com/search?hl= ... 9C%E7%B4%A2&lr= 他们的编码和别人不一样,如果使用URLDecode.decode()的话则变成乱码,查询的一些相关资料都说Google使用的是UTF-8编码,这点我就有些奇怪了,如果Google使用的是UTF-8编码,那别人使用的又是什么?IE的高级选项里不是有项“始终以UTF-8形式发送URL”的吗?但是UTF-8一个中文是3byte,而一般的编码则是2个byte,这就是为什么一般的URL中是以两组'%'代码表示一个汉字,如“中”的URL编码为"%D6%D0",而UTF-8则为3组,“中”为"%E4%B8%AD",这个问题我在Google里也没得到较好回答。我对各种编码形式了解的不是很好,之前只看过如何将字符串转成Utf8-URL编码的方法,其实也挺简单的,直接转成byte后直接取其16进制值前面加个%就行,还原方法在网上搜了几圈居然没发现有现成的!倒是也是几个人在CSDN问了此类的问题。最后还是决定自己搞定了,基本上是toUTF8的原路退回法,再加了个检测URL链接是否UTF-8形式的方法,觉得已经蛮好用了。可以拿出来share一下。import java.io.UnsupportedEncodingException;import java.net.URLEncoder;import java.net.URLDecoder;/*** Title:字符编码工具类 * Description: * Copyright: flashman.com.cn Copyright (c) 2005* Company: flashman.com.cn * @author: jeffzhu* @version 1.0*/public class CharTools { /** * 转换编码 ISO-8859-1到GB2312 * @param text * @return */ public String ISO2GB(String text) { String result = ""; try { result = new String(text.getBytes("ISO-8859-1"), "GB2312"); } catch (UnsupportedEncodingException ex) { result = ex.toString(); } return result; } /** * 转换编码 GB2312到ISO-8859-1 * @param text * @return */ public String GB2ISO(String text) { String result = ""; try { result = new String(text.getBytes("GB2312"), "ISO-8859-1"); } catch (UnsupportedEncodingException ex) { ex.printStackTrace(); } return result; } /** * Utf8URL编码 * @param s * @return */ public String Utf8URLencode(String text) { StringBuffer result = new StringBuffer(); for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c >= 0 && c <= 255) { result.append(c); }else { byte[] b = new byte[0]; try { b = Character.toString(c).getBytes("UTF-8"); }catch (Exception ex) { } for (int j = 
0; j < b.length; j++) { int k = b[j]; if (k < 0) k += 256; result.append("%" + Integer.toHexString(k).toUpperCase()); } } } return result.toString(); } /** * Utf8URL解码 * @param text * @return */ public String Utf8URLdecode(String text) { String result = ""; int p = 0; if (text!=null && text.length()>0){ text = text.toLowerCase(); p = text.indexOf("%e"); if (p == -1) return text; while (p != -1) { result += text.substring(0, p); text = text.substring(p, text.length()); if (text == "" || text.length() < 9) return result; result += CodeToWord(text.substring(0, 9)); text = text.substring(9, text.length()); p = text.indexOf("%e"); } } return result + text; } /** * utf8URL编码转字符 * @param text * @return */ private String CodeToWord(String text) { String result; if (Utf8codeCheck(text)) { byte[] code = new byte[3]; code[0] = (byte) (Integer.parseInt(text.substring(1, 3), 16) - 256); code[1] = (byte) (Integer.parseInt(text.substring(4, 6), 16) - 256); code[2] = (byte) (Integer.parseInt(text.substring(7, 9), 16) - 256); try { result = new String(code, "UTF-8"); }catch (UnsupportedEncodingException ex) { result = null; } } else { result = text; } return result; } /** * 编码是否有效 * @param text * @return */ private boolean Utf8codeCheck(String text){ String sign = ""; if (text.startsWith("%e")) for (int i = 0, p = 0; p != -1; i++) { p = text.indexOf("%", p); if (p != -1) p++; sign += p; } return sign.equals("147-1"); } /** * 是否Utf8Url编码 * @param text * @return */ public boolean isUtf8Url(String text) { text = text.toLowerCase(); int p = text.indexOf("%"); if (p != -1 && text.length() - p > 9) { text = text.substring(p, p + 9); } return Utf8codeCheck(text); } /** * 测试 * @param args */ public static void main(String[] args) { CharTools charTools = new CharTools(); String url; url = "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr="; 
if(charTools.isUtf8Url(url)){ System.out.println(charTools.Utf8URLdecode(url)); }else{ System.out.println(URLDecoder.decode(url)); } url = "http://www.baidu.com/baidu?word=%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7&tn=myie2dg"; if(charTools.isUtf8Url(url)){ System.out.println(charTools.Utf8URLdecode(url)); }else{ System.out.println(URLDecoder.decode(url)); } }} 用这么麻烦吗?用这2个类就可以了啊。java.net.URLDecoder.decode()java.net.URLEncoder.encode() 看看:java.net.URLDecoder.decode()java.net.URLEncoder.encode() 的API转换和反转换 URL编码转换java.net.URLDecoder.decode()java.net.URLEncoder.encode() 获取网页源码,有的网站是乱码,有的网站是正常的。 有关循环 关于JAVA中复制文件的问题。 字符串转编码后单个汉字出现乱码, 其他汉字显示正常 求高手们帮忙,谢谢各位了! swt 写浏览器问题 高手请进 变量命名的区别 where does the java download? 如何取得类所在的文件路径? macOS更新到10.15Catalina版本后logisim打不开了 谁能告诉我怎样在SWT下独立运行GEF的例子 如何将JAVA类包让多个项目共享
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.net.URLDecoder;/**
* Title: Character-encoding utility class
* Description:
* Copyright: flashman.com.cn Copyright (c) 2005
* Company: flashman.com.cn
* @author: jeffzhu
* @version 1.0
*/
public class CharTools {

    private static final String UTF_8 = "UTF-8";
    private static final String GB2312 = "GB2312";
    private static final String GBK = "GBK";
    private static final String ISO_8859_1 = "ISO-8859-1";
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Encodes {@code text} to bytes as ISO-8859-1 and decodes those bytes as GB2312.
     *
     * @param text the string to convert; {@code null} is treated as empty
     * @return the converted string, or {@code ""} when {@code text} is {@code null}
     * @throws IllegalStateException if one of the two charsets is not available in this JVM
     */
    public String ISO2GB(String text) {
        return transcode(text, ISO_8859_1, GB2312);
    }

    /**
     * Encodes {@code text} to bytes as GB2312 and decodes those bytes as ISO-8859-1.
     *
     * @param text the string to convert; {@code null} is treated as empty
     * @return the converted string, or {@code ""} when {@code text} is {@code null}
     * @throws IllegalStateException if one of the two charsets is not available in this JVM
     */
    public String GB2ISO(String text) {
        return transcode(text, GB2312, ISO_8859_1);
    }

    /**
     * Percent-encodes every non-ASCII character of {@code text} as its UTF-8 bytes
     * ({@code %XX}, upper-case hex). ASCII characters are copied unchanged, so a
     * complete URL can be passed in without its delimiters being escaped.
     *
     * @param text the string to encode; {@code null} is treated as empty
     * @return the encoded string, or {@code ""} when {@code text} is {@code null}
     */
    public String Utf8URLencode(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(text.length() + 16);
        int i = 0;
        while (i < text.length()) {
            // Walk by code point so that a surrogate pair is encoded as ONE
            // four-byte sequence instead of two unencodable halves.
            int codePoint = text.codePointAt(i);
            int width = Character.charCount(codePoint);
            if (codePoint < 0x80) {
                out.append((char) codePoint);
            } else {
                byte[] bytes = utf8Bytes(text.substring(i, i + width));
                for (int j = 0; j < bytes.length; j++) {
                    appendEscape(out, bytes[j] & 0xFF);
                }
            }
            i += width;
        }
        return out.toString();
    }

    /**
     * Decodes every well-formed multi-byte UTF-8 percent-escape sequence
     * (two to four {@code %XX} escapes) in {@code text}. Everything else is
     * copied through unchanged and in its original case: plain characters,
     * ASCII escapes such as {@code %20}, and escapes that do not form valid UTF-8.
     *
     * @param text the string to decode; {@code null} is treated as empty
     * @return the decoded string, or {@code ""} when {@code text} is {@code null}
     */
    public String Utf8URLdecode(String text) {
        if (text == null) {
            return "";
        }
        if (text.indexOf('%') == -1) {
            return text;
        }
        int length = text.length();
        StringBuilder out = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = text.charAt(i);
            int codePoint = (c == '%') ? multiByteCodePointAt(text, i) : -1;
            if (codePoint >= 0) {
                out.appendCodePoint(codePoint);
                i += 3 * utf8Length(codePoint); // each byte occupies three chars: %XX
            } else {
                out.append(c);
                i++;
            }
        }
        return out.toString();
    }

    /**
     * Heuristically tells whether the percent-escapes in {@code text} are UTF-8.
     * ASCII escapes (below {@code %80}) and stray {@code %} signs are ignored;
     * every remaining escape must start a well-formed multi-byte UTF-8 sequence.
     *
     * @param text the URL or URL fragment to inspect; may be {@code null}
     * @return {@code true} if at least one multi-byte UTF-8 sequence is present and
     *         no non-ASCII escape is malformed; {@code false} otherwise
     */
    public boolean isUtf8Url(String text) {
        if (text == null) {
            return false;
        }
        boolean sawMultiByte = false;
        int pos = text.indexOf('%');
        while (pos != -1) {
            int value = escapedByteAt(text, pos);
            int next;
            if (value < 0) {
                next = pos + 1; // stray '%', not an escape
            } else if (value < 0x80) {
                next = pos + 3; // ASCII escape says nothing about the charset
            } else {
                int codePoint = multiByteCodePointAt(text, pos);
                if (codePoint < 0) {
                    return false; // non-ASCII byte that is not valid UTF-8
                }
                sawMultiByte = true;
                next = pos + 3 * utf8Length(codePoint);
            }
            pos = text.indexOf('%', next);
        }
        return sawMultiByte;
    }

    /**
     * Demo: decodes one UTF-8 encoded and one GBK encoded search URL and prints both.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CharTools charTools = new CharTools();
        String[] urls = {
            "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=",
            "http://www.baidu.com/baidu?word=%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7&tn=myie2dg"
        };
        for (int i = 0; i < urls.length; i++) {
            System.out.println(decodeForDisplay(charTools, urls[i]));
        }
    }

    /** Decodes {@code url} as UTF-8 when it looks like UTF-8, otherwise as GBK. */
    private static String decodeForDisplay(CharTools tools, String url) {
        if (tools.isUtf8Url(url)) {
            return tools.Utf8URLdecode(url);
        }
        try {
            // Name the charset explicitly; the one-argument overload is deprecated
            // and depends on the platform default. GBK is a superset of GB2312.
            return URLDecoder.decode(url, GBK);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("Charset not available: " + GBK, ex);
        }
    }

    /** Encodes {@code text} with charset {@code from} and decodes the bytes with charset {@code to}. */
    private static String transcode(String text, String from, String to) {
        if (text == null) {
            return "";
        }
        try {
            return new String(text.getBytes(from), to);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("Charset not available: " + from + " -> " + to, ex);
        }
    }

    /** Returns the UTF-8 bytes of {@code s}. */
    private static byte[] utf8Bytes(String s) {
        try {
            return s.getBytes(UTF_8);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("Charset not available: " + UTF_8, ex);
        }
    }

    /** Appends {@code value} (0..255) as a zero-padded, upper-case {@code %XX} escape. */
    private static void appendEscape(StringBuilder out, int value) {
        out.append('%').append(HEX_DIGITS[value >> 4]).append(HEX_DIGITS[value & 0x0F]);
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Returns the byte value (0..255) of the {@code %XX} escape at {@code pos}, or -1 if there is none. */
    private static int escapedByteAt(String text, int pos) {
        if (pos < 0 || pos + 2 >= text.length() || text.charAt(pos) != '%') {
            return -1;
        }
        int high = hexValue(text.charAt(pos + 1));
        int low = hexValue(text.charAt(pos + 2));
        if (high < 0 || low < 0) {
            return -1;
        }
        return (high << 4) | low;
    }

    /**
     * Returns the code point encoded by the well-formed multi-byte UTF-8 escape
     * sequence starting at {@code pos}, or -1 if there is no such sequence there.
     * Overlong forms, surrogates and values above U+10FFFF are rejected.
     */
    private static int multiByteCodePointAt(String text, int pos) {
        int lead = escapedByteAt(text, pos);
        int count;
        int codePoint;
        int minimum;
        if (lead >= 0xC2 && lead <= 0xDF) {
            count = 2;
            codePoint = lead & 0x1F;
            minimum = 0x80;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            count = 3;
            codePoint = lead & 0x0F;
            minimum = 0x800;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            count = 4;
            codePoint = lead & 0x07;
            minimum = 0x10000;
        } else {
            return -1;
        }
        for (int k = 1; k < count; k++) {
            int continuation = escapedByteAt(text, pos + 3 * k);
            if (continuation < 0 || (continuation & 0xC0) != 0x80) {
                return -1;
            }
            codePoint = (codePoint << 6) | (continuation & 0x3F);
        }
        boolean surrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
        if (codePoint < minimum || codePoint > 0x10FFFF || surrogate) {
            return -1;
        }
        return codePoint;
    }

    /** Returns how many UTF-8 bytes encode {@code codePoint}, which must be at least U+0080. */
    private static int utf8Length(int codePoint) {
        if (codePoint < 0x800) {
            return 2;
        }
        return codePoint < 0x10000 ? 3 : 4;
    }
}
java.net.URLDecoder.decode()
java.net.URLEncoder.encode()
java.net.URLDecoder.decode()
java.net.URLEncoder.encode()
的API
转换和反转换
java.net.URLDecoder.decode()
java.net.URLEncoder.encode()