httpclient乱码问题,急求

byte[] dataResponseBody = method.getResponseBody();
String result = new String(dataResponseBody);这时把result打印出来看看是不是乱码。我觉得如果你用byte来接收数据的话应该能避免乱码。
有问题可以和我讨论，最近我正在用这个。我的邮箱是[email protected]

// Downloads the Sina "bill detail" export for one stock and prints it as text.
// The enclosing method must handle IOException: executeMethod, getResponseBody and
// the charset-aware String constructor can all throw it (or a subclass).
String url = "http://bill.finance.sina.com.cn/bill/detail_download.php?stock_code=sz000935&bill_size=40000";
        HttpClient client = new HttpClient();
        client.getParams().setContentCharset("GBK");
        HttpMethod method;
        boolean headok=false;
        boolean httpget=false;
                method = new GetMethod(url);
                // NOTE(review): "Content" is not a standard HTTP header name; it is kept
                // as in the original, but it presumably has no effect on how the
                // response is decoded -- confirm.
                method.addRequestHeader("Content","text/html,charset=GBK");
                int statusCode = client.executeMethod(method);
                if (statusCode != HttpStatus.SC_OK) {
                    System.err.println("Method failed: "
                    + method.getStatusLine());
                }
                else{
                    httpget=true;
                }
                //java.io.InputStream is=method.getResponseBodyAsStream();
                byte[] dataResponseBody = method.getResponseBody();
                // The body is fully buffered at this point, so the connection can be
                // handed back. (Not exception-safe: wrap in try/finally if the
                // surrounding code allows it.)
                method.releaseConnection();
                // Decode with an explicit charset. new String(byte[]) uses the platform
                // default, which garbles GBK bytes on any machine whose default is not GBK.
                // Assumes the server sends GBK text, as the thread suggests -- TODO confirm.
                String result = (dataResponseBody == null) ? "" : new String(dataResponseBody, "GBK");
                System.out.println(result);
-------------------------------------------------------
还是乱码的

这个好像是 Excel 文件诶，不是文本文件！

是","分开的文本,以xls为扩展名

是Excel 文件，不是文本文件，你直接打开http://bill.finance.sina.com.cn/bill/detail_download.php?stock_code=sz000656&bill_size=40000是下载XLS文件啊。method.addRequestHeader("Content","text/html,charset=GBK");
这行代码把原是Excel 的文件转成了文本。

如果是excel文件,有什么东东可以读出文本吗?

用了POI还是不行,有没有老大给个解决办法

先用输入流把文件下载下来，再用POI或者JXL读吧。

你可以直接用httpclient读取那个网页，不一定非要下载那个文件。不过读取那个页面最近也变成了乱码，不知道为什么。

hehe,我也遇到同样的问题，我是抓取网页，困扰我几天了。原来在http://stock.finance.sina.com.cn/bill/detail.php?stock_code=sh601166时是好的。换了服务器到http://bill.finance.sina.com.cn/bill/detail.php?stock_code=sh601166就全是乱码了。可能与服务器端编码有关。我看过，页面内容是一样的。

我用的HTTP client 4.0，这段代码如下，得到的还是乱码：
    /**
     * Fetches the Sina "big transaction" detail page for one stock and returns its text.
     *
     * <p>Codes starting with "00" are prefixed with "sz" and codes starting with "60"
     * with "sh"; any other code is sent unchanged. The request is attempted up to 10
     * times. The body is decoded with the charset declared in the response
     * Content-Type header, falling back to GBK when none is declared (assumes the
     * site serves GBK, as the thread suggests -- TODO confirm).
     *
     * @param stockCode numeric stock code, e.g. "601699"
     * @return the page text with lines joined by '\n', or "" if the code is
     *         null/empty, the URI cannot be built, every attempt fails, or no body
     *         could be read
     */
    private static String catchBigTransactionWebPage(String stockCode) {
        if (stockCode == null || stockCode.length() == 0) {
            return "";
        }

        // Derive the exchange-prefixed symbol expected by the stock_code parameter.
        String symbol = stockCode;
        if (stockCode.startsWith("00")) {
            symbol = "sz" + stockCode;
        } else if (stockCode.startsWith("60")) {
            symbol = "sh" + stockCode;
        }

        HttpClient client = new DefaultHttpClient();
        HttpUriRequest uriRequest = null;
        try {
            //URIUtils.createURI(scheme, host, port, path, query, fragment)
            uriRequest = new HttpGet(URIUtils.createURI("http", "bill.finance.sina.com.cn", 80,
                    "/bill/detail.php", "stock_code=" + symbol, null));
        } catch (URISyntaxException e1) {
            e1.printStackTrace();
            return "";
        }

        // Retry transient failures; only the final failure prints a stack trace.
        HttpResponse response = null;
        int tryCount = 0;
        boolean success = false;
        while (tryCount < 10 && !success) {
            try {
                tryCount++;
                response = client.execute(uriRequest);
                System.out.println("Locale = " + response.getLocale());
                success = true;
            } catch (HttpException e) {
                if (tryCount == 10) {
                    e.printStackTrace();
                } else {
                    System.out.print("HttpException,Try again!");
                }
                success = false;
            } catch (IOException e) {
                if (tryCount == 10) {
                    e.printStackTrace();
                } else {
                    System.out.print("IOException,Try again!");
                }
                success = false;
            }
        }
        if (!success) {
            return "";
        }

        HttpEntity entity = response.getEntity();
        String webPageContent = null;
        if (entity != null) {
            java.io.InputStream content = null;
            BufferedReader in = null;
            try {
                // Decode with the charset the server declared rather than the platform
                // default; an undeclared charset falls back to GBK.
                String charset = org.apache.http.util.EntityUtils.getContentCharSet(entity);
                if (charset == null) {
                    charset = "GBK";
                }
                content = entity.getContent();
                in = new BufferedReader(new InputStreamReader(content, charset));
                StringBuilder buffer = new StringBuilder();
                String line = null;
                while ((line = in.readLine()) != null) {
                    buffer.append(line);
                    buffer.append('\n');
                }
                webPageContent = buffer.toString();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Closing the reader closes the underlying stream; if the reader was
                // never created, close the raw stream instead.
                try {
                    if (in != null) {
                        in.close();
                    } else if (content != null) {
                        content.close();
                    }
                } catch (IOException ignored) {
                    // Nothing useful can be done if close fails.
                }
            }
        }

        if (webPageContent == null) {
            System.out.print("Null page return , Can't get the page for this stock !!!");
            return "";
        }
        System.out.print("," + webPageContent.length() + "字节 " + webPageContent);
        return webPageContent;
    }

这里，介绍一种解决抓取后网页内容显示为乱码的办法。前几天，在抓取某网站的信息时(http://www.99sj.com/Price/Price/Default.aspx)，第一次碰到了这种应用下的乱码问题。于是上网查了一下，提供的解决办法大致有两种：1>　　private static final String CONTENT_CHARSET = "GBK";　httpClient.getParams().setContentCharset("UTF-8");　or　httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, CONTENT_CHARSET);2>　　private static final String CONTENTTYPE    = " text/html; charset=GBK";　　getMethod.setRequestHeader("Content-Type", CONTENTTYPE);测试了，没有任何效果（换成UTF-8也不行）。也用了String result = new String(pageSrc.getBytes("UTF-8"),"GBK")，依然无效。在焦头烂额时想到了以前在学校时经常用的一句话：找问题要会追根溯源。仔细想想，字符串里面的文本内容也是通过文件流获取的，既然转换字符串字符编码不起作用，那可以设置文件流的默认编码吗？查了jdk，是可行的。private static final String CHARSET = "UTF-8";InputStream ins = getMethod.getResponseBodyAsStream();//按指定的字符集构建文件流
// Read the response stream through a reader that decodes with the chosen charset,
// collecting the page source into sbf. Line terminators are dropped, as in the
// original snippet.
BufferedReader br = null;
StringBuffer sbf = new StringBuffer();
String line = null;
try {
     br = new BufferedReader(new InputStreamReader(ins,CHARSET));
     while ((line = br.readLine()) != null)
     {
          sbf.append(line);
     }
} finally {
     /** Release resources even when reading fails part-way. */
     try {
          if (br != null) {
               br.close();
          }
     } finally {
          getMethod.releaseConnection();
     }
}
     /** Page source */
     pageSource = sbf.toString();问题解决，^_^。这里的CHARSET要根据实际情况设置

使用HttpClient获取网页内容，编码问题确实是困扰程序员的难题。经过长期总结，终于得到一个相对“完美”的解决方案：用流的方式实现字符编码转换+String字符编码，具体请看下列代码：
// Fetches `url` with Commons HttpClient 3.x and prints the decoded page.
// Create the HttpClient instance
HttpClient httpClient = new HttpClient();
// Create the GET method instance
GetMethod getMethod = new GetMethod(url);
// Use the library's default retry handler
getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler());
// Raw response stream
InputStream ins = null;
// Charset-decoding reader over the response stream
BufferedReader br =null;
try {
  // Execute the GET
  int statusCode = httpClient.executeMethod(getMethod);
  if (statusCode != HttpStatus.SC_OK) {
    System.err.println("方法失败: "
       + getMethod.getStatusLine());
   }

  // Stream the body instead of buffering it, which suits large responses.
  ins = getMethod.getResponseBodyAsStream();
  // If the reported charset is ISO-8859-1, decode as GBK instead; equalsIgnoreCase
  // avoids the locale-sensitive toUpperCase().
  // NOTE(review): this assumes ISO-8859-1 is only HttpClient's default for a
  // missing charset and that the page is really GBK -- confirm per site.
  String charset = getMethod.getResponseCharSet();
  if("ISO-8859-1".equalsIgnoreCase(charset)){
    charset = "gbk";
  }
  StringBuffer sbf = new StringBuffer();
  // A response without a body yields a null stream; treat it as an empty page.
  if (ins != null) {
    // Decode once with the effective charset; no re-encode round trip afterwards.
    br = new BufferedReader(new InputStreamReader(ins,charset));
    String line = null;
    while ((line = br.readLine()) != null)
    {
      sbf.append(line);
    }
  }
  String result = sbf.toString();
  // Print the content
  System.out.println(result);
  // Charset reported for the response
   System.out.println("服务器编码是："+getMethod.getResponseCharSet());
} catch (HttpException e) {
  // Fatal error: probably a protocol violation or a malformed response
  System.out.println("请检查您所提供的HTTP地址！");
    e.printStackTrace();
} catch (IOException e) {
  // Network error
   e.printStackTrace();
} finally {
  // Close the stream and release the connection on every path.
  try {
    if (br != null) {
      br.close();
    } else if (ins != null) {
      ins.close();
    }
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    getMethod.releaseConnection();
  }
}

调试易

httpclient乱码问题,急求

解决方案 »