用httpclient读取网页正常,但读取
http://bill.finance.sina.com.cn/bill/detail_download.php?stock_code=sz000656&bill_size=40000
这个地址怎么读都是乱码(已设置 method.addRequestHeader("Content","text/html,charset=GBK");),这个 URL 返回的是一个供下载的文本文件,该怎么办呢?
http://bill.finance.sina.com.cn/bill/detail_download.php?stock_code=sz000656&bill_size=40000
这个地址怎么读都是乱码(已设置 method.addRequestHeader("Content","text/html,charset=GBK");),这个 URL 返回的是一个供下载的文本文件,该怎么办呢?
String result = new String(dataResponseBody); 这时把 result 打印出来,看看是不是乱码。我觉得如果用 byte 数组来接收数据,应该能避免乱码。
有问题可以和我讨论,最近我正在用这个。我的邮箱是[email protected]
// Fetches `url` with Commons HttpClient, reads the whole response body as raw
// bytes and decodes it explicitly as GBK before printing it.
HttpClient client = new HttpClient();
client.getParams().setContentCharset("GBK");
HttpMethod method;
boolean headok = false;
boolean httpget = false;
method = new GetMethod(url);
// NOTE(review): "Content" is not a standard HTTP header name, and a request
// header does not influence how the response bytes are decoded below. It is
// kept only so the request that is sent stays unchanged.
method.addRequestHeader("Content", "text/html,charset=GBK");
int statusCode = client.executeMethod(method);
if (statusCode != HttpStatus.SC_OK) {
    System.err.println("Method failed: "
            + method.getStatusLine());
} else {
    httpget = true;
}
//java.io.InputStream is=method.getResponseBodyAsStream();
byte[] dataResponseBody = method.getResponseBody();
// new String(byte[]) decodes with the platform default charset, which garbles
// GBK text on any machine whose default is not GBK; name the charset instead.
// A missing body is printed as an empty string rather than causing an NPE.
String result = (dataResponseBody == null) ? "" : new String(dataResponseBody, "GBK");
System.out.println(result);
-------------------------------------------------------
还是乱码的
这行代码把原本是 Excel 格式的文件转成了文本。
/**
 * Downloads the Sina Finance bill-detail page for one stock and returns it as text.
 *
 * <p>The exchange prefix is derived from the code: a code starting with "00" is
 * prefixed with "sz", a code starting with "60" with "sh", and any other code is
 * sent without a prefix. The request is attempted up to 10 times. The response
 * body is read line by line and re-joined with '\n'.
 *
 * <p>NOTE(review): the body is decoded as GBK on the assumption that
 * bill.finance.sina.com.cn serves GBK; confirm against the response's
 * Content-Type header.
 *
 * @param stockCode the stock code without exchange prefix, e.g. "601699"
 * @return the page text, or "" when {@code stockCode} is null, the URI cannot be
 *         built, every attempt fails, or no body could be read
 */
private static String catchBigTransactionWebPage(String stockCode) {
    if (stockCode == null) {
        return "";
    }

    HttpClient client = new DefaultHttpClient();
    HttpResponse response = null;
    HttpUriRequest uriRequest = null;
    String webPageContent = null;

    try {
        String exchangePrefix = "";
        if (stockCode.startsWith("00")) {
            exchangePrefix = "sz";
        } else if (stockCode.startsWith("60")) {
            exchangePrefix = "sh";
        }
        // Build the query from the requested stock instead of a fixed code.
        String query = "stock_code=" + exchangePrefix + stockCode;
        uriRequest = new HttpGet(URIUtils.createURI(
                "http", "bill.finance.sina.com.cn", 80, "/bill/detail.php", query, null));
    } catch (URISyntaxException e1) {
        e1.printStackTrace();
        return "";
    }

    int tryCount = 0;
    boolean success = false;
    while (tryCount < 10 && !success) {
        try {
            tryCount++;
            response = client.execute(uriRequest);
            System.out.println("Locale = " + response.getLocale());
            success = true;
        } catch (HttpException e) {
            // Only the last failed attempt prints the full stack trace.
            if (tryCount == 10) {
                e.printStackTrace();
            } else {
                System.out.print("HttpException,Try again!");
            }
            success = false;
        } catch (IOException e) {
            if (tryCount == 10) {
                e.printStackTrace();
            } else {
                System.out.print("IOException,Try again!");
            }
            success = false;
        }
    }
    if (!success) {
        return "";
    }

    HttpEntity entity = response.getEntity();
    if (entity != null) {
        BufferedReader in = null;
        try {
            // Decode with an explicit charset; the no-charset InputStreamReader
            // constructor would fall back to the platform default.
            in = new BufferedReader(new InputStreamReader(entity.getContent(), "GBK"));
            StringBuilder buffer = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                buffer.append(line);
                buffer.append('\n');
            }
            webPageContent = buffer.toString();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    if (webPageContent == null) {
        System.out.print("Null page return , Can't get the page for this stock !!!");
        // Same empty result as the other failure paths, instead of an NPE.
        return "";
    }
    System.out.print("," + webPageContent.length() + "字节 " + webPageContent);
    return webPageContent;
}
// Decode the response stream with CHARSET and collect every line into sbf.
// readLine() strips line terminators and none are re-added, so the lines are
// joined back to back.
InputStreamReader decodedStream = new InputStreamReader(ins, CHARSET);
BufferedReader br = new BufferedReader(decodedStream);
StringBuffer sbf = new StringBuffer();
String line = br.readLine();
while (line != null) {
    sbf.append(line);
    line = br.readLine();
}
/** Release resources. */
br.close();
getMethod.releaseConnection();
/** Page source. */
pageSource = sbf.toString();
问题解决,^_^。这里的 CHARSET 要根据实际情况设置。
// Fetches `url` with Commons HttpClient, decodes the body with the charset the
// response reports (falling back to GBK when that is ISO-8859-1), prints the
// text and the reported charset, and always releases the reader and connection.

// Create the HttpClient instance.
HttpClient httpClient = new HttpClient();
// Create the GET method for the target URL.
GetMethod getMethod = new GetMethod(url);
// Use the library's default retry handler.
getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
        new DefaultHttpMethodRetryHandler());
// Raw response stream.
InputStream ins = null;
// Character reader layered on top of the response stream.
BufferedReader br = null;
try {
    // Execute the request.
    int statusCode = httpClient.executeMethod(getMethod);
    if (statusCode != HttpStatus.SC_OK) {
        System.err.println("方法失败: "
                + getMethod.getStatusLine());
    }
    // Stream the body rather than buffering it as one byte array.
    ins = getMethod.getResponseBodyAsStream();
    String reportedCharset = getMethod.getResponseCharSet();
    // NOTE(review): ISO-8859-1 is presumably what the library reports when the
    // server declares no charset; such responses are treated as GBK. The
    // comparison is case-insensitive without toUpperCase(), which depends on
    // the default locale.
    String charset = reportedCharset;
    if ("ISO-8859-1".equalsIgnoreCase(reportedCharset)) {
        charset = "gbk";
    }
    StringBuffer sbf = new StringBuffer();
    if (ins != null) {
        // Decode once with the effective charset instead of decoding with the
        // reported one and re-encoding afterwards.
        br = new BufferedReader(new InputStreamReader(ins, charset));
        String line = null;
        while ((line = br.readLine()) != null) {
            sbf.append(line);
        }
    }
    String result = sbf.toString();
    // Print the page content.
    System.out.println(result);
    // Print the charset reported for the response.
    System.out.println("服务器编码是:" + reportedCharset);
} catch (HttpException e) {
    // Fatal error: possibly a wrong protocol or a malformed response.
    System.out.println("请检查您所提供的HTTP地址!");
    e.printStackTrace();
} catch (IOException e) {
    // Network error.
    e.printStackTrace();
} finally {
    // Close the stream and release the connection on every path.
    try {
        if (br != null) {
            br.close();
        } else if (ins != null) {
            ins.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    getMethod.releaseConnection();
}