HttpURLConnection抓取这个页面乱码 无论用什么编码都乱码,而且就这个页面有问题,在浏览器中正常,请教一下怎么抓取这个。http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 用curl或者迅雷下载下来也是乱码。 网页经过压缩了。public static void main(String[] args) throws Exception { String result = ""; URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js"); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setConnectTimeout(7 * 1000); if (conn.getResponseCode() != 200) throw new RuntimeException("请求url失败"); InputStream is = conn.getInputStream(); if ("gzip".equals(conn.getContentEncoding())) { result = readDataForZgip(is, "utf-8"); } conn.disconnect(); System.err.println("ContentEncoding: " + conn.getContentEncoding()); System.out.println(result); } public static String readDataForZgip(InputStream inStream, String charsetName) throws Exception { GZIPInputStream gzipStream = new GZIPInputStream(inStream); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = -1; while ((len = gzipStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } byte[] data = outStream.toByteArray(); outStream.close(); gzipStream.close(); inStream.close(); return new String(data, charsetName); } 为什么我的没问题?抓出来是这样的var id_s=new Array( "14971271" ,"14971272" ,"14971273" ,"14971247" ,"14971285" ,"14971274" ,"14971280" ,"14971282" ,"14971283" ,"14971277" ,"14971276" ,"14971279" ,"14971275" ,"14971281" ,"14971284" ,"14971286" ,"14971261" ,"14971245" ,"14971292" ,"14971260" ,"14971265" ,"14971253" ,"14971305" ,"14971306" ,"14971293" ,"14971246" ,"14971252" ,"14971294" ,"14971308" ,"14971307" ,"14971298" ,"14971299" ,"14971296" ,"14971297" ,"14971295" ,"14971303" ,"14971278" ,"14971244" ,"14971240" ,"14971313" ,"14971254" ,"14971302" ,"14971311" ,"14971312" ,"14971314" ,"14971241" ,"14971242" ,"14971255" ,"14971256" ,"14971257" ,"14971287" ,"14971270" ,"14971304" ,"14971288" ,"14971301" ,"14971300" 
,"14971243" ,"14971239" ,"14971249" ,"14971267" ,"14971269" ,"14971268" ,"14971266" ,"14971258" ,"14971259" ,"14971238" ,"14971250" ,"14971251" ,"14971248" ,"14971262" ,"14971263" ,"14971264" ,"14971290" ,"14971289" ,"14971291" ,"14971316" ,"14971317" ,"14971310" ,"14971309" ,"14971315" ); //B标签ID、已售完数组 var sold_outArr=new Object(); sold_outArr.b_ids=new Array(); sold_outArr.s_outArr=new Array();//已售完数组 var id_s_ar=new Array(); var id_href=new Array(); var red_cut; var userType;//用户类型 var s_spl=new Array(); //id_sp为分批数,整除时为正确,产生余数时在 var id_sp=(id_s.length-id_s.length%50)/50; var i,c1,c,t,g,brand_idtmp,s_tmp=0; //summery// function change()//遍历清除---已售完标签解决方案 { } $(document).ready(function sethrefs(){ i=0; c=0; g=0; var a=this.location.href; var atmp=a.split("/"); var atmp1=a.split("-"); if(atmp1[1]!=0&&atmp1[1]!=1&&atmp1[1]!=2&&atmp1[1]!=3&&atmp1[1]!=4&&atmp1[1]!=undefined) { brand_idtmp=atmp1[1].split(".")[0]; } else { brand_idtmp=atmp1[2]; } //获取用户类型 var VipNewUser = !!$.Cookie.get('VipNewUser'), vip_new_b_user = !!$.Cookie.get('vip_new_b_user'), vip_new_old_user = !!$.Cookie.get('vip_new_old_user'), userType = 0; if (!vip_new_old_user || VipNewUser) { userType = 0; } else { userType = vip_new_b_user ? 
1 : 1; } // sold_outArr.b_ids=('L_soldout_' + id_s.toString().replace(/,/g, ",L_soldout_")).split(","); $(".wrapper a").each(function(){ if(this.name==""){ id_href[i]="http://shop.vipshop.com/detail-"+brand_idtmp+"-"+id_s[i]+".html"; $(this).attr("id",sold_outArr.b_ids[i]); $(this).attr("href",id_href[i]); $(this).attr("target","_blank"); i++;} }); //已售完 $.ajax ({ url : 'http://stock.vipshop.com/list/', data : { brandId: brand_idtmp, is_old: userType }, cache : true, jsonp: 'callback', jsonpCallback : 'te_pingou', success : function (re) { sold_outArr.s_outArr='#L_soldout_' + re.sold_out.replace(/,/g, ",#L_soldout_"); $(sold_outArr.s_outArr).find("b").show(); }, dataType : 'jsonp' }); /*change();*/ }) Connection conn = Jsoup.connect(url); conn.timeout(0); conn.ignoreContentType(true); Document doc = conn.get(); System.out.println(doc.text()); 或者用htmlparserString path="http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js"; Parser parser=new Parser(path); parser.setEncoding("utf-8"); NodeList list=parser.parse(null); System.out.println(list.toHtml()); 如何在JTextArea内弹出框 编写一个用来创建一个ArrayList对象的程序,但是不明确导入java.util.* 类中的static型变量以及什么时候初始化的? asp中如何调用java编写的类? 声音???!!!如何录音啊?(更多分相送!) 为什么JAVA的运行速度特别慢 如何实现远程类的调用呀?(已附代码) 大家来帮帮我!!! 在框架结构的网页中,子框架中的表单无法传递参数,怎么办?? 关于内部类的方法的调用问题! java Map相关 求推荐java并发书籍
String result = "";
URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setConnectTimeout(7 * 1000);
if (conn.getResponseCode() != 200)
throw new RuntimeException("请求url失败");
InputStream is = conn.getInputStream();
if ("gzip".equals(conn.getContentEncoding())) {
result = readDataForZgip(is, "utf-8");
}
conn.disconnect();
System.err.println("ContentEncoding: " + conn.getContentEncoding());
System.out.println(result);
} public static String readDataForZgip(InputStream inStream,
String charsetName) throws Exception {
GZIPInputStream gzipStream = new GZIPInputStream(inStream);
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int len = -1;
while ((len = gzipStream.read(buffer)) != -1) {
outStream.write(buffer, 0, len);
}
byte[] data = outStream.toByteArray();
outStream.close();
gzipStream.close();
inStream.close();
return new String(data, charsetName);
}
// Fetch the resource with jsoup and print the response body exactly as received.
// `url` is expected to be the address as a String, defined by the surrounding code.
Connection conn = Jsoup.connect(url);
// Per the jsoup documentation, a timeout of zero means "wait indefinitely".
conn.timeout(0);
// The target is a JavaScript file; without this jsoup rejects non-HTML/XML content types.
conn.ignoreContentType(true);
// Print the raw body instead of parsing it as HTML: Document.text() would collapse
// whitespace and drop anything in the script that looks like markup.
System.out.println(conn.execute().body());
// Alternative using the HTMLParser library. `path` is expected to hold the resource
// address and to be defined by the surrounding code.
Parser pageParser = new Parser(path);
pageParser.setEncoding("utf-8");
// NOTE(review): a null filter presumably returns all top-level nodes; confirm against
// the HTMLParser documentation.
NodeList nodes = pageParser.parse(null);
String markup = nodes.toHtml();
System.out.println(markup);