HttpURLConnection抓取这个页面乱码 无论用什么编码都乱码,而且就这个页面有问题,在浏览器中正常,请教一下怎么抓取这个。http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 用curl或者迅雷下载下来也是乱码。 网页经过压缩了。public static void main(String[] args) throws Exception { String result = ""; URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js"); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setConnectTimeout(7 * 1000); if (conn.getResponseCode() != 200) throw new RuntimeException("请求url失败"); InputStream is = conn.getInputStream(); if ("gzip".equals(conn.getContentEncoding())) { result = readDataForZgip(is, "utf-8"); } conn.disconnect(); System.err.println("ContentEncoding: " + conn.getContentEncoding()); System.out.println(result); } public static String readDataForZgip(InputStream inStream, String charsetName) throws Exception { GZIPInputStream gzipStream = new GZIPInputStream(inStream); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = -1; while ((len = gzipStream.read(buffer)) != -1) { outStream.write(buffer, 0, len); } byte[] data = outStream.toByteArray(); outStream.close(); gzipStream.close(); inStream.close(); return new String(data, charsetName); } 为什么我的没问题?抓出来是这样的var id_s=new Array( "14971271" ,"14971272" ,"14971273" ,"14971247" ,"14971285" ,"14971274" ,"14971280" ,"14971282" ,"14971283" ,"14971277" ,"14971276" ,"14971279" ,"14971275" ,"14971281" ,"14971284" ,"14971286" ,"14971261" ,"14971245" ,"14971292" ,"14971260" ,"14971265" ,"14971253" ,"14971305" ,"14971306" ,"14971293" ,"14971246" ,"14971252" ,"14971294" ,"14971308" ,"14971307" ,"14971298" ,"14971299" ,"14971296" ,"14971297" ,"14971295" ,"14971303" ,"14971278" ,"14971244" ,"14971240" ,"14971313" ,"14971254" ,"14971302" ,"14971311" ,"14971312" ,"14971314" ,"14971241" ,"14971242" ,"14971255" ,"14971256" ,"14971257" ,"14971287" ,"14971270" ,"14971304" ,"14971288" ,"14971301" ,"14971300" 
,"14971243" ,"14971239" ,"14971249" ,"14971267" ,"14971269" ,"14971268" ,"14971266" ,"14971258" ,"14971259" ,"14971238" ,"14971250" ,"14971251" ,"14971248" ,"14971262" ,"14971263" ,"14971264" ,"14971290" ,"14971289" ,"14971291" ,"14971316" ,"14971317" ,"14971310" ,"14971309" ,"14971315" ); //B标签ID、已售完数组 var sold_outArr=new Object(); sold_outArr.b_ids=new Array(); sold_outArr.s_outArr=new Array();//已售完数组 var id_s_ar=new Array(); var id_href=new Array(); var red_cut; var userType;//用户类型 var s_spl=new Array(); //id_sp为分批数,整除时为正确,产生余数时在 var id_sp=(id_s.length-id_s.length%50)/50; var i,c1,c,t,g,brand_idtmp,s_tmp=0; //summery// function change()//遍历清除---已售完标签解决方案 { } $(document).ready(function sethrefs(){ i=0; c=0; g=0; var a=this.location.href; var atmp=a.split("/"); var atmp1=a.split("-"); if(atmp1[1]!=0&&atmp1[1]!=1&&atmp1[1]!=2&&atmp1[1]!=3&&atmp1[1]!=4&&atmp1[1]!=undefined) { brand_idtmp=atmp1[1].split(".")[0]; } else { brand_idtmp=atmp1[2]; } //获取用户类型 var VipNewUser = !!$.Cookie.get('VipNewUser'), vip_new_b_user = !!$.Cookie.get('vip_new_b_user'), vip_new_old_user = !!$.Cookie.get('vip_new_old_user'), userType = 0; if (!vip_new_old_user || VipNewUser) { userType = 0; } else { userType = vip_new_b_user ? 
1 : 1; } // sold_outArr.b_ids=('L_soldout_' + id_s.toString().replace(/,/g, ",L_soldout_")).split(","); $(".wrapper a").each(function(){ if(this.name==""){ id_href[i]="http://shop.vipshop.com/detail-"+brand_idtmp+"-"+id_s[i]+".html"; $(this).attr("id",sold_outArr.b_ids[i]); $(this).attr("href",id_href[i]); $(this).attr("target","_blank"); i++;} }); //已售完 $.ajax ({ url : 'http://stock.vipshop.com/list/', data : { brandId: brand_idtmp, is_old: userType }, cache : true, jsonp: 'callback', jsonpCallback : 'te_pingou', success : function (re) { sold_outArr.s_outArr='#L_soldout_' + re.sold_out.replace(/,/g, ",#L_soldout_"); $(sold_outArr.s_outArr).find("b").show(); }, dataType : 'jsonp' }); /*change();*/ }) Connection conn = Jsoup.connect(url); conn.timeout(0); conn.ignoreContentType(true); Document doc = conn.get(); System.out.println(doc.text()); 或者用htmlparserString path="http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js"; Parser parser=new Parser(path); parser.setEncoding("utf-8"); NodeList list=parser.parse(null); System.out.println(list.toHtml()); 如何根据对象里面的的某1个字段对List排序 求套与core java2配套的练习题 软件测试的前景浅谈 通用的数据库访问问题? 哪里查找com.borland.dx.sql.dataset中的所有方法 在java中如何定义结构体、及使用 接口作用 java的hook问题。 JWindowr的闪烁问题 怎么把JAVA的标题栏换成XP的啊 java Map相关 求推荐java并发书籍
// Download the script and decode it as UTF-8. The body is un-gzipped when the
// server declares a gzip Content-Encoding and read as-is otherwise.
String result = "";
URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setConnectTimeout(7 * 1000);
try {
    if (conn.getResponseCode() != 200) {
        throw new RuntimeException("请求url失败");
    }
    // Read the header once, while the connection is still open.
    String contentEncoding = conn.getContentEncoding();
    InputStream is = conn.getInputStream();
    try {
        // Content-coding tokens are case-insensitive, so accept "GZIP" as well.
        if ("gzip".equalsIgnoreCase(contentEncoding)) {
            result = readDataForZgip(is, "utf-8");
        } else {
            // An uncompressed response used to be dropped silently, leaving
            // result empty; read it directly instead.
            java.io.ByteArrayOutputStream plain = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = is.read(buffer)) != -1) {
                plain.write(buffer, 0, len);
            }
            result = new String(plain.toByteArray(), "utf-8");
        }
    } finally {
        // Closing again after readDataForZgip already closed it is harmless.
        is.close();
    }
    System.err.println("ContentEncoding: " + contentEncoding);
} finally {
    // Release the connection on every path, including the non-200 failure.
    conn.disconnect();
}
System.out.println(result);
}

/**
 * Decompresses a gzip-encoded stream fully and decodes the bytes as text.
 *
 * <p>Both the gzip wrapper and {@code inStream} are closed before this method
 * returns, whether it completes normally or throws.
 *
 * @param inStream    stream positioned at the start of gzip-compressed data
 * @param charsetName name of the charset used to decode the decompressed bytes
 * @return the decompressed content as a string
 * @throws Exception if the data is not valid gzip, reading fails, or the
 *                   charset name is not supported
 */
public static String readDataForZgip(InputStream inStream,
        String charsetName) throws Exception {
    try {
        // The constructor reads the gzip header and can throw, so the outer
        // finally is needed to make sure inStream is still closed in that case.
        GZIPInputStream gzipStream = new GZIPInputStream(inStream);
        try {
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = gzipStream.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
            return new String(outStream.toByteArray(), charsetName);
        } finally {
            gzipStream.close();
        }
    } finally {
        inStream.close();
    }
}
// Alternative using jsoup: configure the request fluently, fetch, and print the text.
// NOTE(review): per the jsoup docs, a timeout of 0 means "no timeout" and
// ignoreContentType(true) lets non-HTML responses be parsed — confirm for the version in use.
Connection connection = Jsoup.connect(url)
        .timeout(0)
        .ignoreContentType(true);
Document page = connection.get();
System.out.println(page.text());
// Alternative using htmlparser: parse the resource at `path` as UTF-8 and print it.
Parser htmlParser = new Parser(path);
htmlParser.setEncoding("utf-8");
// NOTE(review): a null filter presumably returns every node — confirm against the htmlparser docs.
NodeList nodes = htmlParser.parse(null);
String html = nodes.toHtml();
System.out.println(html);