无论用什么编码读取,抓到的内容都是乱码,而且只有这个页面有问题,在浏览器中打开却是正常的。请教一下该怎么抓取这个页面?
http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js

解决方案 »

  1.   

    用curl或者迅雷下载下来也是乱码。
      

  2.   

    网页经过压缩了。public static void main(String[] args) throws Exception {
    String result = "";
    URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setConnectTimeout(7 * 1000);
    if (conn.getResponseCode() != 200)
    throw new RuntimeException("请求url失败");
    InputStream is = conn.getInputStream();
    if ("gzip".equals(conn.getContentEncoding())) {
    result = readDataForZgip(is, "utf-8");
    }
    conn.disconnect();
    System.err.println("ContentEncoding: " + conn.getContentEncoding());
    System.out.println(result);
    } public static String readDataForZgip(InputStream inStream,
    String charsetName) throws Exception {
    GZIPInputStream gzipStream = new GZIPInputStream(inStream);
    ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    int len = -1;
    while ((len = gzipStream.read(buffer)) != -1) {
    outStream.write(buffer, 0, len);
    }
    byte[] data = outStream.toByteArray();
    outStream.close();
    gzipStream.close();
    inStream.close();
    return new String(data, charsetName);
    }
      

  3.   

    为什么我的没问题?抓出来是这样的var id_s=new Array( "14971271" ,"14971272" ,"14971273" ,"14971247" ,"14971285" ,"14971274" ,"14971280" ,"14971282" ,"14971283" ,"14971277" ,"14971276" ,"14971279" ,"14971275" ,"14971281" ,"14971284" ,"14971286" ,"14971261" ,"14971245" ,"14971292" ,"14971260" ,"14971265" ,"14971253" ,"14971305" ,"14971306" ,"14971293" ,"14971246" ,"14971252" ,"14971294" ,"14971308" ,"14971307" ,"14971298" ,"14971299" ,"14971296" ,"14971297" ,"14971295" ,"14971303" ,"14971278" ,"14971244" ,"14971240" ,"14971313" ,"14971254" ,"14971302" ,"14971311" ,"14971312" ,"14971314" ,"14971241" ,"14971242" ,"14971255" ,"14971256" ,"14971257" ,"14971287" ,"14971270" ,"14971304" ,"14971288" ,"14971301" ,"14971300" ,"14971243" ,"14971239" ,"14971249" ,"14971267" ,"14971269" ,"14971268" ,"14971266" ,"14971258" ,"14971259" ,"14971238" ,"14971250" ,"14971251" ,"14971248" ,"14971262" ,"14971263" ,"14971264" ,"14971290" ,"14971289" ,"14971291" ,"14971316" ,"14971317" ,"14971310" ,"14971309" ,"14971315" ); //B标签ID、已售完数组 var sold_outArr=new Object(); sold_outArr.b_ids=new Array(); sold_outArr.s_outArr=new Array();//已售完数组 var id_s_ar=new Array(); var id_href=new Array(); var red_cut; var userType;//用户类型 var s_spl=new Array(); //id_sp为分批数,整除时为正确,产生余数时在 var id_sp=(id_s.length-id_s.length%50)/50; var i,c1,c,t,g,brand_idtmp,s_tmp=0; //summery// function change()//遍历清除---已售完标签解决方案 { } $(document).ready(function sethrefs(){ i=0; c=0; g=0; var a=this.location.href; var atmp=a.split("/"); var atmp1=a.split("-"); if(atmp1[1]!=0&&atmp1[1]!=1&&atmp1[1]!=2&&atmp1[1]!=3&&atmp1[1]!=4&&atmp1[1]!=undefined) { brand_idtmp=atmp1[1].split(".")[0]; } else { brand_idtmp=atmp1[2]; } //获取用户类型 var VipNewUser = !!$.Cookie.get('VipNewUser'), vip_new_b_user = !!$.Cookie.get('vip_new_b_user'), vip_new_old_user = !!$.Cookie.get('vip_new_old_user'), userType = 0; if (!vip_new_old_user || VipNewUser) { userType = 0; } else { userType = vip_new_b_user ? 
1 : 1; } // sold_outArr.b_ids=('L_soldout_' + id_s.toString().replace(/,/g, ",L_soldout_")).split(","); $(".wrapper a").each(function(){ if(this.name==""){ id_href[i]="http://shop.vipshop.com/detail-"+brand_idtmp+"-"+id_s[i]+".html"; $(this).attr("id",sold_outArr.b_ids[i]); $(this).attr("href",id_href[i]); $(this).attr("target","_blank"); i++;} }); //已售完 $.ajax ({ url : 'http://stock.vipshop.com/list/', data : { brandId: brand_idtmp, is_old: userType }, cache : true, jsonp: 'callback', jsonpCallback : 'te_pingou', success : function (re) { sold_outArr.s_outArr='#L_soldout_' + re.sold_out.replace(/,/g, ",#L_soldout_"); $(sold_outArr.s_outArr).find("b").show(); }, dataType : 'jsonp' }); /*change();*/ })
      

  4.   


    Connection conn = Jsoup.connect(url);
    conn.timeout(0);
    conn.ignoreContentType(true);
    Document doc = conn.get();
    System.out.println(doc.text());
      

  5.   

    或者用htmlparserString path="http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js";
    Parser parser=new Parser(path);
    parser.setEncoding("utf-8");
    NodeList list=parser.parse(null);
    System.out.println(list.toHtml());