我本来想用程序采集搜索结果页面,
但偶尔只会下载到类似下面的内容:
<!-- gsp15.search.cnb.yahoo.com uncompressed/chunked Thu May 22 11:25:49 CST 2008 -->
这样的字符串,而得不到网页的实际内容,这是怎么回事?

解决方案 »

  1.   


    public static String getHtmlText(String strUrl, int timeout, String strEnCoding) {
            if (strUrl == null || strUrl.length() == 0) {
                return null;
            }        StringBuffer strHtml = null;
            String strLine = "";
            HttpURLConnection httpConnection = null;// 这里可以定义成HttpURLConnection
            InputStream urlStream = null;
            BufferedInputStream buff = null;
            BufferedReader br = null;
            boolean isError = false;
            try {            //链接网络得到网页源代码
             URL url = new URL(strUrl);
                httpConnection = (HttpURLConnection) url.openConnection();
                httpConnection.addRequestProperty("User-Agent", "IcewolfHttp/1.0");
    httpConnection.addRequestProperty("Accept",
                                         "www/source; text/html; image/gif; */*");
    httpConnection.addRequestProperty("Accept-Language", "");
                httpConnection.setConnectTimeout(timeout);
                httpConnection.setReadTimeout(timeout);
                urlStream = httpConnection.getInputStream();
                buff = new BufferedInputStream(urlStream);
                Reader r = null;
                if (strEnCoding == null || strEnCoding.compareTo("null") == 0) {
                 r = new InputStreamReader(buff);
                } else {
                 try {
                 r = new InputStreamReader(buff, strEnCoding);
                 } catch (UnsupportedEncodingException e) {
                 r = new InputStreamReader(buff);
                 }
                }
                
                br = new BufferedReader(r);
                strHtml = new StringBuffer("");
                while ((strLine = br.readLine()) != null) {
                 strHtml.append(strLine + "\r\n");
                }
            } catch (Exception e) {
             //e.printStackTrace();
             System.out.println(e.getClass() + "下载网页" + strUrl + "失败");
             isError = true;
            } finally{   
             try{
             if (br != null)
             br.close();
             if (buff != null)
             buff.close();
             if (urlStream != null)
             urlStream.close();
             }catch(Exception e){
             System.out.println(e.getClass() + "下载网页" + strUrl + "连接关闭失败");
             return null;
             }
            }
            
            if (strHtml == null || isError)
             return null;
            return strHtml.toString();
        }