求助下载网页图片

我有一个已经做好的，决定post出来
1、保存图片的类
package pagecapture;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * A remote image, identified by its URL, that can be downloaded into the
 * {@code images} sub-directory of a target directory.
 *
 * <p>If the URL passed to the constructor is malformed, the instance is still
 * created but {@link #getUrl()} and {@link #getTargetName()} return {@code null}
 * and {@link #saveAsFile(String)} does nothing.
 */
public class Image {

    /** Size in bytes of the buffer used while copying the download to disk. */
    private static final int BUFFER_SIZE = 8096;

    private URL url;
    private String imageName;  // file name of the image on the server
    private String targetName; // file name used when the image is saved locally

    /**
     * @return the local file name, or {@code null} if the URL was invalid
     */
    public String getTargetName() {
        return targetName;
    }

    /**
     * @return the image URL, or {@code null} if the URL was invalid
     */
    public URL getUrl() {
        return url;
    }

    /**
     * Creates an image for the given absolute URL. A malformed URL is reported
     * on standard output and leaves the instance without URL and name.
     *
     * @param url absolute URL of the image
     */
    public Image(String url) {
        try {
            this.url = new URL(url);
            initImageName();
            targetName = imageName;
        } catch (MalformedURLException ex) {
            System.out.println("URL is invalid");
        }
    }

    /**
     * Downloads the image to {@code <path>/images/<targetName>}.
     *
     * <p>Nothing is downloaded when the image has no valid URL, when
     * {@code path} is null or empty, or when the target file already exists.
     * Problems are reported on the console instead of being thrown. An
     * incomplete file left by a failed download is removed so that a later
     * call can retry.
     *
     * @param path directory under which the {@code images} folder is created
     */
    public void saveAsFile(String path) {
        if (url == null || targetName == null) {
            System.err.println("Image URL is invalid, nothing to save");
            return;
        }
        if (path == null || path.length() == 0) {
            System.err.println("Target path is empty, can't save image : " + targetName);
            return;
        }

        File dir = new File(path + File.separator + "images");
        if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
            System.err.println("Can't create directory : " + dir.getPath());
            return;
        }

        String sFile = dir.getPath() + File.separator + targetName;
        File file = new File(sFile);
        if (file.exists()) {
            System.out.println("File : " + sFile + " has exists!");
            return;
        }

        BufferedInputStream in = null;
        FileOutputStream out = null;
        boolean saved = false;
        try {
            URLConnection connection = url.openConnection();
            in = new BufferedInputStream(connection.getInputStream());
            out = new FileOutputStream(file);
            byte[] buf = new byte[BUFFER_SIZE];
            int size;
            while ((size = in.read(buf)) != -1) {
                out.write(buf, 0, size);
            }
            saved = true;
        } catch (IOException ex) {
            System.err.println("IO Error about:" + sFile);
        } finally {
            // Always release both streams, also when the copy failed half-way.
            closeQuietly(out);
            closeQuietly(in);
        }

        if (saved) {
            System.out.println("Save Image OK : " + sFile);
        } else if (file.exists() && !file.delete()) {
            // The file did not exist before this call, so whatever is there now is
            // a partial download that would block the next attempt.
            System.err.println("Can't remove incomplete file : " + sFile);
        }
    }

    /** Closes the resource if it is open; a failure while closing is ignored. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when closing fails.
        }
    }

    /**
     * Derives the server-side file name from the part of the URL after the
     * last {@code /}. Names ending in {@code .asp} or {@code .jsp} get the
     * extension {@code .gif} instead.
     */
    private void initImageName() {
        String strUrl = url.getFile();
        imageName = strUrl.substring(strUrl.lastIndexOf("/") + 1);
        if (imageName.endsWith(".asp") || imageName.endsWith(".jsp")) {
            imageName = imageName.substring(0, imageName.length() - 4) + ".gif";
        }
    }
}
2、保存页面的类，支持下载页面的所有链接
package pagecapture;

import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.List;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * A web page downloaded from a URL.
 *
 * <p>Constructing a page downloads its source, works out its file name and
 * collects the relative {@code .html}/{@code .htm} links and the {@code img}
 * tags it contains. {@link #SaveAsFile(String)} then writes the page and its
 * images to a local directory.
 */
public class Page {

    /** Encoding used when the caller does not specify one. */
    private static final String DEFAULT_ENCODING = "utf-8";

    private String encoding = DEFAULT_ENCODING;
    private URL url;
    private String pageName;    // The original name on the server side
    private String targetName;  // The file name used when the page is saved locally
    private String content;     // Decoded page source; stays null when the download failed
    private List<String> linkedList = new ArrayList<String>();
    private List<Image> imageList = new ArrayList<Image>();

    // CSS and JavaScript support could also be added here in the future.
    void init() {
        System.out.println("Begin Init...");
        initContent();
        initPageName();
        initLinks();
        initImages();
        System.out.println("End Init...");
    }

    // Constructors

    /**
     * Downloads and analyses the page at {@code url} using the default encoding.
     *
     * @param url location of the page
     */
    public Page(URL url) {
        this.url = url;
        init();
    }

    /**
     * Downloads and analyses the page at {@code url} using the default encoding.
     * A malformed URL is reported on standard output and leaves the page empty.
     *
     * @param url absolute URL of the page
     */
    public Page(String url) {
        this(url, DEFAULT_ENCODING);
    }

    /**
     * Downloads and analyses the page at {@code url}.
     * A malformed URL is reported on standard output and leaves the page empty.
     *
     * @param url      absolute URL of the page
     * @param encoding character encoding used to decode (and later save) the page
     */
    public Page(String url, String encoding) {
        this.encoding = encoding;
        try {
            this.url = new URL(url);
            init();
        } catch (MalformedURLException ex) {
            System.out.println("URL is invalid");
        }
    }

    // All public methods

    public String getEncoding() {
        return encoding;
    }

    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * @return the decoded page source, or {@code null} if it could not be downloaded
     */
    public String getContent() {
        return content;
    }

    public URL getUrl() {
        return url;
    }

    public String getPageName() {
        return pageName;
    }

    public List<Image> getImageList() {
        return imageList;
    }

    public List<String> getLinkedList() {
        return linkedList;
    }

    /**
     * Saves the images of the page under {@code <path>/images} and the page
     * itself as {@code <path>/<targetName>}, written in the page's encoding.
     *
     * @param path target directory; created if it does not exist
     * @throws IllegalStateException if the page source was never downloaded
     * @throws IOException           if the page file cannot be written
     */
    public void SaveAsFile(String path) throws Exception {
        if (content == null) {
            throw new IllegalStateException("Page content is not available : " + url);
        }
        System.out.println("Begin Save as a file... :" + targetName);

        // Create the target directory
        File dir = new File(path);
        dir.mkdirs();

        // Images go first because saving them rewrites the image references in
        // content; the file must therefore be written from content afterwards.
        saveAllImages(path);

        File target = new File(dir, targetName);
        BufferedReader in = new BufferedReader(new StringReader(content));
        FileOutputStream fileOut = new FileOutputStream(target);
        try {
            // Write with the same encoding the page was decoded with.
            PrintWriter out = new PrintWriter(
                              new BufferedWriter(
                              new OutputStreamWriter(fileOut, encoding)));
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                out.println(inputLine);
            }
            // PrintWriter never throws on write errors; checkError() flushes and reports them.
            if (out.checkError()) {
                throw new IOException("Can't write file : " + target.getPath());
            }
        } finally {
            fileOut.close();
            in.close();
        }
        System.out.println("Save File Success! On :" + path);
    }

    /**
     * Downloads every image of the page and replaces its absolute URL inside
     * content by the relative path of the downloaded copy.
     */
    private void saveAllImages(String path) {
        for (Image image : imageList) {
            if (image.getUrl() == null) {
                // The image source was not a valid absolute URL; nothing to download.
                continue;
            }
            image.saveAsFile(path);
            String origSrc = image.getUrl().toString();
            String nowSrc = "images/" + image.getTargetName();
            // Literal replacement: a URL must not be interpreted as a regular expression.
            content = content.replace(origSrc, nowSrc);
        }
    }

    // All private methods

    /** Downloads the page source and decodes it with the configured encoding. */
    private void initContent() {
        InputStream raw = null;
        try {
            StringBuilder sb = new StringBuilder(); // collects the page source
            URLConnection conn = url.openConnection();
            raw = conn.getInputStream();
            BufferedReader in = new BufferedReader(new InputStreamReader(raw, encoding));
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                sb.append(inputLine);
                sb.append("\n");
            }
            content = sb.toString();
            System.out.println("End of initContent!");
        } catch (IOException ex) {
            System.out.println("Can't connect to the URL!");
            System.err.println("Cause : " + ex);
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing fails.
                }
            }
        }
    }

    /**
     * Uses the last path segment of the URL as page name when the URL ends in
     * {@code .html} or {@code .htm}; otherwise the name is {@code index.html}.
     */
    private void initPageName() {
        String strUrl = url.getFile();
        if (strUrl.endsWith(".html") || strUrl.endsWith(".htm")) {
            pageName = strUrl.substring(strUrl.lastIndexOf("/") + 1);
        } else {
            pageName = "index.html";
        }
        targetName = pageName;
        System.out.println("End of initPageName!The name is :" + targetName);
    }

    /** Collects an {@link Image} for every {@code img} tag found in content. */
    private void initImages() {
        if (content != null) {
            try {
                Parser p = Parser.createParser(content, encoding);
                NodeFilter imgFilter = new TagNameFilter("img");
                NodeList nl = p.extractAllNodesThatMatch(imgFilter);
                for (int i = 0; i < nl.size(); i++) {
                    ImageTag imageTag = (ImageTag) nl.elementAt(i);
                    String imageUrl = imageTag.getImageURL();
                    System.out.println("Image-" + i + " : " + imageUrl);
                    imageList.add(new Image(imageUrl));
                }
            } catch (ParserException ex) {
                System.out.println("Parse Image Tag Error!");
            }
        }
        System.out.println("End of initImage!");
    }

    /**
     * Collects the relative {@code .html}/{@code .htm} links of the page,
     * resolved against the page URL, into the linked list.
     */
    private void initLinks() {
        if (content != null) {
            try {
                Parser parser = Parser.createParser(content, encoding);
                NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);
                Node[] nodes = parser.parse(linkFilter).toNodeArray();
                for (int i = 0; i < nodes.length; i++) {
                    String line = ((LinkTag) nodes[i]).getLink();
                    if (!isLocalPageLink(line)) {
                        continue;
                    }
                    try {
                        // Resolve against the page URL so that "a.html", "../a.html"
                        // and "/a.html" all end up at the right address.
                        String resolved = new URL(url, line).toString();
                        System.out.println("Link-" + i + " : " + resolved);
                        linkedList.add(resolved);
                    } catch (MalformedURLException ex) {
                        System.out.println("Skip invalid link : " + line);
                    }
                }
            } catch (ParserException ex) {
                System.out.println("Parse Links Tag Error!");
            }
        }
        System.out.println("End of initLinks!");
    }

    /**
     * Tells whether a link is a relative reference to another HTML page:
     * not blank, not absolute, not {@code index.html}, and ending in
     * {@code .html} or {@code .htm}.
     */
    private boolean isLocalPageLink(String link) {
        if (isTrimEmpty(link)) {
            return false;
        }
        if (link.startsWith("http://") || link.startsWith("https://")) {
            return false;
        }
        if (link.equals("index.html")) {
            return false;
        }
        return link.endsWith(".html") || link.endsWith(".htm");
    }

    /** Returns true when the string is null, empty or only whitespace. */
    private boolean isTrimEmpty(String astr) {
        return astr == null || astr.trim().length() == 0;
    }
}
3、使用以上类的类（有准备写个界面）
package pagecapture;

import java.util.List;

/**
 * Command-line entry point: saves one index page and then every page it links to.
 */
public class Main {

    /**
     * Downloads the hard-coded index page and all pages from its link list
     * into one local directory.
     *
     * @param args not used
     * @throws Exception if saving any of the pages fails
     */
    public static void main(String[] args) throws Exception {
        final String startUrl = "http://www.510book.cn/files/article/html/3/3382/index.html";
        final String outputDir = "D:/PageCapture";

        // The index page is decoded as BIG5 and saved first.
        Page indexPage = new Page(startUrl, "BIG5");
        indexPage.SaveAsFile(outputDir);

        // Every linked page is then fetched with the default encoding and
        // saved into the same directory.
        List<String> links = indexPage.getLinkedList();
        for (int i = 0; i < links.size(); i++) {
            String link = links.get(i);
            System.out.println("");
            System.out.println("-------------------" + link + "----------------");
            new Page(link).SaveAsFile(outputDir);
        }
    }
}
比较粗糙，不对应该是十分粗糙，我主要就用它下下起点的盗贴（如果是小说迷，又只能在公司上网的话一定有一样的需求）

调试易

求助下载网页图片

解决方案 »