作点贡献，提供完全支持中文的PDFBox-0.6.6.jar

pdfbox是什么东东？？？比iText好用？？能用模板么？

有人可以说一下具体怎么使用么？下载的时候没有看见使用文档。

package org.pdfbox;import java.io.*;
import org.apache.log4j.Category;
import org.apache.log4j.Logger;
import org.pdfbox.encryption.DecryptDocument;
import org.pdfbox.exceptions.InvalidPasswordException;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Command-line entry point that parses a PDF file and writes its text either
 * to a text file or to the console.
 *
 * <p>Recognised options are listed by {@link #usage()}: a password for
 * encrypted documents, an output encoding, console output, and an inclusive
 * page range.
 */
public class ExtractText
{
    // A class literal replaces the synthetic class$ helper emitted by old compilers.
    private static final Logger LOG = Logger.getLogger(ExtractText.class);

    /** Output encoding used when none is given; null selects the platform default writer. */
    public static final String DEFAULT_ENCODING = null;

    private static final String PASSWORD = "-password";
    private static final String ENCODING = "-encoding";
    private static final String CONSOLE = "-console";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";

    private static final String PDF_EXTENSION = ".pdf";
    private static final String TEXT_EXTENSION = ".txt";

    /**
     * Creates an instance. All functionality is static; the constructor is
     * kept public for compatibility with existing callers.
     */
    public ExtractText()
    {
    }

    /**
     * Extracts the text of a PDF document.
     *
     * <p>The first non-option argument is the PDF file; any later non-option
     * argument is taken as the text file (the last one wins). When no text
     * file is given, its name is derived from the PDF file name.
     *
     * <p>The process exits with status 1 after printing usage for invalid
     * arguments, and with status 2 when a supplied password is rejected.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @throws Exception if reading, decrypting, or writing fails
     */
    public static void main(String[] args)
        throws Exception
    {
        boolean toConsole = false;
        boolean passwordSupplied = false;
        String password = "";
        String encoding = DEFAULT_ENCODING;
        PDFTextStripper stripper = new PDFTextStripper();
        String pdfFile = null;
        String textFile = null;
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        for (int i = 0; i < args.length; i++)
        {
            String arg = args[i];
            if (arg.equals(PASSWORD))
            {
                password = requireValue(args, ++i);
                passwordSupplied = true;
            }
            else if (arg.equals(ENCODING))
            {
                encoding = requireValue(args, ++i);
            }
            else if (arg.equals(START_PAGE))
            {
                startPage = parsePageNumber(requireValue(args, ++i));
            }
            else if (arg.equals(END_PAGE))
            {
                endPage = parsePageNumber(requireValue(args, ++i));
            }
            else if (arg.equals(CONSOLE))
            {
                toConsole = true;
            }
            else if (pdfFile == null)
            {
                pdfFile = arg;
            }
            else
            {
                textFile = arg;
            }
        }

        if (pdfFile == null)
        {
            usage();
        }
        if (textFile == null)
        {
            textFile = defaultTextFileName(pdfFile);
        }

        InputStream input = null;
        Writer output = null;
        PDDocument document = null;
        try
        {
            input = new FileInputStream(pdfFile);
            long start = System.currentTimeMillis();
            document = parseDocument(input);
            long stop = System.currentTimeMillis();
            LOG.info("Time to parse time=" + (stop - start));

            if (document.isEncrypted())
            {
                try
                {
                    DecryptDocument decryptor = new DecryptDocument(document);
                    decryptor.decryptDocument(password);
                }
                catch (InvalidPasswordException e)
                {
                    // Decide on whether -password was actually given, not on
                    // the total argument count, which varies with other options.
                    if (passwordSupplied)
                    {
                        System.err.println("Error: The supplied password is incorrect.");
                        System.exit(2);
                    }
                    else
                    {
                        System.err.println("Error: The document is encrypted.");
                        usage();
                    }
                }
            }

            if (toConsole)
            {
                output = new OutputStreamWriter(System.out);
            }
            else if (encoding != null)
            {
                output = new OutputStreamWriter(new FileOutputStream(textFile), encoding);
            }
            else
            {
                output = new OutputStreamWriter(new FileOutputStream(textFile));
            }

            start = System.currentTimeMillis();
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);
            stripper.writeText(document, output);
            stop = System.currentTimeMillis();
            LOG.info("Time to extract text time=" + (stop - start));
        }
        finally
        {
            // Nested so that a failure closing one resource cannot prevent
            // the remaining resources from being closed.
            try
            {
                if (input != null)
                {
                    input.close();
                }
            }
            finally
            {
                try
                {
                    if (output != null)
                    {
                        output.close();
                    }
                }
                finally
                {
                    if (document != null)
                    {
                        document.close();
                    }
                }
            }
        }
    }

    /**
     * Returns the option value at {@code index}, or prints usage and exits
     * when the option was the last argument and has no value.
     *
     * @param args  the full argument array
     * @param index position at which the option's value is expected
     * @return the argument at {@code index}
     */
    private static String requireValue(String[] args, int index)
    {
        if (index >= args.length)
        {
            usage();
        }
        return args[index];
    }

    /**
     * Parses a page-number option value, printing usage and exiting when the
     * value is not a valid integer.
     *
     * @param value the text following -startPage or -endPage
     * @return the parsed integer
     */
    private static int parsePageNumber(String value)
    {
        try
        {
            return Integer.parseInt(value);
        }
        catch (NumberFormatException e)
        {
            System.err.println("Error: '" + value + "' is not a valid page number.");
            usage();
            return 0; // not reached: usage() terminates the JVM
        }
    }

    /**
     * Derives the output file name from the PDF file name: a trailing ".pdf"
     * (compared case-insensitively) is replaced by ".txt"; any other name
     * simply gets ".txt" appended.
     *
     * @param pdfFile the PDF file name, not null
     * @return the derived text file name, never null
     */
    private static String defaultTextFileName(String pdfFile)
    {
        int stemLength = pdfFile.length() - PDF_EXTENSION.length();
        // regionMatches returns false for a negative offset, which covers
        // names shorter than the extension.
        if (stemLength > 0
            && pdfFile.regionMatches(true, stemLength, PDF_EXTENSION, 0, PDF_EXTENSION.length()))
        {
            return pdfFile.substring(0, stemLength) + TEXT_EXTENSION;
        }
        return pdfFile + TEXT_EXTENSION;
    }

    /**
     * Runs a {@link PDFParser} over the stream and returns the document it
     * produces.
     *
     * @param input stream positioned at the start of the PDF data
     * @return the parsed document
     * @throws IOException if parsing fails
     */
    private static PDDocument parseDocument(InputStream input)
        throws IOException
    {
        PDFParser parser = new PDFParser(input);
        parser.parse();
        return parser.getPDDocument();
    }

    /**
     * Prints the command-line help to standard error and terminates the JVM
     * with exit status 1.
     */
    private static void usage()
    {
        System.err.println(
            "Usage: java org.pdfbox.ExtractText [OPTIONS] <PDF file> [Text File]\n"
            + "  -password  <password>        Password to decrypt document\n"
            + "  -encoding  <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n"
            + "  -console                     Send text to console instead of file\n"
            + "  -startPage <number>          The first page to start extraction(1 based)\n"
            + "  -endPage <number>            The last page to extract(inclusive)\n"
            + "  <PDF file>                   The PDF document to use\n"
            + "  [Text File]                  The file to write the text to\n");
        System.exit(1);
    }
}

楼主，我看了你传的代码，这应该是将pdf解析成txt的形式吧，但是，pdfbox中有提取pdf段落的类么？
我看了好长时间的doc，没找着能用来提取pdf段落的类。
如果转成txt，大家有好的方法来提取各个段落么？
如果大家有源代码，能帮我传到[email protected]么？谢谢大家哦，

非常谢谢楼主！！！By the way ， to 青豆：我对你的问题也很感兴趣，希望有什么解决方案可以相互告知，谢谢！！！[email protected]

TO 楼主：好像中文还是不行呀！！！另外我用Eclipse运行时，出了警告：
log4j:WARN No appenders could be found for logger (org.pdfbox.pdfparser.PDFParser).
log4j:WARN Please initialize the log4j system properly.

to:ton2008(寒亮)
参考：http://www-900.ibm.com/developerWorks/cn/java/l-log4j/index.shtml

调试易

作点贡献，提供完全支持中文的PDFBox-0.6.6.jar

解决方案 »