package cn.edu.study9;import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.pdfbox.searchengine.lucene.IndexFiles;
import org.pdfbox.searchengine.lucene.LucenePDFDocument;public class PDFBoxLuceneIndex { private static String dest_index_path = "D:\\lucene\\PDFBox\\index"; public static void PDFQueryIndex() {
try {
IndexSearcher searcher = new IndexSearcher(dest_index_path);
Term term = new Term("contents", "pdf");
Query query = new TermQuery(term);
System.out.println("----------检索内容:" + query.toString() + "-----"); Hits hits = searcher.search(query);
System.out.println("----------检索结果:共检索到 " + hits.length()
+ "条-----"); for (int i = 0; i < hits.length(); i++) {
System.out.println(hits.doc(i));
System.out.println(hits.doc(i).getField("id"));
}
} catch (IOException e) {
e.printStackTrace();
}
} public static void PDFIndexBuilder() {  // 用lucene-2.0.0.jar d:\lucene\PDFBox\external
try {
IndexFiles indexpdf = new IndexFiles();
indexpdf.index(new File(
"D:\\lucene\\PDFBox\\PDFBox-0.7.3\\PDFBox-0.7.3\\docs"),
true, dest_index_path);
} catch (Exception e) {
e.printStackTrace();
}
System.out.println("------------创建索引成功----------------");
} public static void main(String[] args) {
PDFIndexBuilder();
PDFQueryIndex();
System.out
.println("---------------PDF Lucene 检索测试---------------------");
}
}
控制台输出结果(运行时类路径中的 Lucene 版本与 PDFBox 0.7.3 编译时所用的版本不一致,导致 NoSuchMethodError):
Skipping D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\broken-links.xml
Indexing Text document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.html
Indexing PDF document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.pdf
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.lucene.document.Document.add(Lorg/apache/lucene/document/Field;)V
at org.pdfbox.searchengine.lucene.LucenePDFDocument.addUnindexedField(LucenePDFDocument.java:224)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.convertDocument(LucenePDFDocument.java:265)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.getDocument(LucenePDFDocument.java:377)
at org.pdfbox.searchengine.lucene.IndexFiles.addDocument(IndexFiles.java:295)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:269)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:236)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:223)
at org.pdfbox.searchengine.lucene.IndexFiles.index(IndexFiles.java:165)
at cn.edu.study9.PDFBoxLuceneIndex.PDFIndexBuilder(PDFBoxLuceneIndex.java:56)
at cn.edu.study9.PDFBoxLuceneIndex.main(PDFBoxLuceneIndex.java:66)