package cn.edu.study9;import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.pdfbox.searchengine.lucene.IndexFiles;
import org.pdfbox.searchengine.lucene.LucenePDFDocument;

/**
 * Small demo that builds a Lucene index over a directory with PDFBox's
 * {@code IndexFiles} helper and then runs a single term query against it.
 */
public class PDFBoxLuceneIndex {

    /** Directory in which the Lucene index is written and later searched. */
    private static final String dest_index_path = "D:\\lucene\\PDFBox\\index";

    /**
     * Searches the index for the term "pdf" in the "contents" field and prints
     * the query, the number of hits, and each hit together with its "id" field.
     *
     * <p>An {@link IOException} raised while opening, searching or closing the
     * index is printed to standard error and not rethrown.
     */
    public static void PDFQueryIndex() {
        try {
            IndexSearcher searcher = new IndexSearcher(dest_index_path);
            try {
                Term term = new Term("contents", "pdf");
                Query query = new TermQuery(term);
                System.out.println("----------检索内容:" + query.toString() + "-----");

                Hits hits = searcher.search(query);
                System.out.println("----------检索结果:共检索到 " + hits.length()
                        + "条-----");

                for (int i = 0; i < hits.length(); i++) {
                    // Load each stored document once and reuse it for both lines.
                    Document doc = hits.doc(i);
                    System.out.println(doc);
                    System.out.println(doc.getField("id"));
                }
            } finally {
                // Release the index files even when the search itself fails.
                searcher.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes the PDFBox documentation directory into {@code dest_index_path}.
     *
     * <p>The success banner is printed only when indexing finished without an
     * exception. Any {@link Exception} is printed to standard error and not
     * rethrown. A {@link Error} such as {@link NoSuchMethodError} is not caught
     * here and propagates to the caller.
     */
    public static void PDFIndexBuilder() {
        // Use lucene-2.0.0.jar from d:\lucene\PDFBox\external
        try {
            IndexFiles indexpdf = new IndexFiles();
            // NOTE(review): the boolean presumably asks IndexFiles to create a
            // fresh index rather than append - confirm against IndexFiles.
            indexpdf.index(new File(
                    "D:\\lucene\\PDFBox\\PDFBox-0.7.3\\PDFBox-0.7.3\\docs"),
                    true, dest_index_path);
            System.out.println("------------创建索引成功----------------");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the index, queries it, and then prints the test banner.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        PDFIndexBuilder();
        PDFQueryIndex();
        System.out
                .println("---------------PDF Lucene 检索测试---------------------");
    }
}
控制台输出结果:
Skipping D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\broken-links.xml
Indexing Text document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.html
Indexing PDF document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.pdf
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.lucene.document.Document.add(Lorg/apache/lucene/document/Field;)V
at org.pdfbox.searchengine.lucene.LucenePDFDocument.addUnindexedField(LucenePDFDocument.java:224)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.convertDocument(LucenePDFDocument.java:265)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.getDocument(LucenePDFDocument.java:377)
at org.pdfbox.searchengine.lucene.IndexFiles.addDocument(IndexFiles.java:295)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:269)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:236)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:223)
at org.pdfbox.searchengine.lucene.IndexFiles.index(IndexFiles.java:165)
at cn.edu.study9.PDFBoxLuceneIndex.PDFIndexBuilder(PDFBoxLuceneIndex.java:56)
at cn.edu.study9.PDFBoxLuceneIndex.main(PDFBoxLuceneIndex.java:66)
import java.io.FileInputStream;
import java.io.IOException;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.pdfbox.searchengine.lucene.IndexFiles;
import org.pdfbox.searchengine.lucene.LucenePDFDocument;

/**
 * Small demo that builds a Lucene index over a directory with PDFBox's
 * {@code IndexFiles} helper and then runs a single term query against it.
 */
public class PDFBoxLuceneIndex {

    /** Directory in which the Lucene index is written and later searched. */
    private static final String dest_index_path = "D:\\lucene\\PDFBox\\index";

    /**
     * Searches the index for the term "pdf" in the "contents" field and prints
     * the query, the number of hits, and each hit together with its "id" field.
     *
     * <p>An {@link IOException} raised while opening, searching or closing the
     * index is printed to standard error and not rethrown.
     */
    public static void PDFQueryIndex() {
        try {
            IndexSearcher searcher = new IndexSearcher(dest_index_path);
            try {
                Term term = new Term("contents", "pdf");
                Query query = new TermQuery(term);
                System.out.println("----------检索内容:" + query.toString() + "-----");

                Hits hits = searcher.search(query);
                System.out.println("----------检索结果:共检索到 " + hits.length()
                        + "条-----");

                for (int i = 0; i < hits.length(); i++) {
                    // Load each stored document once and reuse it for both lines.
                    Document doc = hits.doc(i);
                    System.out.println(doc);
                    System.out.println(doc.getField("id"));
                }
            } finally {
                // Release the index files even when the search itself fails.
                searcher.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes the PDFBox documentation directory into {@code dest_index_path}.
     *
     * <p>The success banner is printed only when indexing finished without an
     * exception. Any {@link Exception} is printed to standard error and not
     * rethrown. A {@link Error} such as {@link NoSuchMethodError} is not caught
     * here and propagates to the caller.
     */
    public static void PDFIndexBuilder() {
        // Use lucene-2.0.0.jar from d:\lucene\PDFBox\external
        try {
            IndexFiles indexpdf = new IndexFiles();
            // NOTE(review): the boolean presumably asks IndexFiles to create a
            // fresh index rather than append - confirm against IndexFiles.
            indexpdf.index(new File(
                    "D:\\lucene\\PDFBox\\PDFBox-0.7.3\\PDFBox-0.7.3\\docs"),
                    true, dest_index_path);
            System.out.println("------------创建索引成功----------------");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the index, queries it, and then prints the test banner.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        PDFIndexBuilder();
        PDFQueryIndex();
        System.out
                .println("---------------PDF Lucene 检索测试---------------------");
    }
}
控制台输出结果:
Skipping D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\broken-links.xml
Indexing Text document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.html
Indexing PDF document: D:\lucene\PDFBox\PDFBox-0.7.3\PDFBox-0.7.3\docs\changes.pdf
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.lucene.document.Document.add(Lorg/apache/lucene/document/Field;)V
at org.pdfbox.searchengine.lucene.LucenePDFDocument.addUnindexedField(LucenePDFDocument.java:224)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.convertDocument(LucenePDFDocument.java:265)
at org.pdfbox.searchengine.lucene.LucenePDFDocument.getDocument(LucenePDFDocument.java:377)
at org.pdfbox.searchengine.lucene.IndexFiles.addDocument(IndexFiles.java:295)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:269)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:236)
at org.pdfbox.searchengine.lucene.IndexFiles.indexDocs(IndexFiles.java:223)
at org.pdfbox.searchengine.lucene.IndexFiles.index(IndexFiles.java:165)
at cn.edu.study9.PDFBoxLuceneIndex.PDFIndexBuilder(PDFBoxLuceneIndex.java:56)
at cn.edu.study9.PDFBoxLuceneIndex.main(PDFBoxLuceneIndex.java:66)
解决方案 »
- Jboss启动异常
- ssh中,怎样在类中配置查询出的结果集按照创建时间降序排列
- struts2+Json+Prototype出There is no Action mapped for namespace 错误
- 如何保证http访问安全性
- hibernate同步数据库的问题
- (IBatis)不经过的Bean中的方法,直接调用成功的jsp页面(用的是ibatis jpetstore 框架),好像是序列化的问题,没有将对象进行序列化。怎么解决。急!!!
- 大家给个建议,职称,要怎么考呀.
- 关于用J2EE开发在线考试系统的问题
- Struts和Spring 结合出了点错~~。帮忙看看。着急~
- 如何用Java将数据库的内容保存到CSV文件
- 各位朋友,这是我的开发总结,想在北京找份工作,不知道何种岗位比较适合,请帮忙推荐一下,谢谢!!
- myfaces+richfaces 问题,急啊!!!!!
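应该是jar的版本有问题:NoSuchMethodError 说明运行时类路径上加载的 Lucene 版本与编译 PDFBox 时所用的版本不一致,当前加载的 Lucene jar 中不存在 Document.add(Field) 这个方法签名。请检查类路径,确保只保留与 PDFBox-0.7.3 配套的 Lucene jar(例如其 external 目录中自带的那个),并去掉其他版本的 Lucene jar。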
《搜索引擎零距离—基于Ruby+Java搜索引擎原理与实现》 清华出版社。
http://www.huachu.com.cn/itbook/itbookinfo.asp?lbbh=10105450