String fieldName = "text"; //检索的内容 String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,"; String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, "; //String text = "国内水"; //String text1 ="国内山水"; //实例化IKAnalyzer分词器 Analyzer analyzer = new PaodingAnalyzer();
//建立内存目录 Directory dir = new RAMDirectory(); //Directory dir = FSDirectory.open(new File("d:/test/index")); //配置IndexWriterConfig IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);
IndexWriter iwriter = new IndexWriter(dir,config);
//写入索引 Document doc = new Document(); doc.add(new StringField("ID","10000",Field.Store.YES)); doc.add(new TextField("text",text,Field.Store.YES));
Document doc1 = new Document(); doc1.add(new StringField("ID","10001",Field.Store.YES)); doc1.add(new TextField("text",text1,Field.Store.YES));
//Lucnene Document的字段名 String fieldName = "text"; //检索的内容 String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,"; String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, "; //String text = "国内水"; //String text1 ="国内山水"; //实例化IKAnalyzer分词器 Analyzer analyzer = new PaodingAnalyzer();
//建立内存目录 Directory dir = new RAMDirectory(); //Directory dir = FSDirectory.open(new File("d:/test/index")); //配置IndexWriterConfig IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);
IndexWriter iwriter = new IndexWriter(dir,config);
//写入索引 Document doc = new Document(); doc.add(new StringField("ID","10000",Field.Store.YES)); doc.add(new TextField("text",text,Field.Store.YES));
Document doc1 = new Document(); doc1.add(new StringField("ID","10001",Field.Store.YES)); doc1.add(new TextField("text",text1,Field.Store.YES));
}完整错误是: 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties 信息: config paoding analysis from: D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.CJKKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.LetterKnife 2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives 信息: add knike: net.paoding.analysis.knife.NumberKnife Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text' at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125) at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248) at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446) at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202) at IKIndexAndSearch.main(IKIndexAndSearch.java:66) lucene版本是:4.5.1 
分词器 jar 包是 paoding-analysis-4.4.0.jar。这是不是 Lucene 版本与分词器 jar 包不兼容导致的?万分感谢!
// Excerpt: builds an in-memory index holding two sample documents.
// Content to be indexed and searched (comma-separated keyword lists)
String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
// Shorter alternative inputs, left disabled
//String text = "国内水";
//String text1 ="国内山水";
// Instantiate the analyzer (a PaodingAnalyzer, not IKAnalyzer)
Analyzer analyzer = new PaodingAnalyzer();
// Create the in-memory directory that holds the index
Directory dir = new RAMDirectory();
// Disabled alternative: keep the index on disk instead
//Directory dir = FSDirectory.open(new File("d:/test/index"));
// Configure the IndexWriter with the analyzer created above
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);
IndexWriter iwriter = new IndexWriter(dir,config);
// Write the index: each document stores an "ID" field and a "text" field
Document doc = new Document();
doc.add(new StringField("ID","10000",Field.Store.YES));
doc.add(new TextField("text",text,Field.Store.YES));
Document doc1 = new Document();
doc1.add(new StringField("ID","10001",Field.Store.YES));
doc1.add(new TextField("text",text1,Field.Store.YES));
// Add both documents, then close the writer
iwriter.addDocument(doc);
iwriter.addDocument(doc1);
iwriter.close();
import java.io.StringReader;import net.paoding.analysis.analyzer.PaodingAnalyzer;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class IKIndexAndSearch{
    /**
     * Indexes two sample documents into an in-memory Lucene index using
     * {@link PaodingAnalyzer}, runs a single AND query against the "text"
     * field, and prints every returned hit together with its best fragment,
     * with matched terms wrapped in {@code <B>} tags.
     *
     * @param args unused
     * @throws Exception if analysis, indexing, query parsing, searching or
     *                   highlighting fails
     */
    public static void main(String[] args) throws Exception{
        // Name of the Lucene document field that is indexed and searched
        String fieldName = "text";
        // Content to be indexed and searched (comma-separated keyword lists)
        String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
        String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
        // One analyzer (PaodingAnalyzer) is shared by indexing, query parsing and highlighting
        Analyzer analyzer = new PaodingAnalyzer();
        // In-memory index; swap in FSDirectory.open(new File("d:/test/index")) to keep it on disk
        Directory dir = new RAMDirectory();
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);
            IndexWriter iwriter = new IndexWriter(dir,config);
            try {
                // Write the index: each document stores an "ID" field and the text field
                Document doc = new Document();
                doc.add(new StringField("ID","10000",Field.Store.YES));
                doc.add(new TextField(fieldName,text,Field.Store.YES));
                Document doc1 = new Document();
                doc1.add(new StringField("ID","10001",Field.Store.YES));
                doc1.add(new TextField(fieldName,text1,Field.Store.YES));
                iwriter.addDocument(doc);
                iwriter.addDocument(doc1);
            } finally {
                // Close the writer even when addDocument throws
                iwriter.close();
            }

            // Start searching: open a reader and a searcher over the index
            DirectoryReader ireader = DirectoryReader.open(dir);
            try {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                String keyword = "国内水";
                // Build the Query with QueryParser; every term must match (AND)
                QueryParser qp = new QueryParser(Version.LUCENE_45,fieldName,analyzer);
                qp.setDefaultOperator(QueryParser.Operator.AND);
                Query query = qp.parse(keyword);
                System.out.println("QueryParser:"+query.toString());
                // Fetch the 5 highest-scoring hits
                TopDocs topDocs = isearcher.search(query, 5);
                System.out.println("命中:"+topDocs.totalHits);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                // Highlight format: prefix and suffix wrapped around each matched term
                SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<B>","</B>");
                Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));
                // Fragment size of 50, so only part of the stored text is shown
                highlighter.setTextFragmenter(new SimpleFragmenter(50));
                // Iterate over the hits actually returned: totalHits counts every match
                // and can exceed scoreDocs.length, which is capped at 5 by the search call.
                for (ScoreDoc scoreDoc : scoreDocs) {
                    Document targetDoc = isearcher.doc(scoreDoc.doc);
                    System.out.println("内容:"+targetDoc.toString());
                    String content = targetDoc.get(fieldName);
                    TokenStream tokenStream = analyzer.tokenStream(fieldName,new StringReader(content));
                    String str = highlighter.getBestFragment(tokenStream, content);
                    System.out.println(str);
                }
            } finally {
                ireader.close();
            }
        } finally {
            dir.close();
        }
    }
}完整错误是:
2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties
信息: config paoding analysis from: D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties
2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.CJKKnife
2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.LetterKnife
2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
信息: add knike: net.paoding.analysis.knife.NumberKnife
Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text'
at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125)
at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202)
at IKIndexAndSearch.main(IKIndexAndSearch.java:66)
Lucene 版本是 4.5.1,分词器 jar 包是 paoding-analysis-4.4.0.jar。这是不是 Lucene 版本与分词器 jar 包不兼容导致的?万分感谢!
建立索引代码片段
// Index-building snippet: adds one document to the index stored under the
// web application's "/lucenexxx/" directory. The values jianjie, title,
// yearmoth, id and time come from the surrounding code.
String rootlucene = ServletActionContext.getRequest()
        .getRealPath("/lucenexxx/");
File flucene = new File(rootlucene);
// Create the index directory only when it does not exist yet
if (!flucene.exists()) {
    flucene.mkdirs();
}
IndexWriter writerlucene = null;
try {
    // Open the existing index (create flag false)
    writerlucene = new IndexWriter(rootlucene,
            new IKAnalyzer(), false);
} catch (IOException e) {
    // If there is no index yet, create a new one.
    // NOTE(review): every IOException ends up here, not only "index missing";
    // confirm that retrying with create=true is acceptable for other failures.
    writerlucene = new IndexWriter(rootlucene,
            new IKAnalyzer(), true);
}
// Build the document: only "title" and "id" are indexed, the rest are stored only
Document doc1 = new Document();
doc1.add(new Field("jianjie", jianjie, Field.Store.YES,
        Field.Index.NO, Field.TermVector.NO));
doc1.add(new Field("title", title, Field.Store.YES,
        Field.Index.TOKENIZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS));
doc1.add(new Field("yearmoth", "" + yearmoth, Field.Store.YES,
        Field.Index.NO, Field.TermVector.NO));
doc1.add(new Field("id", "" + id, Field.Store.YES,
        Field.Index.TOKENIZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS));
doc1.add(new Field("time", time, Field.Store.YES,
        Field.Index.NO, Field.TermVector.NO));
try {
    writerlucene.setUseCompoundFile(true);
    writerlucene.addDocument(doc1);
    writerlucene.optimize();
} finally {
    // Close the writer even when adding or optimizing fails
    writerlucene.close();
}
// End of index-building snippet
搜索