解决方案 »

  1.   

    // Name of the Lucene document field that will be indexed and searched.
    String fieldName = "text";
    // Content to index (comma-separated Chinese keyword tags).
    String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
    String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
    //String text = "国内水";
    //String text1 ="国内山水";
    // Instantiate the analyzer. NOTE(review): the original comment said
    // "IKAnalyzer", but the code actually creates a Paoding analyzer.
    Analyzer analyzer = new PaodingAnalyzer();
                            
    // Create an in-memory index directory (FSDirectory variant kept below
    // for an on-disk index).
    Directory dir = new RAMDirectory();
    //Directory dir = FSDirectory.open(new File("d:/test/index"));
    // Configure the IndexWriter for Lucene 4.5 with the chosen analyzer.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);

    IndexWriter iwriter = new IndexWriter(dir,config);

    // Build two documents: a stored, untokenized ID plus the tokenized,
    // stored text field.
    Document doc = new Document();
    doc.add(new StringField("ID","10000",Field.Store.YES));
    doc.add(new TextField("text",text,Field.Store.YES));

    Document doc1 = new Document();
    doc1.add(new StringField("ID","10001",Field.Store.YES));
    doc1.add(new TextField("text",text1,Field.Store.YES));

    iwriter.addDocument(doc);
        iwriter.addDocument(doc1);
    iwriter.close();
      

  2.   

    完整的代码是:import java.io.File;
    import java.io.StringReader;import net.paoding.analysis.analyzer.PaodingAnalyzer;import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    public class IKIndexAndSearch{ public static void main(String[] args) throws Exception{

    //Lucnene Document的字段名
    String fieldName = "text";
    //检索的内容
    String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
    String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
    //String text = "国内水";
    //String text1 ="国内山水";
    //实例化IKAnalyzer分词器
    Analyzer analyzer = new PaodingAnalyzer();
                            
    //建立内存目录
    Directory dir = new RAMDirectory();
    //Directory dir = FSDirectory.open(new File("d:/test/index"));
    //配置IndexWriterConfig
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);

    IndexWriter iwriter = new IndexWriter(dir,config);

    //写入索引
    Document doc = new Document();
    doc.add(new StringField("ID","10000",Field.Store.YES));
    doc.add(new TextField("text",text,Field.Store.YES));

    Document doc1 = new Document();
    doc1.add(new StringField("ID","10001",Field.Store.YES));
    doc1.add(new TextField("text",text1,Field.Store.YES));

    iwriter.addDocument(doc);
        iwriter.addDocument(doc1);
    iwriter.close();

    //开始搜索
    //实例化搜索器
    DirectoryReader ireader = DirectoryReader.open(dir);
    IndexSearcher isearcher = new IndexSearcher(ireader);

    //String keyword = "中文分词工具包";
    //String keyword = "这是一个中文分词的例子";
    String keyword = "国内水";

    //使用QueryParser查询分析器构造Query对象
    Analyzer analyzera = new PaodingAnalyzer();
    QueryParser qp = new QueryParser(Version.LUCENE_45,"text",analyzera);
    qp.setDefaultOperator(QueryParser.Operator.AND);  
    Query query = qp.parse(keyword);

    System.out.println("QueryParser:"+query.toString());

    //搜索相似度最高的5条记录
    TopDocs topDocs = isearcher.search(query, 5);
    System.out.println("命中:"+topDocs.totalHits); //输出结果
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    //高亮设置
    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<B>","</B>");
    //设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    //设置每次返回的字符数,想必大家在使用搜索引擎的时候也没有一并把全部数据展示出来吧,当然这里也是设定只展示部分数据



    for(int i=0;i<topDocs.totalHits;i++){
    Document targetDoc = isearcher.doc(scoreDocs[i].doc);
    System.out.println("内容:"+targetDoc.toString());

    TokenStream tokenStream = analyzer.tokenStream(fieldName,new StringReader( targetDoc.get(fieldName)));
    String str = highlighter.getBestFragment(tokenStream, targetDoc.get(fieldName));
    System.out.println(str);

    }

    ireader.close();
    dir.close();


    }

    }完整错误是:
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties
    信息: config paoding analysis from: D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.CJKKnife
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.LetterKnife
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.NumberKnife
    Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text'
    at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125)
    at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248)
    at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254)
    at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446)
    at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551)
    at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221)
    at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202)
    at IKIndexAndSearch.main(IKIndexAndSearch.java:66)
    lucene版本是:4.5.1 分词器jar包是paoding-analysis-4.4.0.jar包,这个是不是lucene版本与分词器jar包的冲突,万分感谢
      

  3.   

    没那么难吧,我的个人网站 http://www.ablanxue.com 也是用庖丁解牛做分词器。感觉很好用,中文分词很好。
    建立索引代码片段
    // 索引 String rootlucene = ServletActionContext.getRequest()
    .getRealPath("/lucenexxx/");
    File flucene = null;
    flucene = new File(rootlucene);
    if (flucene.exists()) {
    flucene.mkdir();
    } IndexWriter writerlucene = null; try {
    writerlucene = new IndexWriter(rootlucene,
    new IKAnalyzer(), false);
    } catch (IOException e) {
    // 如果没有索引文件,则创建新索引
    writerlucene = new IndexWriter(rootlucene,
    new IKAnalyzer(), true);
    } writerlucene.setUseCompoundFile(true);
    Document doc1 = null;
    doc1 = new Document();
    doc1.add(new Field("jianjie", jianjie, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO));
    doc1.add(new Field("title", title, Field.Store.YES,
    Field.Index.TOKENIZED,
    Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc1.add(new Field("yearmoth", "" + yearmoth, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO));
    doc1.add(new Field("id", "" + id, Field.Store.YES,
    Field.Index.TOKENIZED,
    Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("time", time, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO)); writerlucene.addDocument(doc1); writerlucene.optimize();
    writerlucene.close(); // 索引
    搜索
      

  4.   

    我觉得你调用了两次 addDocument,你试试每次只添加一个 document
      

  5.   

    庖丁不适用于 Lucene 3.x 以后版本的分词,建议改用 mmseg4j!!!