解决方案 »

  1.   

    // Name of the Lucene document field that will be indexed and searched.
    String fieldName = "text";
    // Content to index (comma-separated Chinese keyword tags).
    String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
    String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
    //String text = "国内水";
    //String text1 ="国内山水";
    // Instantiate the analyzer. NOTE(review): the original comment said
    // "IKAnalyzer", but the code actually creates a Paoding analyzer.
    Analyzer analyzer = new PaodingAnalyzer();
                            
    // Create an in-memory index directory (FSDirectory variant kept below
    // for an on-disk index).
    Directory dir = new RAMDirectory();
    //Directory dir = FSDirectory.open(new File("d:/test/index"));
    // Configure the IndexWriter for Lucene 4.5 with the chosen analyzer.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);

    IndexWriter iwriter = new IndexWriter(dir,config);

    // Build two documents: a stored, untokenized ID plus the tokenized,
    // stored text field.
    Document doc = new Document();
    doc.add(new StringField("ID","10000",Field.Store.YES));
    doc.add(new TextField("text",text,Field.Store.YES));

    Document doc1 = new Document();
    doc1.add(new StringField("ID","10001",Field.Store.YES));
    doc1.add(new TextField("text",text1,Field.Store.YES));

    iwriter.addDocument(doc);
        iwriter.addDocument(doc1);
    iwriter.close();
      

  2.   

    完整的代码是:import java.io.File;
    import java.io.StringReader;import net.paoding.analysis.analyzer.PaodingAnalyzer;import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    public class IKIndexAndSearch{ public static void main(String[] args) throws Exception{

    //Lucnene Document的字段名
    String fieldName = "text";
    //检索的内容
    String text = "地貌,自然风光,奇特景观,河流,土地,寓意风景,美景,水,国内山水,";
    String text1 = "蓝天,自然风光,云,国内山水,依山傍水,山岩,奇特景观,天空,森林,山,水, ";
    //String text = "国内水";
    //String text1 ="国内山水";
    //实例化IKAnalyzer分词器
    Analyzer analyzer = new PaodingAnalyzer();
                            
    //建立内存目录
    Directory dir = new RAMDirectory();
    //Directory dir = FSDirectory.open(new File("d:/test/index"));
    //配置IndexWriterConfig
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,analyzer);

    IndexWriter iwriter = new IndexWriter(dir,config);

    //写入索引
    Document doc = new Document();
    doc.add(new StringField("ID","10000",Field.Store.YES));
    doc.add(new TextField("text",text,Field.Store.YES));

    Document doc1 = new Document();
    doc1.add(new StringField("ID","10001",Field.Store.YES));
    doc1.add(new TextField("text",text1,Field.Store.YES));

    iwriter.addDocument(doc);
        iwriter.addDocument(doc1);
    iwriter.close();

    //开始搜索
    //实例化搜索器
    DirectoryReader ireader = DirectoryReader.open(dir);
    IndexSearcher isearcher = new IndexSearcher(ireader);

    //String keyword = "中文分词工具包";
    //String keyword = "这是一个中文分词的例子";
    String keyword = "国内水";

    //使用QueryParser查询分析器构造Query对象
    Analyzer analyzera = new PaodingAnalyzer();
    QueryParser qp = new QueryParser(Version.LUCENE_45,"text",analyzera);
    qp.setDefaultOperator(QueryParser.Operator.AND);  
    Query query = qp.parse(keyword);

    System.out.println("QueryParser:"+query.toString());

    //搜索相似度最高的5条记录
    TopDocs topDocs = isearcher.search(query, 5);
    System.out.println("命中:"+topDocs.totalHits); //输出结果
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    //高亮设置
    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<B>","</B>");
    //设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    //设置每次返回的字符数,想必大家在使用搜索引擎的时候也没有一并把全部数据展示出来吧,当然这里也是设定只展示部分数据



    for(int i=0;i<topDocs.totalHits;i++){
    Document targetDoc = isearcher.doc(scoreDocs[i].doc);
    System.out.println("内容:"+targetDoc.toString());

    TokenStream tokenStream = analyzer.tokenStream(fieldName,new StringReader( targetDoc.get(fieldName)));
    String str = highlighter.getBestFragment(tokenStream, targetDoc.get(fieldName));
    System.out.println(str);

    }

    ireader.close();
    dir.close();


    }

    }完整错误是:
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker getProperties
    信息: config paoding analysis from: D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analysis-default.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-analyzer.properties;D:\pro\artup.com\test\bin\paoding-dic-home.properties;D:\pro\artup.com\artup\www_artup\webapps\dic\paoding-dic-names.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives.properties;D:\pro\artup.com\test\file:\C:\Users\yachang\Desktop\paoding-analysis-4.4.0.jar!\paoding-knives-user.properties
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.CJKKnife
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.LetterKnife
    2013-11-8 15:52:06 net.paoding.analysis.knife.PaodingMaker createPaodingWithKnives
    信息: add knike: net.paoding.analysis.knife.NumberKnife
    Exception in thread "main" java.lang.IllegalArgumentException: first position increment must be > 0 (got 0) for field 'text'
    at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:125)
    at org.apache.lucene.index.DocFieldProcessor.processDocument(DocFieldProcessor.java:248)
    at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:254)
    at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:446)
    at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1551)
    at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1221)
    at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1202)
    at IKIndexAndSearch.main(IKIndexAndSearch.java:66)
    lucene版本是:4.5.1 分词器jar包是paoding-analysis-4.4.0.jar包,这个是不是lucene版本与分词器jar包的冲突,万分感谢
      

  3.   

    没那么难吧,我的个人网站 http://www.ablanxue.com 也是用庖丁解牛做分词器。感觉很好用,中文分词很好。
    建立索引代码片段
    // 索引 String rootlucene = ServletActionContext.getRequest()
    .getRealPath("/lucenexxx/");
    File flucene = null;
    flucene = new File(rootlucene);
    if (flucene.exists()) {
    flucene.mkdir();
    } IndexWriter writerlucene = null; try {
    writerlucene = new IndexWriter(rootlucene,
    new IKAnalyzer(), false);
    } catch (IOException e) {
    // 如果没有索引文件,则创建新索引
    writerlucene = new IndexWriter(rootlucene,
    new IKAnalyzer(), true);
    } writerlucene.setUseCompoundFile(true);
    Document doc1 = null;
    doc1 = new Document();
    doc1.add(new Field("jianjie", jianjie, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO));
    doc1.add(new Field("title", title, Field.Store.YES,
    Field.Index.TOKENIZED,
    Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc1.add(new Field("yearmoth", "" + yearmoth, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO));
    doc1.add(new Field("id", "" + id, Field.Store.YES,
    Field.Index.TOKENIZED,
    Field.TermVector.WITH_POSITIONS_OFFSETS)); doc1.add(new Field("time", time, Field.Store.YES,
    Field.Index.NO, Field.TermVector.NO)); writerlucene.addDocument(doc1); writerlucene.optimize();
    writerlucene.close(); // 索引
    搜索
      

  4.   

    我觉得你调用了两次 addDocument,你试试每次只添加一个 document
      

  5.   

    庖丁不适用于 Lucene 3.x 以后版本的分词,建议改用 mmseg4j!!!