I'm just getting started with Lucene, using Lucene 2.3.2.

Here is my code:

import java.io.IOException;  
  
import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.index.CorruptIndexException;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.Hits;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.spans.SpanFirstQuery;  
import org.apache.lucene.search.spans.SpanNearQuery;  
import org.apache.lucene.search.spans.SpanNotQuery;  
import org.apache.lucene.search.spans.SpanQuery;  
import org.apache.lucene.search.spans.SpanTermQuery;  
import org.apache.lucene.store.LockObtainFailedException;  
import org.apache.lucene.store.RAMDirectory;  
  
public class SpanNotQueryTest {  
    RAMDirectory dir = new RAMDirectory();    
      
    public Document getDoc(String type,String content){  
        Document doc = new Document();    
        doc.add(new Field(type, content,Field.Store.YES,Field.Index.TOKENIZED));   
        return doc;  
    }  
      
    public void makeIndex(){  
        try {
            // build a small in-memory index; each doc has a single "content" field
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
            writer.addDocument(getDoc("content", "aa bb cc dd ee ff gg hh ii jj kk"));    
            //indexWriter.addDocument(getDoc("content", "bb aa cc dd ee ff gg ii jj kk"));    
            writer.addDocument(getDoc("content", "gg hh aa jj kk"));    
            writer.addDocument(getDoc("content", "gg hh ii jj kk"));    
            writer.addDocument(getDoc("content", "aa bb cc"));  
            writer.addDocument(getDoc("content", "aa bb cc dd ee"));   
            writer.addDocument(getDoc("content", "ff gg aa hh ii jj kk"));   
            //indexWriter.addDocument(getDoc("content", "ff gg bb aa hh jj kk"));   
            writer.close();  
        } catch (CorruptIndexException e) {    
            e.printStackTrace();  
        } catch (LockObtainFailedException e) {    
            e.printStackTrace();    
        } catch (IOException e) {    
            e.printStackTrace();    
        }    
    }  
      
    public void spanNotQuery(){  
        try {  
            IndexSearcher searcher = new IndexSearcher(dir);  
            /*SpanQuery include=new SpanNearQuery(new SpanQuery[]{ 
                new SpanTermQuery(new Term("content","aa")), 
                new SpanTermQuery(new Term("content","cc")) 
            },1,false);*/  
            // include: "aa" must occur within the first 3 positions of the field
            SpanQuery include = new SpanFirstQuery(
                    new SpanTermQuery(new Term("content", "aa")), 3);
            // exclude: "jj" and "ff" within a slop of 4, in either order
            SpanQuery exclude = new SpanNearQuery(new SpanQuery[]{
                new SpanTermQuery(new Term("content", "jj")),
                new SpanTermQuery(new Term("content", "ff"))
            }, 4, false);
            // keep include spans that do not overlap any exclude span
            SpanNotQuery query = new SpanNotQuery(include, exclude);
            Hits hits = searcher.search(query);
            System.out.println("All:");
            printHits(hits);
            hits = searcher.search(query.getInclude());
            System.out.println("Include:");
            printHits(hits);
            hits = searcher.search(query.getExclude());
            System.out.println("Exclude:");
            printHits(hits);
        }catch (Exception e) {  
            e.printStackTrace();  
        }  
    }  
      
    public static void printHits(Hits hits) throws Exception{  
        for (int i = 0,j=hits.length(); i < j; i++) {    
            System.out.println(hits.doc(i));  
        }  
    }  
      
    public static void main(String[] args) {  
        SpanNotQueryTest query=new SpanNotQueryTest();  
        query.makeIndex();  
        query.spanNotQuery();  
    }  
  
}

The output is:

All:
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc>>
Document<stored/uncompressed,indexed,tokenized<content:gg hh aa jj kk>>
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc dd ee>>
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc dd ee ff gg hh ii jj kk>>
Include:
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc>>
Document<stored/uncompressed,indexed,tokenized<content:gg hh aa jj kk>>
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc dd ee>>
Document<stored/uncompressed,indexed,tokenized<content:ff gg aa hh ii jj kk>>
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc dd ee ff gg hh ii jj kk>>
Exclude:
Document<stored/uncompressed,indexed,tokenized<content:ff gg aa hh ii jj kk>>
Document<stored/uncompressed,indexed,tokenized<content:aa bb cc dd ee ff gg hh ii jj kk>>

My understanding is that SpanNotQuery removes from the include matches the spans that overlap the exclude matches. So why do both the include and exclude results contain

ff gg aa hh ii jj kk
aa bb cc dd ee ff gg hh ii jj kk

while "aa bb cc dd ee ff gg hh ii jj kk" still shows up in the final result? I really can't figure out why.
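
In case it helps to make the question concrete, here is a minimal sketch of how the actual matched positions could be printed. The printSpans helper is hypothetical (not part of my program above); it reuses the dir field and the getSpans/Spans API from Lucene 2.3:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spans.Spans;

    // hypothetical helper: print doc id and start/end token positions
    // of every span that a SpanQuery matches
    public void printSpans(SpanQuery q) throws IOException {
        IndexReader reader = IndexReader.open(dir);
        Spans spans = q.getSpans(reader);
        while (spans.next()) {
            System.out.println("doc=" + spans.doc()
                    + " start=" + spans.start()
                    + " end=" + spans.end());
        }
        reader.close();
    }

Calling printSpans(query), printSpans(include) and printSpans(exclude) after makeIndex() should show which token positions each query covers in every document.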