public class TestLucene{
public static void main(String[] args) throws Exception{
//indexDir is the directory that hosts Lucene's index files
File indexDir = new File("D:\\luceneIndex");
//dataDir is the directory that hosts the text files that to be indexed
File dataDir = new File("D:\\luceneData");
Analyzer luceneAnalyzer = new StandardAnalyzer();
File[] dataFiles = dataDir.listFiles();
IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);
long startTime = new Date().getTime();
Document document = new Document();
for(int i = 0; i < dataFiles.length; i++){
if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){
System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());
Reader txtReader = new FileReader(dataFiles[i]);
document.add(Field.Text("path",dataFiles[i].getCanonicalPath()));
document.add(Field.Text("contents",txtReader));
indexWriter.addDocument(document);
}
}
indexWriter.optimize();
indexWriter.close();
long endTime = new Date().getTime();
System.out.println("It takes " + (endTime - startTime)
+ " milliseconds to create index for the files in directory "
+ dataDir.getPath());
}
}
此类为创建索引public class TxtLuceneFileSearcher {
public static void main(String[] args) throws Exception{
String queryStr = "from";
//This is the directory that hosts the Lucene index
File indexDir = new File("D:\\luceneIndex");
FSDirectory directory = FSDirectory.getDirectory(indexDir,false);
IndexSearcher searcher = new IndexSearcher(directory);
if(!indexDir.exists()){
System.out.println("The Lucene index is not exist");
return;
}
Term term = new Term("contents",queryStr.toLowerCase());
TermQuery luceneQuery = new TermQuery(term);
Hits hits = searcher.search(luceneQuery);
System.out.println(hits.length());
for(int i = 0; i < hits.length(); i++){
Document document = hits.doc(i);
System.out.println("File: " + document.get("path"));
System.out.println("File: " + document.get("contents"));
}
}
}
此类打印出搜索到的文档的路径
1 打印出来的文档路径不是按照搜索关键字出现次数的大小排的序啊!
2 我搜索关键字“to” "the" 等都找不到。文本里绝对有这些个关键字的啊!!!
3 System.out.println("File: " + document.get("contents"));这句话应该是打印出来 文档吧,可是总显示null
高手给指点一下吧。有好的lucene教程给推荐一下啊
public static void main(String[] args) throws Exception{
//indexDir is the directory that hosts Lucene's index files
File indexDir = new File("D:\\luceneIndex");
//dataDir is the directory that hosts the text files that to be indexed
File dataDir = new File("D:\\luceneData");
Analyzer luceneAnalyzer = new StandardAnalyzer();
File[] dataFiles = dataDir.listFiles();
IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);
long startTime = new Date().getTime();
Document document = new Document();
for(int i = 0; i < dataFiles.length; i++){
if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){
System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());
Reader txtReader = new FileReader(dataFiles[i]);
document.add(Field.Text("path",dataFiles[i].getCanonicalPath()));
document.add(Field.Text("contents",txtReader));
indexWriter.addDocument(document);
}
}
indexWriter.optimize();
indexWriter.close();
long endTime = new Date().getTime();
System.out.println("It takes " + (endTime - startTime)
+ " milliseconds to create index for the files in directory "
+ dataDir.getPath());
}
}
此类为创建索引public class TxtLuceneFileSearcher {
public static void main(String[] args) throws Exception{
String queryStr = "from";
//This is the directory that hosts the Lucene index
File indexDir = new File("D:\\luceneIndex");
FSDirectory directory = FSDirectory.getDirectory(indexDir,false);
IndexSearcher searcher = new IndexSearcher(directory);
if(!indexDir.exists()){
System.out.println("The Lucene index is not exist");
return;
}
Term term = new Term("contents",queryStr.toLowerCase());
TermQuery luceneQuery = new TermQuery(term);
Hits hits = searcher.search(luceneQuery);
System.out.println(hits.length());
for(int i = 0; i < hits.length(); i++){
Document document = hits.doc(i);
System.out.println("File: " + document.get("path"));
System.out.println("File: " + document.get("contents"));
}
}
}
此类打印出搜索到的文档的路径
1 打印出来的文档路径不是按照搜索关键字出现次数的大小排的序啊!
2 我搜索关键字“to” "the" 等都找不到。文本里绝对有这些个关键字的啊!!!
3 System.out.println("File: " + document.get("contents"));这句话应该是打印出来 文档吧,可是总显示null
高手给指点一下吧。有好的lucene教程给推荐一下啊
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货