我copy and change了这个建立索引的例子IndexFiles.java
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.db.DbDirectory;
import org.apache.lucene.store.je.JEDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Transaction;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;/** Index all text files under a directory. */
public class IndexFiles {
private IndexFiles() {}
private
static final File INDEX_DIR = new File("index");
/** Index all text files under a directory.
* @throws DatabaseException */
public static void main(String[] args) throws DatabaseException {
String usage = "java org.apache.lucene.demo.IndexFiles <root_directory>";
if (args.length == 0) {
System.err.println("Usage: " + usage);
System.exit(1);
}
final File docDir = new File(args[0]);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
try {
Database index, blocks;
Environment env;
//设置环境
EnvironmentConfig envConfig = new EnvironmentConfig();
envConfig.setTransactional(true);
envConfig.setAllowCreate(true);
File indexF=new File(INDEX_DIR.getAbsolutePath()+"/je/index");
env = new Environment(indexF,envConfig);
DatabaseConfig dbConfig = new DatabaseConfig();
dbConfig.setAllowCreate(true);
dbConfig.setTransactional(true);
dbConfig.setSortedDuplicates(true);
Transaction tn = env.beginTransaction(null, null);
index = env.openDatabase(tn ,"_index_", dbConfig);
File indexB=new File(INDEX_DIR.getAbsolutePath()+"/je/block");
env = new Environment(indexB, envConfig);
blocks = env.openDatabase(tn,"_blocks_",dbConfig);
JEDirectory dir=new JEDirectory(null,index,blocks);
System.out.println("不是这的错!");
IndexWriter writer = new IndexWriter(dir, new IKAnalyzer(true), true, IndexWriter.MaxFieldLength.LIMITED);
System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
indexDocs(writer, docDir,dir);
System.out.println("Optimizing...");
dir.flush();
writer.optimize();
writer.close();
Date start = new Date();
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) {
System.out.println(e);
}
} static void indexDocs(IndexWriter writer, File file,JEDirectory dir)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]),dir);
}
}
} else {
System.out.println("adding " + file);
try {
writer.addDocument(FileDocument.Document(file,dir));
}
// at least on windows, some temporary files raise this exception with an "access denied" message
// checking if the file can be read doesn't help
catch (FileNotFoundException fnfe) {
;
}
}
}
}
}
FileDocument.java
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/import java.io.File;
import java.io.FileReader;import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.db.DbDirectory;
import org.apache.lucene.store.je.JEDirectory;/** A utility for making Lucene Documents from a File. */public class FileDocument {
/** Makes a document for a File.
<p>
The document has three fields:
<ul>
<li><code>path</code>--containing the pathname of the file, as a stored,
untokenized field;
<li><code>modified</code>--containing the last modified date of the file as
a field as created by <a
href="lucene.document.DateTools.html">DateTools</a>; and
<li><code>contents</code>--containing the full contents of the file, as a
Reader field;
*/
public static Document Document(File f,JEDirectory dir)
throws java.io.FileNotFoundException {
// make a new, empty document
Document doc = new Document(); // Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", f.getPath(), Field.Store.NO, Field.Index.NOT_ANALYZED));
// Add the last modified date of the file a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the fichecksum mismatch in segments filele to be in the system's default encoding.
// If that's not the case searching for special characters will fail.
doc.add(new Field("contents", new FileReader(f))); // return the document
return doc;
} private FileDocument() {}
}
berkey db jar包没找到在lucene下载的地方自己下了个版本je-4.0.71.jar问什么我运行报错→org.apache.lucene.index.CorruptIndexException: checksum mismatch in segments file是indexwriter writer=new Indexwriter(..
这句错误!来个高手解惑
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.db.DbDirectory;
import org.apache.lucene.store.je.JEDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Transaction;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;/** Index all text files under a directory. */
public class IndexFiles {
private IndexFiles() {}
private
static final File INDEX_DIR = new File("index");
/** Index all text files under a directory.
* @throws DatabaseException */
public static void main(String[] args) throws DatabaseException {
String usage = "java org.apache.lucene.demo.IndexFiles <root_directory>";
if (args.length == 0) {
System.err.println("Usage: " + usage);
System.exit(1);
}
final File docDir = new File(args[0]);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
try {
Database index, blocks;
Environment env;
//设置环境
EnvironmentConfig envConfig = new EnvironmentConfig();
envConfig.setTransactional(true);
envConfig.setAllowCreate(true);
File indexF=new File(INDEX_DIR.getAbsolutePath()+"/je/index");
env = new Environment(indexF,envConfig);
DatabaseConfig dbConfig = new DatabaseConfig();
dbConfig.setAllowCreate(true);
dbConfig.setTransactional(true);
dbConfig.setSortedDuplicates(true);
Transaction tn = env.beginTransaction(null, null);
index = env.openDatabase(tn ,"_index_", dbConfig);
File indexB=new File(INDEX_DIR.getAbsolutePath()+"/je/block");
env = new Environment(indexB, envConfig);
blocks = env.openDatabase(tn,"_blocks_",dbConfig);
JEDirectory dir=new JEDirectory(null,index,blocks);
System.out.println("不是这的错!");
IndexWriter writer = new IndexWriter(dir, new IKAnalyzer(true), true, IndexWriter.MaxFieldLength.LIMITED);
System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
indexDocs(writer, docDir,dir);
System.out.println("Optimizing...");
dir.flush();
writer.optimize();
writer.close();
Date start = new Date();
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) {
System.out.println(e);
}
} static void indexDocs(IndexWriter writer, File file,JEDirectory dir)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]),dir);
}
}
} else {
System.out.println("adding " + file);
try {
writer.addDocument(FileDocument.Document(file,dir));
}
// at least on windows, some temporary files raise this exception with an "access denied" message
// checking if the file can be read doesn't help
catch (FileNotFoundException fnfe) {
;
}
}
}
}
}
FileDocument.java
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/import java.io.File;
import java.io.FileReader;import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.db.DbDirectory;
import org.apache.lucene.store.je.JEDirectory;/** A utility for making Lucene Documents from a File. */public class FileDocument {
/** Makes a document for a File.
<p>
The document has three fields:
<ul>
<li><code>path</code>--containing the pathname of the file, as a stored,
untokenized field;
<li><code>modified</code>--containing the last modified date of the file as
a field as created by <a
href="lucene.document.DateTools.html">DateTools</a>; and
<li><code>contents</code>--containing the full contents of the file, as a
Reader field;
*/
public static Document Document(File f,JEDirectory dir)
throws java.io.FileNotFoundException {
// make a new, empty document
Document doc = new Document(); // Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", f.getPath(), Field.Store.NO, Field.Index.NOT_ANALYZED));
// Add the last modified date of the file a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
Field.Store.NO, Field.Index.NOT_ANALYZED)); // Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the fichecksum mismatch in segments filele to be in the system's default encoding.
// If that's not the case searching for special characters will fail.
doc.add(new Field("contents", new FileReader(f))); // return the document
return doc;
} private FileDocument() {}
}
berkey db jar包没找到在lucene下载的地方自己下了个版本je-4.0.71.jar问什么我运行报错→org.apache.lucene.index.CorruptIndexException: checksum mismatch in segments file是indexwriter writer=new Indexwriter(..
这句错误!来个高手解惑
解决方案 »
- 求一正则表达式写法
- Tomcat 启动时错误
- 我面临着抉择,需要你们的救助,我的前辈们,我的兄弟们!!
- struts下更新数据报错:违反了 PRIMARY KEY 约束 'PK_employees'。不能在对象 'employees' 中插入重复键
- 关于Xml的建树问题
- struts 中如何使用网页的gzip压缩?
- struts 中,多个html:submit 提交的问题??请高手指教!
- 请问,我在用Middlegen生成x.hbm.xml文件时,老是出现没有找到schema和catalog,怎么回事啊?
- 大家帮我看看weblogic 上这个数据库连接池的抱错是怎么回事?
- 在ejb中如果主键是自增列,应如何处理?
- 初学struts1,遇到的问题................
- 这个东西怎么设计数据库?
它的版本差别有点大。