import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;public class IndexerHelper { public enum CreateMode {
CREATE, UPDATE
} /**
* 描述:生成索引文件
*
* @param args 命令行指令
* "-index/-i",以下为索引存储目录
* "-source/-s",以下为索引源目录
* "-update/-u",更新操作 <BR/>
* @return Usage: java ConstructIndexer [-index/-i] "c:\index" [-source/s] "c:\source" [[-update/-u] / isNull]
* @see java.lang.String.equalsIgnoreCase()
* @throws IOException
*/
public static void create(String[] args) {
if (args == null || args.length >= 4) {
System.out.println("Usage:java ConstructIndexer [-index/-i] \"c:\\index\" [-source/s] \"c:\\source\" [[-update/-u] / isNull]");
System.exit(1);
}
String iDir = null, sDir = null;
boolean update = false;
for (int a = 0; a < args.length; a++) {
if ("-index".equalsIgnoreCase(args[a])) {
iDir = args[a + 1];
}
else if ("-source".equalsIgnoreCase(args[a])) {
sDir = args[a + 1];
}
else if ("-update".equalsIgnoreCase(args[a])) {
update = true;
}
}
File idx = new File(iDir);
if (!idx.exists() || !idx.isDirectory()) {
System.out.println("Index directory is not exist.");
System.exit(1);// by exception interrupt
}
Directory directory;
try {
directory = FSDirectory.open(idx);
// open the index directory
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
config.setOpenMode(update ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
IndexWriter writer = new IndexWriter(directory, config);
addDocs(writer, sDir);
writer.commit();
writer.close();
}
catch (IOException e) {
e.printStackTrace();
}
} public static void create(String iDir, String sDir, CreateMode mode) {
if (iDir == null || sDir == null || "".equals(iDir.trim()) || "".equals(sDir.trim())) {
return;
}
File idx = new File(iDir);
if (!idx.exists() || !idx.isDirectory()) {
System.out.println("Index directory is not exist.");
}
Directory directory;
try {
directory = FSDirectory.open(idx);
// open the index directory
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter writer = new IndexWriter(directory, config);
addDocs(writer, sDir);
writer.commit();
writer.close();
}
catch (IOException e) {
e.printStackTrace();
}
} public static void addDocs(IndexWriter writer, String sDir) {
File source = new File(sDir);
if (!source.exists()) {
System.out.println("Source directory is not exist.");
}
File[] files = source.listFiles();
if (files.length > 0) {
int f = 0;
for (;;) {
if (files[f].isDirectory()) {
addDocs(writer, sDir);
}
else {
if (files[f].getName().toLowerCase().endsWith(".txt"))
try {
addFileDocs(writer, files[f]);
}
catch (IOException e) {
e.printStackTrace();
}
}
f++;
if (f == files.length) {
break;
}
}
}
} public static void addFileDocs(IndexWriter writer, File sFile) throws IOException {
if (!sFile.canRead()) {
System.out.println(sFile.getCanonicalPath() + " can not Read.");
}
Document document = new Document();
FileInputStream fis = new FileInputStream(sFile);
Field field = new StringField("path", sFile.getPath(), Store.YES);
document.add(field);
field = new LongField("lastModified", sFile.lastModified(), Store.YES);
document.add(field);
field = new TextField("contents", new BufferedReader(new InputStreamReader(fis, "utf-8")));
document.add(field);
OpenMode mode = writer.getConfig().getOpenMode();
if (mode == OpenMode.CREATE) {
// Add
System.out.println("Create index:" + sFile.getPath());
writer.addDocument(document);
}
else if (mode == OpenMode.CREATE_OR_APPEND) {
// Update
System.out.println("Update index:" + sFile.getPath());
writer.updateDocument(new Term("filePath", sFile.getPath()), document);
}
fis.close();
} public static void main(String[] args) {
if (args != null && args.length > 3) {
create(args);
}
else { create("f:/test/idx", "f:/test/source", CreateMode.UPDATE); }
}
}
jars:lucene-core-4.0.0.jar,lucene-analyzers-common-4.0.0.jar
1.update时仍重复创建索引问题?
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;public class IndexerHelper { public enum CreateMode {
CREATE, UPDATE
} /**
* 描述:生成索引文件
*
* @param args 命令行指令
* "-index/-i",以下为索引存储目录
* "-source/-s",以下为索引源目录
* "-update/-u",更新操作 <BR/>
* @return Usage: java ConstructIndexer [-index/-i] "c:\index" [-source/s] "c:\source" [[-update/-u] / isNull]
* @see java.lang.String.equalsIgnoreCase()
* @throws IOException
*/
public static void create(String[] args) {
if (args == null || args.length >= 4) {
System.out.println("Usage:java ConstructIndexer [-index/-i] \"c:\\index\" [-source/s] \"c:\\source\" [[-update/-u] / isNull]");
System.exit(1);
}
String iDir = null, sDir = null;
boolean update = false;
for (int a = 0; a < args.length; a++) {
if ("-index".equalsIgnoreCase(args[a])) {
iDir = args[a + 1];
}
else if ("-source".equalsIgnoreCase(args[a])) {
sDir = args[a + 1];
}
else if ("-update".equalsIgnoreCase(args[a])) {
update = true;
}
}
File idx = new File(iDir);
if (!idx.exists() || !idx.isDirectory()) {
System.out.println("Index directory is not exist.");
System.exit(1);// by exception interrupt
}
Directory directory;
try {
directory = FSDirectory.open(idx);
// open the index directory
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
config.setOpenMode(update ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
IndexWriter writer = new IndexWriter(directory, config);
addDocs(writer, sDir);
writer.commit();
writer.close();
}
catch (IOException e) {
e.printStackTrace();
}
} public static void create(String iDir, String sDir, CreateMode mode) {
if (iDir == null || sDir == null || "".equals(iDir.trim()) || "".equals(sDir.trim())) {
return;
}
File idx = new File(iDir);
if (!idx.exists() || !idx.isDirectory()) {
System.out.println("Index directory is not exist.");
}
Directory directory;
try {
directory = FSDirectory.open(idx);
// open the index directory
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter writer = new IndexWriter(directory, config);
addDocs(writer, sDir);
writer.commit();
writer.close();
}
catch (IOException e) {
e.printStackTrace();
}
} public static void addDocs(IndexWriter writer, String sDir) {
File source = new File(sDir);
if (!source.exists()) {
System.out.println("Source directory is not exist.");
}
File[] files = source.listFiles();
if (files.length > 0) {
int f = 0;
for (;;) {
if (files[f].isDirectory()) {
addDocs(writer, sDir);
}
else {
if (files[f].getName().toLowerCase().endsWith(".txt"))
try {
addFileDocs(writer, files[f]);
}
catch (IOException e) {
e.printStackTrace();
}
}
f++;
if (f == files.length) {
break;
}
}
}
} public static void addFileDocs(IndexWriter writer, File sFile) throws IOException {
if (!sFile.canRead()) {
System.out.println(sFile.getCanonicalPath() + " can not Read.");
}
Document document = new Document();
FileInputStream fis = new FileInputStream(sFile);
Field field = new StringField("path", sFile.getPath(), Store.YES);
document.add(field);
field = new LongField("lastModified", sFile.lastModified(), Store.YES);
document.add(field);
field = new TextField("contents", new BufferedReader(new InputStreamReader(fis, "utf-8")));
document.add(field);
OpenMode mode = writer.getConfig().getOpenMode();
if (mode == OpenMode.CREATE) {
// Add
System.out.println("Create index:" + sFile.getPath());
writer.addDocument(document);
}
else if (mode == OpenMode.CREATE_OR_APPEND) {
// Update
System.out.println("Update index:" + sFile.getPath());
writer.updateDocument(new Term("filePath", sFile.getPath()), document);
}
fis.close();
} public static void main(String[] args) {
if (args != null && args.length > 3) {
create(args);
}
else { create("f:/test/idx", "f:/test/source", CreateMode.UPDATE); }
}
}
jars:lucene-core-4.0.0.jar,lucene-analyzers-common-4.0.0.jar
1.update时仍重复创建索引问题?
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货