全文检索为什么出错? private String indexPath = "c://"; private String filePath = "e://"; private String queryStr = "doc";记住全部要// 不能是单斜杆 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 c://?是c:\\吧不过好像c:/也可以啊 是因为你在执行的时候,lucence的索引库还没有建成吧,这样的话你怎么查询呢?有几个缺省的文件你看在不在,write.lock;segment;deletable还有一些以“_”开头的文件,是实际存放索引的文件。 对了,你有没有看过车东的那篇文章,可以去他的主页上找找,他有一个lucence的opensource;网址好像是www.chedong.com/lucence/;找找看。解决了告诉我如何建一个空的索引库,嗬嗬,我也一直没有解决,不过我有一个很小的库,每次就在这个上面建,不会出错,但是指定一个空的目录不行,而且我还没有彻底弄明白那个三文件的格式和确切意义。 有个别的问题,你为什么用SimpleAnalyzer?这个对中文的支持很弱的,分词是根据标点符号切分的,这样的话你必须匹配至少一句话才能查询出来,做搜索的不用我教你吧,分词是很关键的。 我这里有个切分单字的,嗬嗬,基于词库的当然是不能给你了!package org.apache.lucene.analysis.cn;import java.io.*;import org.apache.lucene.analysis.*;public final class ChineseTokenizer extends Tokenizer { private int offset; private int bufferIndex; private int dataLen; private static final int MAX_WORD_LEN = 255; private static final int IO_BUFFER_SIZE = 1024; private final char buffer[] = new char[255]; private final char ioBuffer[] = new char[1024]; private int length; private int start; public ChineseTokenizer(Reader reader) { offset = 0; bufferIndex = 0; dataLen = 0; super.input = reader; } private final void push(char c) { if (length == 0) { start = offset - 1; } buffer[length++] = Character.toLowerCase(c); } private final void pop() { length--; } private final Token flush() { if (length > 0) { return new Token(new String(buffer, 0, length), start, start + length); } else { return null; } } public final Token next() throws IOException { length = 0; start = offset; Token token = null; while (token == null) { offset++; if (bufferIndex >= dataLen) { dataLen = super.input.read(ioBuffer); bufferIndex = 0; } if (dataLen == -1) { token = flush(); if (token == null || token.termText().length() <= 1) { return null; } break; } char c = ioBuffer[bufferIndex++]; switch (Character.getType(c)) { case Character.TITLECASE_LETTER: case Character.MODIFIER_LETTER: case Character.NON_SPACING_MARK: case Character.ENCLOSING_MARK: case 
Character.COMBINING_SPACING_MARK: default: if (length == 1) { pop(); } else if (length > 0) { token = flush(); } break; case Character.UPPERCASE_LETTER: case Character.LOWERCASE_LETTER: case Character.DECIMAL_DIGIT_NUMBER: if (length > 0 && Character.getType(buffer[length - 1]) == Character.OTHER_LETTER) { if (length > 1) { token = flush(); bufferIndex--; break; } else { pop(); } } push(c); if (length == 255) { token = flush(); } break; case Character.OTHER_LETTER: if (length > 0) { if (Character.getType(buffer[length - 1]) == Character.OTHER_LETTER) { push(c); token = flush(); bufferIndex--; } else { if (length > 1) { bufferIndex--; token = flush(); } else { pop(); push(c); } } } else { push(c); } break; } } /* String temp = token.termText(); try { System.out.println("index word: "+new String(temp.getBytes("GBK"))+" word length = "+temp.length()); } catch (Exception e) {} */ return token; }} 小白请求一下JSP 用session存数组问题 为什么要重写tostring()方法? 本人是超级菜鸟,刚刚学完J2SE,想学J2EE有啥好建议? JPA插入字节数组 急求EA6应用服务器+SQLSERVER2000的配置方式 spring 运行问题 很简单HQL JOIN 问题 求救,struts中的checkbox问题 20分求jboss下的一个文件:javax-servlet.jar 如何用JAVAMAIL实现收件箱,垃圾箱,草稿箱的功能啊? 请教,在linux做j2ee,用什么版本linux比较好,红旗的怎么样?? 能帮我解决这个问题的我给他168分...祝他一路发...
路径应该写成 c:\\ 吧?
不过好像写成 c:/ 也可以。
你看一下索引目录里这几个缺省文件在不在:write.lock、segments、deletable,另外还有一些以“_”开头的文件,它们才是实际存放索引数据的文件。
package org.apache.lucene.analysis.cn;

import java.io.*;
import org.apache.lucene.analysis.*;

/**
 * Tokenizer producing two kinds of tokens from a character stream:
 * <ul>
 *   <li>runs of upper-case letters, lower-case letters and decimal digits,
 *       lower-cased and capped at {@link #MAX_WORD_LEN} characters;</li>
 *   <li>overlapping two-character tokens (bigrams) for consecutive characters
 *       whose {@link Character#getType(char)} is
 *       {@link Character#OTHER_LETTER} (the category CJK ideographs fall
 *       into).</li>
 * </ul>
 * Every other character acts as a separator. Pending content that is only one
 * character long is discarded, so single-character tokens are never emitted.
 *
 * <p>Token start/end offsets are character positions in the input stream.
 */
public final class ChineseTokenizer extends Tokenizer {

    /** Maximum number of characters in one token. */
    private static final int MAX_WORD_LEN = 255;

    /** Number of characters requested from the reader per read call. */
    private static final int IO_BUFFER_SIZE = 1024;

    /** Number of input characters consumed so far (pushed-back ones excluded). */
    private int offset;

    /** Index of the next unread character in {@link #ioBuffer}. */
    private int bufferIndex;

    /** Number of valid characters in {@link #ioBuffer}; -1 once the reader is exhausted. */
    private int dataLen;

    /** Characters of the token currently being assembled. */
    private final char[] buffer = new char[MAX_WORD_LEN];

    /** Raw characters most recently read from the input. */
    private final char[] ioBuffer = new char[IO_BUFFER_SIZE];

    /** Number of characters currently held in {@link #buffer}. */
    private int length;

    /** Input offset of the first character held in {@link #buffer}. */
    private int start;

    /**
     * Creates a tokenizer reading from the given reader.
     *
     * @param reader source of the text to tokenize
     */
    public ChineseTokenizer(Reader reader) {
        offset = 0;
        bufferIndex = 0;
        dataLen = 0;
        super.input = reader;
    }

    /**
     * Appends the lower-cased character to the pending token. When it is the
     * first character of the token, its input position is recorded as the
     * token start.
     */
    private void push(char c) {
        if (length == 0) {
            // offset was already advanced past c, so c itself sits at offset - 1.
            start = offset - 1;
        }
        buffer[length++] = Character.toLowerCase(c);
    }

    /** Drops the last character of the pending token. */
    private void pop() {
        length--;
    }

    /**
     * Pushes the most recently consumed character back so that the next loop
     * iteration (or the next call to {@link #next()}) reads it again. The
     * offset is rewound together with the buffer index; otherwise every
     * re-read character would be counted twice and all following token
     * offsets would drift.
     */
    private void unread() {
        bufferIndex--;
        offset--;
    }

    /**
     * Builds a token from the pending characters.
     *
     * @return the token, or {@code null} when nothing is pending
     */
    private Token flush() {
        if (length > 0) {
            return new Token(new String(buffer, 0, length), start, start + length);
        }
        return null;
    }

    /**
     * Returns the next token in the stream.
     *
     * @return the next token, or {@code null} at end of input
     * @throws IOException if reading from the underlying reader fails
     */
    public final Token next() throws IOException {
        length = 0;
        start = offset;
        Token token = null;

        while (token == null) {
            if (bufferIndex >= dataLen) {
                dataLen = super.input.read(ioBuffer);
                bufferIndex = 0;
            }

            if (dataLen == -1) {
                // End of input: emit what is pending unless it is a lone character.
                token = flush();
                if (token == null || token.termText().length() <= 1) {
                    return null;
                }
                break;
            }

            char c = ioBuffer[bufferIndex++];
            // Count the character only once it has really been consumed.
            offset++;

            switch (Character.getType(c)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    if (length > 0
                            && Character.getType(buffer[length - 1]) == Character.OTHER_LETTER) {
                        if (length > 1) {
                            // Emit the pending token and re-read c on the next call.
                            token = flush();
                            unread();
                            break;
                        }
                        // A lone OTHER_LETTER character before a word is discarded.
                        pop();
                    }
                    push(c);
                    if (length == MAX_WORD_LEN) {
                        token = flush();
                    }
                    break;

                case Character.OTHER_LETTER:
                    if (length == 0) {
                        push(c);
                    } else if (Character.getType(buffer[length - 1]) == Character.OTHER_LETTER) {
                        // Emit the bigram, then re-read c so it also starts the next one.
                        push(c);
                        token = flush();
                        unread();
                    } else if (length > 1) {
                        // A word is pending: emit it and re-read c on the next call.
                        unread();
                        token = flush();
                    } else {
                        // A lone word character is replaced by c.
                        pop();
                        push(c);
                    }
                    break;

                default:
                    // Any other character is a separator.
                    if (length == 1) {
                        pop();
                    } else if (length > 0) {
                        token = flush();
                    }
                    break;
            }
        }
        return token;
    }
}