Lucene太难学了,中文资料也没多少,请各位帮忙修改。想用Lucene实现:
输入一个关键词,
搜索所有路径下的文件内容(默认类型txt,可选doc、pdf等) -api lucene tag
按照频度输出存在这个单词的文件、关键词出现次数以及文件路径
import java.util.Comparator;//map关键词排序的比较器
import java.io.File; // used by FileBean below; kept ahead of the class because imports must precede type declarations

/**
 * Orders integers from largest to smallest.
 *
 * <p>Used as the key comparator of the frequency map so that the highest
 * keyword counts are iterated first.
 */
public class ComparatorImpl implements Comparator<Integer> {

    /**
     * Compares two integers in descending order.
     *
     * @param a the first value
     * @param b the second value
     * @return a negative number if {@code a} is greater than {@code b}, zero if
     *         they are equal, and a positive number if {@code a} is smaller
     */
    @Override
    public int compare(Integer a, Integer b) {
        // Compare instead of subtracting: "b - a" overflows when the operands
        // have opposite signs and are far apart, which flips the ordering.
        return b.compareTo(a);
    }
}
// FileBean (below) stores a file together with its keyword occurrence count;
// the beans are then added to a list for further processing.
/**
 * Mutable holder that pairs a file with the number of times the search
 * keyword was counted in it.
 */
public class FileBean {

    /** The file this entry describes; {@code null} until {@link #setFile} is called. */
    File file;

    /** Keyword occurrence count recorded for {@link #file}; defaults to 0. */
    int frequency;

    /**
     * @return the file this entry describes
     */
    public File getFile() {
        return this.file;
    }

    /**
     * @param file the file this entry should describe
     */
    public void setFile(File file) {
        this.file = file;
    }

    /**
     * @return the keyword occurrence count stored for the file
     */
    public int getFrequency() {
        return this.frequency;
    }

    /**
     * @param frequency the keyword occurrence count to store
     */
    public void setFrequency(int frequency) {
        this.frequency = frequency;
    }
}
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index over the files with a given extension found on the
 * local drives (C: to Z:) and answers a single-keyword query against it.
 *
 * <p>The index is rebuilt from scratch by the constructor; {@link #getResult()}
 * then searches it and groups the matching files by keyword frequency.
 */
public class FileSearch {
    // Directory that holds the Lucene index files.
    File indexDir = new File("C:\\IndexData");
    Analyzer luceneAnalyzer;
    IndexWriter indexWriter;
    String keyWord;

    /**
     * Indexes every file ending in {@code "." + type} on drives C: to Z:.
     *
     * <p>I/O failures are reported on standard error and do not propagate, so
     * the constructor always completes; the index may then be incomplete.
     *
     * @param queryStr the keyword that {@link #getResult()} will search for
     * @param type     the file extension to index, without the leading dot
     */
    public FileSearch(String queryStr, String type) {
        keyWord = queryStr;
        try {
            luceneAnalyzer = new StandardAnalyzer();
            // Open the writer ONCE for all drives. It is opened with
            // create=true, so re-opening it per drive (as before) discarded
            // the documents indexed from every previous drive.
            indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
            try {
                for (char c = 'c'; c <= 'z'; c++) {
                    // "c:\" is the drive root; a bare "c:" is a drive-relative
                    // path (the current directory of that drive) on Windows.
                    File root = new File(c + ":\\");
                    if (!root.isDirectory()) {
                        continue;
                    }
                    System.out.println("building index on dir " + root.getAbsolutePath() + "...");
                    try {
                        // Walk the drive recursively and index matching files.
                        createIndex(root, type);
                    } catch (IOException e) {
                        // Best effort: a failure on one drive must not stop the others.
                        e.printStackTrace();
                    }
                }
                indexWriter.optimize();
            } finally {
                indexWriter.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively indexes the files below {@code file} whose name ends with
     * {@code "." + type}.
     *
     * @param file the directory to walk
     * @param type the file extension to index, without the leading dot
     * @throws IOException if adding a document to the index fails
     */
    private void createIndex(File file, String type) throws IOException {
        File[] files = file.listFiles();
        if (files == null) {
            // listFiles() returns null when the path cannot be listed.
            return;
        }
        String suffix = "." + type;
        for (File obj : files) {
            if (obj.isDirectory()) {
                System.out.println(obj.getAbsolutePath());
                createIndex(obj, type);
            } else if (obj.getName().endsWith(suffix)) {
                System.out.println(obj.getAbsolutePath());
                addToIndex(obj);
            }
        }
    }

    /**
     * Adds one file to the index: its content as the "contents" field and its
     * absolute path as the stored, non-indexed "path" field.
     */
    private void addToIndex(File obj) throws IOException {
        Reader txtReader;
        try {
            txtReader = new FileReader(obj);
        } catch (IOException e) {
            // A single unreadable file (locked, access denied) should not
            // abort the traversal of the whole drive.
            System.err.println("skipping unreadable file " + obj.getAbsolutePath() + ": " + e.getMessage());
            return;
        }
        try {
            Document document = new Document();
            document.add(new Field("contents", txtReader));
            document.add(new Field("path", obj.getAbsolutePath(), Field.Store.YES, Field.Index.NO));
            indexWriter.addDocument(document);
        } finally {
            // Closing an already-closed Reader has no effect, so this is safe
            // whether or not the indexer closed it.
            txtReader.close();
        }
    }

    /**
     * Searches the index for the keyword and groups the matching files by the
     * number of times the keyword occurs in each file.
     *
     * @return a map from occurrence count to the files with that count, ordered
     *         from the highest count to the lowest; empty (never {@code null})
     *         when the index directory does not exist or nothing matches
     * @throws IOException if the index or one of the matching files cannot be read
     */
    public Map<Integer,List<FileBean>> getResult() throws IOException {
        List<FileBean> list = new ArrayList<FileBean>();
        // Check before opening the index; opening a missing directory would throw.
        if (!indexDir.exists()) {
            System.out.println("The Lucene index is not exist");
            return orderByTimes(list);
        }
        FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // NOTE(review): a TermQuery is not run through the analyzer, while
                // StandardAnalyzer presumably lower-cases the indexed tokens, so
                // keywords with upper-case letters or several words will likely
                // never match -- confirm and consider using QueryParser.
                TermQuery luceneQuery = new TermQuery(new Term("contents", keyWord));
                Hits hits = searcher.search(luceneQuery);
                System.out.println(hits.length());
                for (int i = 0; i < hits.length(); i++) {
                    Document document = hits.doc(i);
                    System.out.println("score: " + hits.score(i));
                    String path = document.getField("path").stringValue();
                    File file = new File(path);
                    // Count the keyword occurrences in the file itself.
                    int stat = calc(file, keyWord);
                    System.out.print(path + "\t");
                    System.out.println(stat + "\n");
                    FileBean fileBean = new FileBean();
                    fileBean.setFile(file);
                    fileBean.setFrequency(stat);
                    list.add(fileBean);
                }
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
        // Sort and return.
        return orderByTimes(list);
    }

    /**
     * Groups the beans by frequency.
     *
     * @param list the beans to group
     * @return a map keyed by frequency (descending) whose values are the beans
     *         sharing that frequency, in their original order
     */
    private Map<Integer,List<FileBean>> orderByTimes(List<FileBean> list) {
        Map<Integer,List<FileBean>> map = new TreeMap<Integer,List<FileBean>>(new ComparatorImpl());
        for (FileBean fileBean : list) {
            int times = fileBean.getFrequency();
            List<FileBean> sameFrequency = map.get(times);
            if (sameFrequency == null) {
                sameFrequency = new ArrayList<FileBean>();
                map.put(times, sameFrequency);
            }
            sameFrequency.add(fileBean);
        }
        return map;
    }

    /**
     * Counts the non-overlapping occurrences of {@code keyWord} in a file.
     *
     * @param file    the file to scan (decoded with the platform default charset)
     * @param keyWord the text to look for
     * @return the number of occurrences; 0 for a null or empty keyword
     * @throws IOException if the file cannot be read
     */
    private int calc(File file, String keyWord) throws IOException {
        return countOccurrences(readAll(file), keyWord);
    }

    /** Reads the whole file into a string, looping until end of stream. */
    private static String readAll(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        Reader reader = new FileReader(file);
        try {
            // A single read() may return fewer chars than requested, and the
            // char count differs from the byte length for multi-byte encodings,
            // so read in a loop instead of comparing against file.length().
            char[] buffer = new char[8192];
            int n;
            while ((n = reader.read(buffer)) != -1) {
                text.append(buffer, 0, n);
            }
        } finally {
            reader.close();
        }
        return text.toString();
    }

    /** Counts non-overlapping occurrences of {@code keyWord} in {@code text}. */
    private static int countOccurrences(String text, String keyWord) {
        if (keyWord == null || keyWord.length() == 0) {
            // An empty needle matches everywhere and would never advance.
            return 0;
        }
        int times = 0;
        int from = text.indexOf(keyWord);
        while (from != -1) {
            times++;
            from = text.indexOf(keyWord, from + keyWord.length());
        }
        return times;
    }
}
import java.awt.*;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.io.File;
import java.io.IOException;
import java.util.*;
import javax.swing.*;
import java.awt.event.*;//界面层,以已写好的原来的那个文件修改而来
/**
 * Swing front end for {@link FileSearch}: a keyword field, a file-type
 * selector, search/clear buttons and a text area that lists the results.
 * Adapted from an earlier, already written file.
 */
public class FileSearchGUI extends JPanel{
    static private final String newline = "\n";
    // Keyword input box.
    JTextField keyWord;
    JButton search;
    JButton clear;
    JLabel lab;
    // File type selector and the currently selected extension.
    JComboBox typeSelete;
    String type = "txt";
    // Output area for the search results.
    JTextArea result;
    File file;
    JFileChooser fc;

    /** Builds the panel and wires up the listeners. */
    public FileSearchGUI(){
        super(new BorderLayout());
        result = new JTextArea(15,50);
        result.setMargin(new Insets(5,5,5,5));
        result.setEditable(false);
        JScrollPane resultScrollPane = new JScrollPane(result);
        fc = new JFileChooser();
        keyWord = new JTextField(20);
        search = new JButton("开始查找");
        clear = new JButton("清除");
        lab = new JLabel(" key word");
        // File type selection.
        typeSelete = new JComboBox(new Object[] {"txt","doc","pdf"});
        typeSelete.addItemListener(new ItemListener(){
            public void itemStateChanged(ItemEvent e){
                // Read the selection from the combo box directly; there is no
                // need to reassign the field from the event source.
                type = (String) typeSelete.getSelectedItem();
            }
        });
        // Button listener shared by "search" and "clear".
        MyActionListener myListener = new MyActionListener();
        search.addActionListener(myListener);
        clear.addActionListener(myListener);
        JPanel searchPanel = new JPanel(new GridLayout(2,2,1,1));
        searchPanel.add(keyWord);
        searchPanel.add(lab);
        searchPanel.add(typeSelete);
        searchPanel.add(search);
        searchPanel.add(clear);
        add(searchPanel,BorderLayout.CENTER);
        add(resultScrollPane,BorderLayout.SOUTH);
    }

    /** Creates the application frame and shows it. */
    private static void createAndShowGUI(){
        JFrame.setDefaultLookAndFeelDecorated(true);
        JDialog.setDefaultLookAndFeelDecorated(true);
        JFrame frame = new JFrame("SwingFileChooserDemo");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JComponent newContentPane = new FileSearchGUI();
        newContentPane.setOpaque(true);
        frame.setContentPane(newContentPane);
        frame.setBounds(300,200,400,800);
        frame.pack();
        frame.setVisible(true);
    }

    /** Handles clicks on the "search" and "clear" buttons. */
    class MyActionListener implements ActionListener{
        public void actionPerformed(ActionEvent e){
            if(e.getSource() == search){
                String searchStr = keyWord.getText();
                try {
                    int data = 0;
                    result.setText("Searching work completed!\n");
                    Map<Integer,List<FileBean>> map = new FileSearch(searchStr,type).getResult();
                    // getResult() may return null when no index exists; treat
                    // that as "no results" instead of failing with an NPE.
                    if(map != null){
                        for(List<FileBean> list : map.values()){
                            for(FileBean fileBean : list){
                                result.append("FILE: "+fileBean.getFile().getName()+"\tKeyWord contained: "+fileBean.getFrequency()+"\t"+fileBean.getFile().getAbsolutePath()+"\n");
                                data++;
                            }
                        }
                    }
                    result.append(data+" results!");
                } catch (IOException e1) {
                    e1.printStackTrace();
                    // Tell the user too; the console is usually not visible.
                    result.append("Search failed: "+e1.getMessage());
                }
            }else if(e.getSource() == clear){
                result.setText("");
            }
        }
    }

    /** Entry point: starts the GUI on the event dispatch thread. */
    public static void main(String[] args){
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run(){
                createAndShowGUI();
            }
        });
    }
}
lucene.jar下载
http://download.csdn.net/source/1824370
输入一个关键词,
搜索所有路径下的文件内容(默认类型txt,可选doc、pdf等) -api lucene tag
按照频度输出存在这个单词的文件、关键词出现次数以及文件路径
import java.util.Comparator;//map关键词排序的比较器
import java.io.File; // used by FileBean below; kept ahead of the class because imports must precede type declarations

/**
 * Orders integers from largest to smallest.
 *
 * <p>Used as the key comparator of the frequency map so that the highest
 * keyword counts are iterated first.
 */
public class ComparatorImpl implements Comparator<Integer> {

    /**
     * Compares two integers in descending order.
     *
     * @param a the first value
     * @param b the second value
     * @return a negative number if {@code a} is greater than {@code b}, zero if
     *         they are equal, and a positive number if {@code a} is smaller
     */
    @Override
    public int compare(Integer a, Integer b) {
        // Compare instead of subtracting: "b - a" overflows when the operands
        // have opposite signs and are far apart, which flips the ordering.
        return b.compareTo(a);
    }
}
// FileBean (below) stores a file together with its keyword occurrence count;
// the beans are then added to a list for further processing.
/**
 * Mutable holder that pairs a file with the number of times the search
 * keyword was counted in it.
 */
public class FileBean {

    /** The file this entry describes; {@code null} until {@link #setFile} is called. */
    File file;

    /** Keyword occurrence count recorded for {@link #file}; defaults to 0. */
    int frequency;

    /**
     * @return the file this entry describes
     */
    public File getFile() {
        return this.file;
    }

    /**
     * @param file the file this entry should describe
     */
    public void setFile(File file) {
        this.file = file;
    }

    /**
     * @return the keyword occurrence count stored for the file
     */
    public int getFrequency() {
        return this.frequency;
    }

    /**
     * @param frequency the keyword occurrence count to store
     */
    public void setFrequency(int frequency) {
        this.frequency = frequency;
    }
}
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index over the files with a given extension found on the
 * local drives (C: to Z:) and answers a single-keyword query against it.
 *
 * <p>The index is rebuilt from scratch by the constructor; {@link #getResult()}
 * then searches it and groups the matching files by keyword frequency.
 */
public class FileSearch {
    // Directory that holds the Lucene index files.
    File indexDir = new File("C:\\IndexData");
    Analyzer luceneAnalyzer;
    IndexWriter indexWriter;
    String keyWord;

    /**
     * Indexes every file ending in {@code "." + type} on drives C: to Z:.
     *
     * <p>I/O failures are reported on standard error and do not propagate, so
     * the constructor always completes; the index may then be incomplete.
     *
     * @param queryStr the keyword that {@link #getResult()} will search for
     * @param type     the file extension to index, without the leading dot
     */
    public FileSearch(String queryStr, String type) {
        keyWord = queryStr;
        try {
            luceneAnalyzer = new StandardAnalyzer();
            // Open the writer ONCE for all drives. It is opened with
            // create=true, so re-opening it per drive (as before) discarded
            // the documents indexed from every previous drive.
            indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
            try {
                for (char c = 'c'; c <= 'z'; c++) {
                    // "c:\" is the drive root; a bare "c:" is a drive-relative
                    // path (the current directory of that drive) on Windows.
                    File root = new File(c + ":\\");
                    if (!root.isDirectory()) {
                        continue;
                    }
                    System.out.println("building index on dir " + root.getAbsolutePath() + "...");
                    try {
                        // Walk the drive recursively and index matching files.
                        createIndex(root, type);
                    } catch (IOException e) {
                        // Best effort: a failure on one drive must not stop the others.
                        e.printStackTrace();
                    }
                }
                indexWriter.optimize();
            } finally {
                indexWriter.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively indexes the files below {@code file} whose name ends with
     * {@code "." + type}.
     *
     * @param file the directory to walk
     * @param type the file extension to index, without the leading dot
     * @throws IOException if adding a document to the index fails
     */
    private void createIndex(File file, String type) throws IOException {
        File[] files = file.listFiles();
        if (files == null) {
            // listFiles() returns null when the path cannot be listed.
            return;
        }
        String suffix = "." + type;
        for (File obj : files) {
            if (obj.isDirectory()) {
                System.out.println(obj.getAbsolutePath());
                createIndex(obj, type);
            } else if (obj.getName().endsWith(suffix)) {
                System.out.println(obj.getAbsolutePath());
                addToIndex(obj);
            }
        }
    }

    /**
     * Adds one file to the index: its content as the "contents" field and its
     * absolute path as the stored, non-indexed "path" field.
     */
    private void addToIndex(File obj) throws IOException {
        Reader txtReader;
        try {
            txtReader = new FileReader(obj);
        } catch (IOException e) {
            // A single unreadable file (locked, access denied) should not
            // abort the traversal of the whole drive.
            System.err.println("skipping unreadable file " + obj.getAbsolutePath() + ": " + e.getMessage());
            return;
        }
        try {
            Document document = new Document();
            document.add(new Field("contents", txtReader));
            document.add(new Field("path", obj.getAbsolutePath(), Field.Store.YES, Field.Index.NO));
            indexWriter.addDocument(document);
        } finally {
            // Closing an already-closed Reader has no effect, so this is safe
            // whether or not the indexer closed it.
            txtReader.close();
        }
    }

    /**
     * Searches the index for the keyword and groups the matching files by the
     * number of times the keyword occurs in each file.
     *
     * @return a map from occurrence count to the files with that count, ordered
     *         from the highest count to the lowest; empty (never {@code null})
     *         when the index directory does not exist or nothing matches
     * @throws IOException if the index or one of the matching files cannot be read
     */
    public Map<Integer,List<FileBean>> getResult() throws IOException {
        List<FileBean> list = new ArrayList<FileBean>();
        // Check before opening the index; opening a missing directory would throw.
        if (!indexDir.exists()) {
            System.out.println("The Lucene index is not exist");
            return orderByTimes(list);
        }
        FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // NOTE(review): a TermQuery is not run through the analyzer, while
                // StandardAnalyzer presumably lower-cases the indexed tokens, so
                // keywords with upper-case letters or several words will likely
                // never match -- confirm and consider using QueryParser.
                TermQuery luceneQuery = new TermQuery(new Term("contents", keyWord));
                Hits hits = searcher.search(luceneQuery);
                System.out.println(hits.length());
                for (int i = 0; i < hits.length(); i++) {
                    Document document = hits.doc(i);
                    System.out.println("score: " + hits.score(i));
                    String path = document.getField("path").stringValue();
                    File file = new File(path);
                    // Count the keyword occurrences in the file itself.
                    int stat = calc(file, keyWord);
                    System.out.print(path + "\t");
                    System.out.println(stat + "\n");
                    FileBean fileBean = new FileBean();
                    fileBean.setFile(file);
                    fileBean.setFrequency(stat);
                    list.add(fileBean);
                }
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
        // Sort and return.
        return orderByTimes(list);
    }

    /**
     * Groups the beans by frequency.
     *
     * @param list the beans to group
     * @return a map keyed by frequency (descending) whose values are the beans
     *         sharing that frequency, in their original order
     */
    private Map<Integer,List<FileBean>> orderByTimes(List<FileBean> list) {
        Map<Integer,List<FileBean>> map = new TreeMap<Integer,List<FileBean>>(new ComparatorImpl());
        for (FileBean fileBean : list) {
            int times = fileBean.getFrequency();
            List<FileBean> sameFrequency = map.get(times);
            if (sameFrequency == null) {
                sameFrequency = new ArrayList<FileBean>();
                map.put(times, sameFrequency);
            }
            sameFrequency.add(fileBean);
        }
        return map;
    }

    /**
     * Counts the non-overlapping occurrences of {@code keyWord} in a file.
     *
     * @param file    the file to scan (decoded with the platform default charset)
     * @param keyWord the text to look for
     * @return the number of occurrences; 0 for a null or empty keyword
     * @throws IOException if the file cannot be read
     */
    private int calc(File file, String keyWord) throws IOException {
        return countOccurrences(readAll(file), keyWord);
    }

    /** Reads the whole file into a string, looping until end of stream. */
    private static String readAll(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        Reader reader = new FileReader(file);
        try {
            // A single read() may return fewer chars than requested, and the
            // char count differs from the byte length for multi-byte encodings,
            // so read in a loop instead of comparing against file.length().
            char[] buffer = new char[8192];
            int n;
            while ((n = reader.read(buffer)) != -1) {
                text.append(buffer, 0, n);
            }
        } finally {
            reader.close();
        }
        return text.toString();
    }

    /** Counts non-overlapping occurrences of {@code keyWord} in {@code text}. */
    private static int countOccurrences(String text, String keyWord) {
        if (keyWord == null || keyWord.length() == 0) {
            // An empty needle matches everywhere and would never advance.
            return 0;
        }
        int times = 0;
        int from = text.indexOf(keyWord);
        while (from != -1) {
            times++;
            from = text.indexOf(keyWord, from + keyWord.length());
        }
        return times;
    }
}
import java.awt.*;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.io.File;
import java.io.IOException;
import java.util.*;
import javax.swing.*;
import java.awt.event.*;//界面层,以已写好的原来的那个文件修改而来
/**
 * Swing front end for {@link FileSearch}: a keyword field, a file-type
 * selector, search/clear buttons and a text area that lists the results.
 * Adapted from an earlier, already written file.
 */
public class FileSearchGUI extends JPanel{
    static private final String newline = "\n";
    // Keyword input box.
    JTextField keyWord;
    JButton search;
    JButton clear;
    JLabel lab;
    // File type selector and the currently selected extension.
    JComboBox typeSelete;
    String type = "txt";
    // Output area for the search results.
    JTextArea result;
    File file;
    JFileChooser fc;

    /** Builds the panel and wires up the listeners. */
    public FileSearchGUI(){
        super(new BorderLayout());
        result = new JTextArea(15,50);
        result.setMargin(new Insets(5,5,5,5));
        result.setEditable(false);
        JScrollPane resultScrollPane = new JScrollPane(result);
        fc = new JFileChooser();
        keyWord = new JTextField(20);
        search = new JButton("开始查找");
        clear = new JButton("清除");
        lab = new JLabel(" key word");
        // File type selection.
        typeSelete = new JComboBox(new Object[] {"txt","doc","pdf"});
        typeSelete.addItemListener(new ItemListener(){
            public void itemStateChanged(ItemEvent e){
                // Read the selection from the combo box directly; there is no
                // need to reassign the field from the event source.
                type = (String) typeSelete.getSelectedItem();
            }
        });
        // Button listener shared by "search" and "clear".
        MyActionListener myListener = new MyActionListener();
        search.addActionListener(myListener);
        clear.addActionListener(myListener);
        JPanel searchPanel = new JPanel(new GridLayout(2,2,1,1));
        searchPanel.add(keyWord);
        searchPanel.add(lab);
        searchPanel.add(typeSelete);
        searchPanel.add(search);
        searchPanel.add(clear);
        add(searchPanel,BorderLayout.CENTER);
        add(resultScrollPane,BorderLayout.SOUTH);
    }

    /** Creates the application frame and shows it. */
    private static void createAndShowGUI(){
        JFrame.setDefaultLookAndFeelDecorated(true);
        JDialog.setDefaultLookAndFeelDecorated(true);
        JFrame frame = new JFrame("SwingFileChooserDemo");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JComponent newContentPane = new FileSearchGUI();
        newContentPane.setOpaque(true);
        frame.setContentPane(newContentPane);
        frame.setBounds(300,200,400,800);
        frame.pack();
        frame.setVisible(true);
    }

    /** Handles clicks on the "search" and "clear" buttons. */
    class MyActionListener implements ActionListener{
        public void actionPerformed(ActionEvent e){
            if(e.getSource() == search){
                String searchStr = keyWord.getText();
                try {
                    int data = 0;
                    result.setText("Searching work completed!\n");
                    Map<Integer,List<FileBean>> map = new FileSearch(searchStr,type).getResult();
                    // getResult() may return null when no index exists; treat
                    // that as "no results" instead of failing with an NPE.
                    if(map != null){
                        for(List<FileBean> list : map.values()){
                            for(FileBean fileBean : list){
                                result.append("FILE: "+fileBean.getFile().getName()+"\tKeyWord contained: "+fileBean.getFrequency()+"\t"+fileBean.getFile().getAbsolutePath()+"\n");
                                data++;
                            }
                        }
                    }
                    result.append(data+" results!");
                } catch (IOException e1) {
                    e1.printStackTrace();
                    // Tell the user too; the console is usually not visible.
                    result.append("Search failed: "+e1.getMessage());
                }
            }else if(e.getSource() == clear){
                result.setText("");
            }
        }
    }

    /** Entry point: starts the GUI on the event dispatch thread. */
    public static void main(String[] args){
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run(){
                createAndShowGUI();
            }
        });
    }
}
lucene.jar下载
http://download.csdn.net/source/1824370
给你份中文资料