我现在做的只能对纯文字文件进行索引,如何对doc或docx进行内容索引?
下面这段换成doc就不行了 IndexWriter writer = null; 
                        Document doc = new Document(); 
writer = new IndexWriter(@"E:\ss\i", new StandardAnalyzer(), true);  StreamReader sr = new StreamReader(@"E:\ss\111.txt", System.Text.Encoding.GetEncoding("gb2312"));
doc.Add(new Field(@"E:\ss\111.txt", "name", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.Add(new Field("content", sr.ReadToEnd().ToString(), Field.Store.YES, Field.Index.TOKENIZED));
sr.Close();
doc.Add(Field.UnIndexed("filename", "111.txt")); 
writer.AddDocument(doc);
writer.Optimize();
writer.Close();

解决方案 »

  1.   

    Lucene.Net 哎,没接触过。。传说中比全文索引快的东西~
      

  2.   

    /// <summary>
    /// Builds the index at <c>indexpath</c> from the rows of the first table
    /// in <paramref name="dsMarketResult"/>, adding one document per row.
    /// </summary>
    /// <remarks>
    /// Column 0 is added as the "id" keyword field. Column 1, after
    /// <c>ClassIndex.RemoveHTML</c>, is added both as the "title" field and
    /// as the "content" text field. Any exception is appended to
    /// d:\index\createindexlog.txt and is not rethrown.
    /// </remarks>
    /// <param name="dsMarketResult">Data set whose first table supplies the rows to index.</param>
    public void CreateMarketIndex(DataSet dsMarketResult)
    {
        try
        {
            IndexWriter writer = null;
            try
            {
                writer = new IndexWriter(indexpath, getAnalyzer(), true);
                writer.mergeFactor = 3000;
                writer.minMergeDocs = 3000;
                writer.maxMergeDocs = int.MaxValue;

                // Resolve the row collection once instead of on every access.
                DataRowCollection rows = dsMarketResult.Tables[0].Rows;
                for (int i = 0; i < rows.Count; i++)
                {
                    DataRow row = rows[i];
                    Document doc = new Document();
                    doc.Add(Field.Keyword("id", row[0].ToString()));
                    doc.Add(Field.UnIndexed("title", ClassIndex.RemoveHTML(row[1].ToString())));

                    // NOTE(review): title and content both come from column 1.
                    // The line below was already disabled in the original;
                    // confirm whether column 2 should be part of the content.
                    string content = row[1].ToString();
                    //content += row[2].ToString();
                    doc.Add(Field.Text("content", ClassIndex.RemoveHTML(content)));
                    writer.AddDocument(doc);
                }
                writer.Optimize();
            }
            finally
            {
                // Close the writer even when indexing fails; the original
                // only closed it on the success path.
                if (writer != null)
                {
                    writer.Close();
                }
            }
        }
        catch (Exception exp2)
        {
            // Best-effort logging: append the failure to the log file and swallow it.
            // The using block flushes and closes the log even if a write throws.
            using (System.IO.StreamWriter write = new System.IO.StreamWriter(@"d:\index\createindexlog.txt", true, System.Text.Encoding.Unicode, 10240))
            {
                write.WriteLine("记录时间:" + DateTime.Now.ToString());
                write.Write(exp2.ToString());
            }
        }
    }
      

  3.   

    4楼的dsMarketResult哪来的,如何和doc、xls文件结合呀
      

  4.   

    doc、xls 文件不能直接建索引,必须先把其中的文本内容读取出来,再对读出的文本建立索引。