如何提取出doc或docx中的纯文字?或xls中的表格?

解决方案 »

  1.   

    我知道微软提供过一个用于 VS2005 的 Office 编程插件(应该是 VSTO,即 Visual Studio Tools for Office),可以找来试试。
      

  2.   

    http://blog.csdn.net/46539492/archive/2008/05/20/2462334.aspx
      

  3.   

    doc 可以调用一个接口,另存为txt 或htm然后再处理比较方便。docx 就很容易了,你改个后缀名成 zip 然后打开看看就知道其中玄机了
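    注意:.Net自带的 GZipStream 只能处理 gzip/deflate 数据流,不能直接解开 ZIP 包;要把docx解压成一群XML,应使用 System.IO.Packaging 中的 Package/ZipPackage(.NET 3.0 起)或 System.IO.Compression.ZipArchive(.NET 4.5 起)。正文位于 word/document.xml,里面就有你要的任何东西了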
      

  4.   


    <%@ Page Language="C#" AutoEventWireup="true" CodeFile="test2.aspx.cs" Inherits="TKKMTest_test2" %><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" >
    <%-- Upload page backed by the TKKMTest_test2 code-behind class (test2.aspx.cs):
         the user picks a file, clicks the button, and the code-behind writes the
         document's text into the divTest element below. --%>
    <head runat="server">
        <title>无标题页</title>
    </head>
    <body>
        <form id="form1" runat="server">
        <div>
             <%-- Server-side file input; the code-behind reads it through fileUpload.PostedFile. --%>
             请选择您需要的文件:<input type="file" id="fileUpload" runat="server"  /> 
        <%-- Posts back and runs btnOpen_Click in the code-behind. --%>
        <asp:Button ID="btnOpen" runat="server" Text="打开文本" OnClick="btnOpen_Click" />
        <%-- Scrollable, editable output area; the code-behind assigns its InnerHtml. --%>
        <div id="divTest" runat="server" style="padding:5px;float:left;width:80%;height:500px;overflow-x:hidden;overflow-y:scroll;border-top:inset 1px Menu;border-left:inset 1px Menu;border-bottom:outset 1px 
    Menu;border-right:outset 1px Menu;font-size:12pt;" ContentEditable= "true"></div>
        </div>
        </form>
    </body>
    </html>
      

  5.   


    using System;
    using System.Data;
    using System.Configuration;
    using System.Collections;
    using System.Web;
    using System.Web.Security;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Web.UI.WebControls.WebParts;
    using System.Web.UI.HtmlControls;
    using Word = Microsoft.Office.Interop.Word;
    using System.Windows.Forms;

    /// <summary>
    /// Code-behind for test2.aspx: reads an uploaded Word (.doc) file through
    /// Word automation and shows its plain text in the <c>divTest</c> element.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this drives a full Word instance from inside an ASP.NET
    /// request. Microsoft documents server-side Office automation as unsupported;
    /// confirm this is acceptable for the deployment target.
    /// </remarks>
    public partial class TKKMTest_test2 : System.Web.UI.Page
    {
        /// <summary>Page load handler; nothing to initialise.</summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        #region Read the uploaded Word document and show its text in the div

        /// <summary>
        /// Click handler for <c>btnOpen</c>: validates the upload, extracts the
        /// document text and renders it (HTML-encoded) into <c>divTest</c>.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void btnOpen_Click(object sender, EventArgs e)
        {
            HttpPostedFile upload = fileUpload.PostedFile;

            // PostedFile is null when the request carried no file at all.
            if (upload == null || upload.ContentLength == 0)
            {
                InterService.Utility.Utils.ShowMessage("文件名不能为空,请重新选择!");
                return;
            }

            if (upload.ContentType != "application/msword")
            {
                InterService.Utility.Utils.ShowMessage("文件的类型不对,请重新选择!");
                return;
            }

            string text = ExtractWordText(upload);

            // The text comes from an untrusted upload, so encode it before it
            // becomes markup. Assigning even on failure clears any stale content.
            divTest.InnerHtml = HttpUtility.HtmlEncode(text ?? String.Empty);

            if (String.IsNullOrEmpty(text))
            {
                InterService.Utility.Utils.ShowMessage("导入文本试题失败!");
            }
        }

        /// <summary>
        /// Saves the upload to a temporary server-side file, opens it read-only in
        /// a hidden Word instance and returns the document's text.
        /// </summary>
        /// <param name="upload">The posted file to read.</param>
        /// <returns>The document text, or <c>null</c> when the file could not be read.</returns>
        private static string ExtractWordText(HttpPostedFile upload)
        {
            object missing = System.Reflection.Missing.Value;
            object visible = false;
            object readOnly = true;
            object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;

            // PostedFile.FileName is the client-supplied name, not a file on this
            // machine, so the upload is written to a server-side temp file that
            // Word can actually open.
            string tempFile = System.IO.Path.Combine(
                System.IO.Path.GetTempPath(),
                Guid.NewGuid().ToString("N") + ".doc");
            object fileName = tempFile;

            Word.Application wordApp = null;
            Word.Document document = null;
            try
            {
                upload.SaveAs(tempFile);

                wordApp = new Word.ApplicationClass();
                document = wordApp.Documents.Open(ref fileName, ref missing, ref readOnly, ref missing, ref missing
                            , ref missing, ref missing, ref missing, ref missing
                            , ref missing, ref missing, ref visible, ref missing
                            , ref missing, ref missing, ref missing);

                return document.Content.Text;
            }
            catch (Exception)
            {
                // Any failure (Word missing, corrupt file, I/O error) is reported
                // to the caller as "no text" so the user sees the import-failed
                // message instead of a silently empty page.
                return null;
            }
            finally
            {
                try
                {
                    if (document != null)
                    {
                        // Never let the hidden Word instance decide about saving.
                        document.Close(ref doNotSave, ref missing, ref missing);
                    }
                }
                finally
                {
                    if (wordApp != null)
                    {
                        wordApp.Quit(ref doNotSave, ref missing, ref missing);
                    }

                    DeleteTempFile(tempFile);
                }
            }
        }

        /// <summary>Best-effort removal of the temporary copy of the upload.</summary>
        /// <param name="path">Path of the temporary file.</param>
        private static void DeleteTempFile(string path)
        {
            try
            {
                System.IO.File.Delete(path);
            }
            catch (System.IO.IOException)
            {
                // Word may still hold the file briefly; leaving a stray temp file
                // is preferable to failing the request.
            }
            catch (UnauthorizedAccessException)
            {
                // Same reasoning as above: cleanup is best-effort only.
            }
        }

        #endregion
    }