下面给出读取 Word 文件的代码,保存到 txt 的部分请你自己写一下:
<%@ page contentType="text/html;charset=gb2312" import="java.io.*,org.textmining.text.extraction.WordExtractor,java.text.*,java.util.*,java.lang.*"%>
<%@ include file="global.jsp" %>
<%@ include file="TaxConn.jsp" %>
<html>
<head >
<title>文件测试</title>
<META http-equiv=Content-Type content="text/html; charset=GBK">
</head>
<body>
  <%
//取得目录,注意路径格式\\
String path="d:\\test"; 
File d = new File(path);
File list[] = d.listFiles();
String url;
//out.println("在目录"+path+"下的文件有:<ul>");
for(int i =0;i<list.length;i++){
//url=URLEncode(list[i].getName());
// out.println("<a href='http://localhost:8080/note/"+d+"/"+url+"'>"+list[i].getName()+"<br>");
 File file=new File(path+"\\"+list[i].getName());
 FileInputStream fins=new FileInputStream(file);
String filename=list[i].getName();
String filetwo=filename.substring(0,2);
//out.print(filetwo);
if(filetwo.equals("~$")==false)
{
WordExtractor extractor = new WordExtractor();
String str = extractor.extractText(fins);String strOut=null;
    strOut=replaceString(str,"\n\t","<br>");
     strOut=replaceString(str,"\n","<br>");
 strOut=replaceString(str,"\r","<br>");
     strOut=replaceString(str," ","&nbsp;");
out.println(strOut); }
 fins.close();
  }
   %></body>
</html>