下面给出读取 Word 文件的代码,保存为 txt 的部分你自己写一下:
<%@ page contentType="text/html;charset=gb2312" import="java.io.*,org.textmining.text.extraction.WordExtractor,java.text.*,java.util.*,java.lang.*"%>
<%@ include file="global.jsp" %>
<%@ include file="TaxConn.jsp" %>
<html>
<head >
<title>文件测试</title>
<META http-equiv=Content-Type content="text/html; charset=GBK">
</head>
<body>
<%
// Scan a fixed directory, extract the text of every Word document in it and
// print that text into the page with line breaks converted to <br>.
// Note the path format: backslashes must be escaped as \\ in a Java string.
String path="d:\\test";
File d = new File(path);
// listFiles() returns null when the path does not exist or is not a directory;
// treat that the same as an empty directory instead of failing with a NullPointerException.
File list[] = d.listFiles();
if(list==null){
    list=new File[0];
}
for(int i =0;i<list.length;i++){
    File file=list[i];
    String filename=file.getName();
    // Skip sub-directories (opening them as a stream throws) and entries whose
    // name starts with "~$". startsWith() is also safe for names shorter than
    // two characters, where substring(0,2) would throw.
    if(!file.isFile() || filename.startsWith("~$")){
        continue;
    }
    // Open the stream only for files that will actually be read, and close it
    // even when extraction fails.
    FileInputStream fins=new FileInputStream(file);
    try{
        WordExtractor extractor = new WordExtractor();
        String str = extractor.extractText(fins);
        // Feed each replacement the result of the previous one; restarting from
        // the raw text every time would keep only the last replacement.
        String strOut=replaceString(str,"\n\t","<br>");
        strOut=replaceString(strOut,"\n","<br>");
        strOut=replaceString(strOut,"\r","<br>");
        // NOTE(review): as written this replaces a space with a space; the target
        // was presumably an HTML entity such as &nbsp; that was lost when the code
        // was pasted -- confirm against the original page.
        strOut=replaceString(strOut," "," ");
        out.println(strOut);
    }finally{
        fins.close();
    }
}
%></body>
</html>
<%@ page contentType="text/html;charset=gb2312" import="java.io.*,org.textmining.text.extraction.WordExtractor,java.text.*,java.util.*,java.lang.*"%>
<%@ include file="global.jsp" %>
<%@ include file="TaxConn.jsp" %>
<html>
<head >
<title>文件测试</title>
<META http-equiv=Content-Type content="text/html; charset=GBK">
</head>
<body>
<%
// Scan a fixed directory, extract the text of every Word document in it and
// print that text into the page with line breaks converted to <br>.
// Note the path format: backslashes must be escaped as \\ in a Java string.
String path="d:\\test";
File d = new File(path);
// listFiles() returns null when the path does not exist or is not a directory;
// treat that the same as an empty directory instead of failing with a NullPointerException.
File list[] = d.listFiles();
if(list==null){
    list=new File[0];
}
for(int i =0;i<list.length;i++){
    File file=list[i];
    String filename=file.getName();
    // Skip sub-directories (opening them as a stream throws) and entries whose
    // name starts with "~$". startsWith() is also safe for names shorter than
    // two characters, where substring(0,2) would throw.
    if(!file.isFile() || filename.startsWith("~$")){
        continue;
    }
    // Open the stream only for files that will actually be read, and close it
    // even when extraction fails.
    FileInputStream fins=new FileInputStream(file);
    try{
        WordExtractor extractor = new WordExtractor();
        String str = extractor.extractText(fins);
        // Feed each replacement the result of the previous one; restarting from
        // the raw text every time would keep only the last replacement.
        String strOut=replaceString(str,"\n\t","<br>");
        strOut=replaceString(strOut,"\n","<br>");
        strOut=replaceString(strOut,"\r","<br>");
        // NOTE(review): as written this replaces a space with a space; the target
        // was presumably an HTML entity such as &nbsp; that was lost when the code
        // was pasted -- confirm against the original page.
        strOut=replaceString(strOut," "," ");
        out.println(strOut);
    }finally{
        fins.close();
    }
}
%></body>
</html>
读取 Word 文件我也会,主要是不知道如何保存为 txt。
// Save the document in txt format to a temporary file.
// Invokes the "SaveAs" method on doc with two arguments: the target tpFile and
// the format code 2.
// NOTE(review): 2 presumably corresponds to Word's wdFormatText (plain text)
// save format -- confirm against the WdSaveFormat enumeration.
// NOTE(review): doc and tpFile are defined outside this snippet; the trailing
// new int[1] looks like the bridge's error-argument holder -- confirm.
Dispatch.invoke(doc,"SaveAs", Dispatch.Method, new Object[]{tpFile,new Variant(2)}, new int[1]);