如何使用Java将Word文件转化成HTML，同时尽可能保持格式不变。

谢谢。就像 docs.google.com 还有 Writely 实现的那样。

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

import com.jacob.com.*;
import com.jacob.activeX.*;
import java.io.*;//取得指定目录下面所有的doc文件名称
public class wordtohtml
{
// Value of Word's WdSaveFormat.wdFormatHTML, passed to Document.SaveAs.
private static final int WD_FORMAT_HTML = 8;
// File-name suffix (dot included) that identifies a Word document.
private static final String DOC_SUFFIX = ".doc";

//------------------------------------------------------------------------------
// Prototype  : change(String paths, String savepaths)
// Purpose    : Converts every .doc file found under `paths` (sub-directories
//              included) to HTML and writes the result into `savepaths`.
// Parameters : paths     - directory to scan for .doc files
//              savepaths - directory that receives the generated HTML files
// Returns    : nothing
// Notes      : Recursive. Output is NOT mirrored per sub-directory: every
//              document, whatever its depth, is saved directly under
//              `savepaths`, so equally named documents overwrite each other.
//              A path that is not a readable directory is silently skipped.
//------------------------------------------------------------------------------
public static void change(String paths, String savepaths)
{
  File[] entries = new File(paths).listFiles();
  if (entries == null)
  {
    // listFiles() returns null when the path is missing, is not a directory,
    // or cannot be read; there is nothing to convert in that case.
    return;
  }

  for (int i = 0; i < entries.length; i++)
  {
    File entry = entries[i];
    if (!entry.isFile())
    {
      // Descend using the File's own path so the separator is always present.
      change(entry.getPath(), savepaths);
      continue;
    }

    String filename = entry.getName();
    if (!isWordDocument(filename))
    {
      continue;
    }

    String baseName = filename.substring(0, filename.length() - DOC_SUFFIX.length());
    System.out.println("当前正在转换......");
    System.out.println(paths);
    System.out.println(baseName);

    // Word resolves relative names against its own working directory, so both
    // locations are handed over as absolute paths. No extension is appended to
    // the target name, exactly as before.
    String htmlPath = new File(savepaths, baseName).getAbsolutePath();
    if (convert(entry.getAbsolutePath(), htmlPath))
    {
      System.out.println("转化完毕！");
    }
  }
}

// True when `filename` ends with ".doc" (any letter case) and has a non-empty base name.
private static boolean isWordDocument(String filename)
{
  int suffixStart = filename.length() - DOC_SUFFIX.length();
  return suffixStart > 0
      && filename.regionMatches(true, suffixStart, DOC_SUFFIX, 0, DOC_SUFFIX.length());
}

// Opens `docPath` read-only in a hidden Word instance and saves it as HTML to
// `htmlPath`; returns true on success, false if any COM call failed.
private static boolean convert(String docPath, String htmlPath)
{
  ActiveXComponent app = new ActiveXComponent("Word.Application");
  boolean converted = false;
  try
  {
    // Keep the Word window hidden while automating it.
    app.setProperty("Visible", new Variant(false));
    Dispatch docs = app.getProperty("Documents").toDispatch();
    // Documents.Open(FileName, ConfirmConversions = false, ReadOnly = true)
    Dispatch doc = Dispatch.invoke(docs, "Open", Dispatch.Method,
        new Object[]{docPath, new Variant(false), new Variant(true)}, new int[1]).toDispatch();
    Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
        new Object[]{htmlPath, new Variant(WD_FORMAT_HTML)}, new int[1]);
    // Close(SaveChanges = false): the source document must stay untouched.
    Dispatch.call(doc, "Close", new Variant(false));
    converted = true;
  }
  catch (Exception e)
  {
    // Best effort: report the failure and carry on with the remaining files.
    e.printStackTrace();
  }
  finally
  {
    // Always shut Word down, otherwise a WINWORD.EXE process is left behind per file.
    app.invoke("Quit", new Variant[] {});
  }
  return converted;
}

//------------------------------------------------------------------------------
// Prototype  : main(String[] args)
// Purpose    : Command-line entry point.
// Parameters : args - optional: args[0] = source directory,
//                     args[1] = output directory; the sample paths below are
//                     used when fewer than two arguments are given.
// Returns    : nothing
//------------------------------------------------------------------------------
public static void main(String[] args)
{
  // Sample locations; the path separators had been lost from the literals and
  // are restored here.
  String paths = "D:\\Work\\2004.10.8\\test system\\test01\\word";
  String savepaths = "D:\\Work\\2004.10.8\\test system\\test01\\html";
  if (args.length >= 2)
  {
    paths = args[0];
    savepaths = args[1];
  }
  change(paths, savepaths);
}
}
package Inc;import java.io.*;
import org.textmining.text.extraction.WordExtractor;

/**
 * Extracts the plain text of a Word document using the tm-extractors
 * {@code WordExtractor}.
 */
public class GetNotHtmlContent
{
/**
 * Reads the Word file at {@code fileName} and returns its text content.
 *
 * @param fileName path of the Word document to read
 * @return the extracted text; an empty buffer if the file could not be opened
 *         or parsed (the failure is printed to standard error)
 */
public StringBuffer getDOC(String fileName)
{
    StringBuffer str = new StringBuffer("");
    FileInputStream in = null;
    try
    {
        in = new FileInputStream(fileName);
        WordExtractor extractor = new WordExtractor();
        str.append(extractor.extractText(in));
    }
    catch (Exception e)
    {
        // Best effort: report the problem and fall through to return what we have.
        e.printStackTrace();
    }
    finally
    {
        // Release the file handle whether or not extraction succeeded.
        if (in != null)
        {
            try
            {
                in.close();
            }
            catch (IOException ignored)
            {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }
    return str;
}
}
刚才忘记说了：上面代码里的 import org.textmining.text.extraction.WordExtractor; 这一句需要先下载 tm-extractors-0.4.jar 这个包，否则无法正确导入。
给我分吧.兄弟...

如何使用Java将Word文件转化成HTML， 同时尽可能保持格式不变。

解决方案 »

如何使用Java将Word文件转化成HTML，同时尽可能保持格式不变。