///如何用java判断word文档内容与格式???///

如何用 Java 判断 Word 文档的内容与格式？急需，非常感谢高手帮忙！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

内容可以判断，格式我还不知道怎么做。
package testh;import java.io.*;
import org.textmining.text.extraction.WordExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFCell;

/**
 * Converts the text of every Word ({@code .doc}) file directly inside a directory into a
 * minimal UTF-8 HTML file (one {@code <name>.html} per {@code <name>.doc}).
 *
 * <p>Only the plain text is extracted (via {@link WordExtractor}); the sole piece of
 * "formatting" that is carried over is line breaks, which become {@code <br>} tags.
 */
public class testcls {

    /** File-name suffix that marks a file as a Word document. */
    private static final String DOC_SUFFIX = ".doc";

    /** Suffix of the generated files. */
    private static final String HTML_SUFFIX = ".html";

    /** Declares the encoding of the generated page; written before the converted text. */
    private static final String HTML_HEADER =
            "<META http-equiv=content-type content='text/html; charset=UTF-8'>\r\n";

    /**
     * Scans {@code paths} (non-recursively) for {@code .doc} files and writes the text of each
     * one to {@code savepaths + <name> + ".html"}.
     *
     * <p>An already existing target file is never overwritten: a message is printed and the
     * document is skipped. A failure while processing one document is reported with a stack
     * trace and does not stop the remaining documents from being processed.
     *
     * @param paths     directory to scan for {@code .doc} files
     * @param savepaths prefix for the generated files; it is concatenated with the file name
     *                  as-is, so it must end with a path separator to denote a directory
     */
    public static void readdoc(String paths, String savepaths) {
        File dir = new File(paths);
        // listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] entries = dir.listFiles();
        if (entries == null) {
            System.err.println("无法读取目录: " + paths);
            return;
        }

        for (int i = 0; i < entries.length; i++) {
            File entry = entries[i];
            if (!entry.isFile()) {
                continue;
            }

            String filename = entry.getName();
            // endsWith() is safe for names of any length and requires the dot, unlike
            // slicing off the last three characters.
            if (!filename.endsWith(DOC_SUFFIX)) {
                continue;
            }
            String fname = filename.substring(0, filename.length() - DOC_SUFFIX.length());

            System.out.println("当前正在检索....");
            System.out.println(paths);
            System.out.println(fname);

            try {
                String text = extractText(entry);
                System.out.println(text);

                String body = toHtmlBody(text);
                System.out.print(body);

                File target = new File(savepaths + fname + HTML_SUFFIX);
                // createNewFile() creates the file only if it does not exist yet and reports
                // which of the two happened, so no separate exists() check is needed.
                if (!target.createNewFile()) {
                    System.out.print("文件已存在，放弃创建，请处理存在文件后再运行...\n");
                    continue;
                }
                System.err.println(target + "已创建！");

                // Write to the file that was just created (under savepaths), not to the
                // source directory.
                writeUtf8(target, HTML_HEADER + body);
            } catch (Exception e) {
                // Per-document boundary: report and carry on with the next file.
                e.printStackTrace();
            }
        }
    }

    /**
     * Extracts the plain text of a Word document and closes the input stream afterwards.
     *
     * @param doc the {@code .doc} file to read
     * @return the text returned by {@link WordExtractor#extractText}
     * @throws Exception if the file cannot be opened or the extractor fails
     */
    private static String extractText(File doc) throws Exception {
        FileInputStream in = new FileInputStream(doc);
        try {
            return new WordExtractor().extractText(in);
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // Read side only: a failed close cannot lose data, and it must not mask
                // an exception thrown by the extraction itself.
            }
        }
    }

    /**
     * Turns extracted text into an HTML fragment: escapes markup characters, then inserts a
     * {@code <br>} after every line feed.
     */
    private static String toHtmlBody(String text) {
        // Escape first so that the <br> tags added afterwards are not escaped themselves.
        return escapeHtml(text).replace("\n", "\n<br>");
    }

    /** Escapes {@code &}, {@code <} and {@code >} so the text cannot be parsed as markup. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Writes {@code content} to {@code target} encoded as UTF-8 and closes the stream.
     *
     * @throws IOException if the file cannot be opened, written or closed
     */
    private static void writeUtf8(File target, String content) throws IOException {
        FileOutputStream out = new FileOutputStream(target);
        try {
            out.write(content.getBytes("UTF-8"));
        } finally {
            out.close();
        }
    }

    /**
     * Converts the documents in {@code c:\a\} and stores the HTML files in the same directory.
     *
     * @param args ignored
     */
    public static void main(String args[]) throws Exception {
        String paths = "c:\\a\\";
        String savepaths = "c:\\a\\";
        readdoc(paths, savepaths);
    }
}
以上是我对内容的处理；格式方面我只想到了换行 Orz...
我也在找格式问题...