我用 POI 读取 Word 2003 文档时，带修订痕迹的文件无法正常读出：现象是总是把已经删除的文字也一并读出来了。请教各位高手帮忙，谢谢！
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Minimal reproduction: opens a Word .doc file with Apache POI's
 * {@code WordExtractor} and prints the extracted text to standard output.
 *
 * NOTE(review): this snippet is cut off after the catch body; the closing
 * braces for the catch block, {@code main} and the class are not present here.
 */
public class Test {
/** Entry point. The input path is hard-coded; {@code args} is not used in the visible code. */
public static void main(String[] args) {
try {
// The stream is opened here and is never closed in the visible code.
InputStream is = new FileInputStream(new File("c:\\files\\newtest.doc"));
WordExtractor ex = new WordExtractor(is);
// NOTE(review): the reported symptom is that text deleted under change
// tracking still shows up in this output. Presumably getTextFromPieces()
// returns the raw text pieces without filtering revisions; POI's
// CharacterRun.isMarkedDeleted() may help skip such runs (confirm against
// the POI documentation).
String text2003 = ex.getTextFromPieces();
System.out.println(text2003);
} catch (Exception e) {
// Every exception type is caught here and only its stack trace is printed.
e.printStackTrace();
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;import org.apache.poi.hwpf.extractor.WordExtractor;
/**
 * Minimal reproduction: opens a Word .doc file with Apache POI's
 * {@code WordExtractor} and prints the extracted text to standard output.
 *
 * NOTE(review): this snippet is cut off after the catch body; the closing
 * braces for the catch block, {@code main} and the class are not present here.
 */
public class Test {
/** Entry point. The input path is hard-coded; {@code args} is not used in the visible code. */
public static void main(String[] args) {
try {
// The stream is opened here and is never closed in the visible code.
InputStream is = new FileInputStream(new File("c:\\files\\newtest.doc"));
WordExtractor ex = new WordExtractor(is);
// NOTE(review): the reported symptom is that text deleted under change
// tracking still shows up in this output. Presumably getTextFromPieces()
// returns the raw text pieces without filtering revisions; POI's
// CharacterRun.isMarkedDeleted() may help skip such runs (confirm against
// the POI documentation).
String text2003 = ex.getTextFromPieces();
System.out.println(text2003);
} catch (Exception e) {
// Every exception type is caught here and only its stack trace is printed.
e.printStackTrace();
用 jxl 读取 Excel 时也有同样的问题。