// Extract all of the document's text: once as per-paragraph strings, once as a single string.
FileInputStream in = new FileInputStream("F:/test.doc");
WordExtractor extractor = new WordExtractor(in);
String[] strArray = extractor.getParagraphText();
String str = extractor.getText();
for (int i = 0; i < strArray.length; ++i) {
    System.out.println(strArray[i]);
}
System.out.println(str);
// HWPFDocument is the class dedicated to Word documents; POI has other classes for other Office formats.
HWPFDocument doc = new HWPFDocument(new FileInputStream("F:/test.doc"));
// Report how many paragraphs the document contains.
Range range = doc.getRange();
int numP = range.numParagraphs();
System.out.println("number of Paragrah " + numP);
long start2 = System.currentTimeMillis();
// Fetch the Word document's data stream.
byte[] dataStream = doc.getDataStream();
System.out.println("size of dataStream " + dataStream.length);
java 跨平台并没有它吹嘘的那么好
有个问题请教一下,
你们在处理 Word 文档的时候有涉及到表格吗?POI 怎么操作 Word 里面的表格信息?
---------------------------------------------
Quietly through .....
package org.LeeJ.util.word.poi;import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.*;
import org.apache.poi.hwpf.model.*;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;

/**
 * Small Apache POI (HWPF) demo: prints the text of a Word document and saves
 * every picture found in its character runs to a separate file.
 */
public class MsWordExtractorPC {

    /** Word document read by both steps of the demo. */
    private static final String SOURCE_DOC = "F:/test.doc";

    /**
     * Prints the document text, then extracts the embedded pictures.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the document cannot be read or a picture cannot be written
     */
    public static void main(String[] args) throws IOException {
        printDocumentText(SOURCE_DOC);
        extractPictures(SOURCE_DOC);
    }

    /**
     * Prints every paragraph returned by the extractor, followed by the whole text.
     *
     * @param path location of the Word document
     * @throws IOException if the document cannot be opened or parsed
     */
    private static void printDocumentText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            WordExtractor extractor = new WordExtractor(in);
            String[] paragraphs = extractor.getParagraphText();
            String fullText = extractor.getText();
            for (int i = 0; i < paragraphs.length; ++i) {
                System.out.println(paragraphs[i]);
            }
            System.out.println(fullText);
        } finally {
            // The original never closed this stream; release the file handle on every path.
            in.close();
        }
    }

    /**
     * Prints paragraph / data-stream / character-run statistics and writes each
     * picture attached to a character run to {@code f:/<runIndex>.bmp}.
     *
     * @param path location of the Word document
     * @throws IOException if the document cannot be read or a picture cannot be written
     */
    private static void extractPictures(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            // HWPFDocument is the class dedicated to Word documents; POI has
            // other classes for the other Office formats.
            HWPFDocument doc = new HWPFDocument(in);

            // Report how many paragraphs the document contains.
            Range range = doc.getRange();
            int numP = range.numParagraphs();
            System.out.println("number of Paragrah " + numP);

            // Fetch the document's data stream; the pictures table is built from it.
            byte[] dataStream = doc.getDataStream();
            System.out.println("size of dataStream " + dataStream.length);

            int numChar = range.numCharacterRuns();
            System.out.println("number of CharacterRun " + numChar);

            PicturesTable pTable = new PicturesTable(dataStream);
            for (int j = 0; j < numChar; ++j) {
                CharacterRun cRun = range.getCharacterRun(j);
                // Check whether this character run carries a picture.
                boolean has = pTable.hasPicture(cRun);
                System.out.println("hasPicture " + has);
                if (has) {
                    Picture picture = pTable.extractPicture(cRun, true);
                    // Output directory and extension are hard-coded; adjust as needed.
                    // NOTE(review): ".bmp" is used regardless of the picture's real
                    // format - confirm this is intended.
                    FileOutputStream out = new FileOutputStream("f:/" + j + ".bmp");
                    try {
                        picture.writeImageContent(out);
                    } finally {
                        // Close per picture so handles do not pile up across the loop.
                        out.close();
                    }
                    System.out.println("extract Picture successfully! ");
                }
            }
        } finally {
            in.close();
        }
    }
}