这是java文件
public List<String[][]> getWordValues(String file){
List<String[][]>word = new ArrayList<String[][]>();
try {
List<String[][]> wordValue = new ArrayList<String[][]>();
//表格数据
POIFSFileSystem pfs = new POIFSFileSystem(new FileInputStream(file));
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
//迭代文档中的表格
TableIterator it = new TableIterator(range);
while (it.hasNext()) {
Table tb = (Table) it.next();
String[][] tables = null;
if(tb.numRows() > 0) tables = new String[tb.numRows()][tb.getRow(0).numCells()];
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
//System.out.print()+"\t"); //输出单元格数据
if( s.indexOf("") < 1) {
tables[i][j] = " ";
continue;
}
tables[i][j] = s.substring(0, s.indexOf(""));
}
// System.out.print(tables[i][j]);
}
// System.out.println();//没一行完后换行
}
// System.out.println();//第一个表格完后换一行
wordValue.add(tables);
}
List<String> wordTest = new AnalyticWord().getWordText(file);
for (int i = 0,k=0; i < wordTest.size(); i++) {
if("tables".equals(wordTest.get(i))){
word.add(wordValue.get(k++));
}else{
word.add(new String[][]{{wordTest.get(i)}});
}
}
} catch (Exception e) {
e.printStackTrace();
new Exception("无法从该Mocriosoft Word文档中提取内容").printStackTrace();
word = null;
}
return word;
}
public List<String> getWordText(String file){
List<String> list = new ArrayList<String>();
try {
//表格外的数据
InputStream is = new FileInputStream(file);
WordExtractor ex = new WordExtractor(is);
for (String string : ex.getText().toString().split("\n")) {//截取每行数据
if(string.indexOf("") > 0 ){
list.add("tables");
if(string.lastIndexOf("") > 1){ //表格下的第一行数据此数据没有换行的但紧挨着表格
list.add("\n"+string.substring(string.lastIndexOf("")+2 , string.length()));//输出紧挨着表格的数据
}
}else{
list.add(string);//杈撳嚭琛ㄦ牸澶栫殑鏁版嵁
}
}
} catch (Exception e) {
e.printStackTrace();
new Exception("无法从该Mocriosoft Word文档中提取内容").printStackTrace();
list = null;
}
return list;
}这是JSP页面
<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<html>
<head>
<title>读取Word文档</title>
</head>
<script type="text/javascript" src="dwr/engine.js"></script>
<script type="text/javascript" src="dwr/interface/WordValues.js"></script>
<head>
<script type="text/javascript">
var file = "F://ceshi/word.doc";
window.onload = getWordValues();
function getWordValues(){
var word = "";
WordValues.getWordValues(file,function(listWordVlues){
for ( var int = 0; int < listWordVlues.length; int++) {
if(listWordVlues[int].length > 1){
word = word + "<Table border = '2' bordercolor='#6600FF' border = '2' style='border-color: fuchsia;'>";
for ( var i = 0; i < listWordVlues[int].length; i++) {
word = word + "<Tr>";
for ( var j = 0; j < listWordVlues[int][i].length; j++) {
word = word + "<td width='50px'>"+(listWordVlues[int][i][j] == " " ? " " : listWordVlues[int][i][j] )+"</Td>";
}
word = word + "</Tr>";
}
word = word + "</Table> ";
}else{
word = word + listWordVlues[int][0][0] + "<br/>";
}
}
document.getElementById("word").innerHTML = word;
});
}
</script>
</head>
<body>
<form id="word"></form>
</body>
</html>
只要WORD文档里面有中文就显示不出来
请高手解决下
public List<String[][]> getWordValues(String file){
List<String[][]>word = new ArrayList<String[][]>();
try {
List<String[][]> wordValue = new ArrayList<String[][]>();
//表格数据
POIFSFileSystem pfs = new POIFSFileSystem(new FileInputStream(file));
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
//迭代文档中的表格
TableIterator it = new TableIterator(range);
while (it.hasNext()) {
Table tb = (Table) it.next();
String[][] tables = null;
if(tb.numRows() > 0) tables = new String[tb.numRows()][tb.getRow(0).numCells()];
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
//System.out.print()+"\t"); //输出单元格数据
if( s.indexOf("") < 1) {
tables[i][j] = " ";
continue;
}
tables[i][j] = s.substring(0, s.indexOf(""));
}
// System.out.print(tables[i][j]);
}
// System.out.println();//没一行完后换行
}
// System.out.println();//第一个表格完后换一行
wordValue.add(tables);
}
List<String> wordTest = new AnalyticWord().getWordText(file);
for (int i = 0,k=0; i < wordTest.size(); i++) {
if("tables".equals(wordTest.get(i))){
word.add(wordValue.get(k++));
}else{
word.add(new String[][]{{wordTest.get(i)}});
}
}
} catch (Exception e) {
e.printStackTrace();
new Exception("无法从该Mocriosoft Word文档中提取内容").printStackTrace();
word = null;
}
return word;
}
public List<String> getWordText(String file){
List<String> list = new ArrayList<String>();
try {
//表格外的数据
InputStream is = new FileInputStream(file);
WordExtractor ex = new WordExtractor(is);
for (String string : ex.getText().toString().split("\n")) {//截取每行数据
if(string.indexOf("") > 0 ){
list.add("tables");
if(string.lastIndexOf("") > 1){ //表格下的第一行数据此数据没有换行的但紧挨着表格
list.add("\n"+string.substring(string.lastIndexOf("")+2 , string.length()));//输出紧挨着表格的数据
}
}else{
list.add(string);//杈撳嚭琛ㄦ牸澶栫殑鏁版嵁
}
}
} catch (Exception e) {
e.printStackTrace();
new Exception("无法从该Mocriosoft Word文档中提取内容").printStackTrace();
list = null;
}
return list;
}这是JSP页面
<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<html>
<head>
<title>读取Word文档</title>
</head>
<script type="text/javascript" src="dwr/engine.js"></script>
<script type="text/javascript" src="dwr/interface/WordValues.js"></script>
<head>
<script type="text/javascript">
var file = "F://ceshi/word.doc";
window.onload = getWordValues();
function getWordValues(){
var word = "";
WordValues.getWordValues(file,function(listWordVlues){
for ( var int = 0; int < listWordVlues.length; int++) {
if(listWordVlues[int].length > 1){
word = word + "<Table border = '2' bordercolor='#6600FF' border = '2' style='border-color: fuchsia;'>";
for ( var i = 0; i < listWordVlues[int].length; i++) {
word = word + "<Tr>";
for ( var j = 0; j < listWordVlues[int][i].length; j++) {
word = word + "<td width='50px'>"+(listWordVlues[int][i][j] == " " ? " " : listWordVlues[int][i][j] )+"</Td>";
}
word = word + "</Tr>";
}
word = word + "</Table> ";
}else{
word = word + listWordVlues[int][0][0] + "<br/>";
}
}
document.getElementById("word").innerHTML = word;
});
}
</script>
</head>
<body>
<form id="word"></form>
</body>
</html>
只要WORD文档里面有中文就显示不出来
请高手解决下
当为中文的时候,s 输出是什么?如果报错,报什么错,贴出来。
没有报错
就是到JSp 页面上
就显示不出来了
到页面上
报错内容
无法获取属性"length" 的值;对象为NULL或未定义