linux下 antiword获取word 文件问题 php读取word总是出问题 要不就是2003和2007之间不能同时兼容1.我想用antiword获取word原格式(不被格式化为文本格式)2.如何解决antiword兼容2003或2007的问题3.诸位还有别的好办法解决读取word格式方法吗 我使用com总是报错 或者 pear的相关方法也行 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 /** * check system operation win or linux * * @param string $file contain file path and file name * @return file content */function CheckSystemOS($file = '') { $content = ""; // $type = substr ( $file, strrpos ( $file, '.' ) + 1 ); $type = pathinfo ( $file, PATHINFO_EXTENSION ); global $WIN_ANTIWORD_PATH, $WIN_XPDF_PATH; // global $UNIX_ANTIWORD_PATH, $UNIX_XPDF_PATH; if (strtoupper ( substr ( PHP_OS, 0, 3 ) ) === 'WIN') { //this is a server using windows switch (strtolower ( $type )) { case 'doc' : $content = shell_exec ( "c:\\antiword\\antiword -f $file" ); break; case 'docx' : $content = parseWord ( $file ); break; case 'pdf' : $content = shell_exec ( "c:\\xpdf\\pdftotext -" ); break; case 'zip' : $content = ReadZIPFile ( $file ); break; case 'txt' : $content = file_get_contents ( $file ); break; } } else { //this is a server not using windows //$ch = shell_exec ( "ls /usr/local/bin/antiword -l" ); //print_r ( "<pre>$ch</pre>" ); //exit (); switch (strtolower ( $type )) { case 'doc' : $content = shell_exec ( "/usr/local/bin/antiword -f $file" ); break; case 'docx' : $content = parseWord ( $file ); break; case 'pdf' : $content = shell_exec ( "/usr/bin/xpdf/pdftotext $file -" ); break; case 'zip' : $content = ReadZIPFile ( $file ); break; case 'txt' : $content = file_get_contents ( $file ); break; } } if (file_exists ( $file )) { @unlink ( $file ); } return $content;} /*****************************************************************This approach uses detection of NUL (chr(00)) and end line (chr(13))to decide where the text is:- divide the file contents up by chr(13)- reject any slices containing a NUL- stitch the rest together again- clean up with a regular expression *****************************************************************/function 
parseWord($userDoc) { $fileHandle = fopen ( $userDoc, "r" ); $line = @fread ( $fileHandle, filesize ( $userDoc ) ); $lines = explode ( chr ( 0x0D ), $line ); $outtext = ""; foreach ( $lines as $thisline ) { $pos = strpos ( $thisline, chr ( 0x00 ) ); if (($pos !== FALSE) || (strlen ( $thisline ) == 0)) { } else { $outtext .= $thisline . " "; } } // $outtext = preg_replace ( "/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/", "", $outtext ); return $outtext;} 不同表单的值怎么才能一同得到? 模拟搜狐邮箱能登陆成功了,不能跳到通讯录页面,问题出在哪? 首页菜单设计 请教一SQL语句 我的PHP博客 要实现四级分类,有点问题 我拿出我的112分中的100分来问一个问题.希望大家能够帮忙. PHP能否直接打开网站上的HTML文件,进行编辑后再保存覆盖原来的文件 关于静态页面的问题 ThinkPHP框架下model里面$this->query()与M()->query()有什么区别 【求助】数组问题 php如何判断第一个执行某个文件
/**
 * Extract the text content of a file, picking the converter binary that
 * matches the host operating system (Windows vs. anything else).
 *
 * Dispatch is by file extension (case-insensitive):
 *   - doc  : external antiword binary, invoked with -f
 *   - docx : parseWord()
 *   - pdf  : external pdftotext binary, writing to stdout ("-")
 *   - zip  : ReadZIPFile() (defined outside this block)
 *   - txt  : raw file contents
 * Any other extension yields an empty string.
 *
 * NOTE: the file is deleted after processing, whether or not any text
 * was extracted.
 *
 * @param string $file path (directory and file name) of the file to read
 * @return string|null|false extracted text; "" for an unsupported extension,
 *                     otherwise whatever the converter returned (shell_exec
 *                     gives null when the command fails or prints nothing,
 *                     file_get_contents gives false on read failure)
 */
function CheckSystemOS($file = '') {
	$content = "";
	$type = strtolower ( pathinfo ( $file, PATHINFO_EXTENSION ) );
	$isWindows = (strtoupper ( substr ( PHP_OS, 0, 3 ) ) === 'WIN');

	// Quote the path before it reaches the shell: an unquoted path breaks on
	// spaces and lets a crafted file name inject arbitrary commands.
	$quotedFile = escapeshellarg ( $file );

	switch ($type) {
		case 'doc' :
			$antiword = $isWindows ? "c:\\antiword\\antiword" : "/usr/local/bin/antiword";
			$content = shell_exec ( "$antiword -f $quotedFile" );
			break;
		case 'docx' :
			// NOTE(review): parseWord() splits raw bytes on CR/NUL, which looks
			// designed for binary .doc rather than zipped .docx - confirm.
			$content = parseWord ( $file );
			break;
		case 'pdf' :
			$pdftotext = $isWindows ? "c:\\xpdf\\pdftotext" : "/usr/bin/xpdf/pdftotext";
			// The input file must precede "-" (stdout); the Windows command
			// previously omitted it and therefore never produced any text.
			$content = shell_exec ( "$pdftotext $quotedFile -" );
			break;
		case 'zip' :
			$content = ReadZIPFile ( $file );
			break;
		case 'txt' :
			$content = file_get_contents ( $file );
			break;
	}

	// The source file is treated as temporary: remove it once it has been read.
	if (file_exists ( $file )) {
		@unlink ( $file );
	}
	return $content;
}
/**
 * Heuristically pull readable text out of a Word file.
 *
 * This approach uses detection of NUL (chr(0x00)) and carriage return
 * (chr(0x0D)) to decide where the text is:
 *   - divide the file contents up by chr(0x0D)
 *   - reject any slices that are empty or contain a NUL
 *   - stitch the rest together again, each slice followed by one space
 *
 * The optional regular-expression clean-up is disabled (see the
 * commented-out line below), so surviving slices are returned unfiltered.
 *
 * @param string $userDoc path of the document to read
 * @return string concatenated text slices; "" when the file cannot be read
 *                or is empty
 */
function parseWord($userDoc) {
	// file_get_contents opens, reads and closes in one step, so no handle is
	// leaked and a zero-byte file does not trigger a zero-length fread().
	$raw = @file_get_contents ( $userDoc );
	if ($raw === false || $raw === '') {
		return "";
	}

	$outtext = "";
	foreach ( explode ( chr ( 0x0D ), $raw ) as $thisline ) {
		// Skip blank slices and anything containing NUL (treated as binary).
		if ($thisline === '' || strpos ( $thisline, chr ( 0x00 ) ) !== false) {
			continue;
		}
		$outtext .= $thisline . " ";
	}
	// $outtext = preg_replace ( "/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/", "", $outtext );
	return $outtext;
}