我想做一个提取网页信息的小程序:一次提取多个网页的内容并存入数据库。我一次要提取约 500 个网页,但循环只执行 4 次就终止了,打印结果也不再变化;只有把循环次数限制在 4 次以内时才能正常运行。我觉得这一定是我知识面上的一大空白,希望有人可以帮忙解答。小弟现在正在学习 PHP,希望有高手可以指点迷津。本人看过《提问的智慧》,不是那种无理取闹的人,真心希望获得指点。联系方式:
mail:[email protected]
QQ:54003398
好了,下面是问题的源代码:
<?php
// Lift PHP's maximum execution time so the script is not aborted by the time limit.
set_time_limit(0);
// Load the database helper; presumably this is the DbObject class shown later in this post.
include_once 'DbObject.php';
/**
 * Placeholder for persisting one scraped article.
 *
 * The database write is currently disabled: the SQL statement and the
 * DbObject calls are commented out, so calling this function stores nothing.
 *
 * @param mixed  $id      Row id intended for the `s-save` table.
 * @param string $title   Article title.
 * @param string $content Article body.
 * @return void
 */
function insert($id, $title, $content) {
    // microtime() returns "msec sec"; index 1 is the Unix timestamp in seconds.
    $timeParts = explode ( " ", microtime () );
    $pubTime = $timeParts [1];
    // NOTE(review): the disabled statement below interpolates $title and
    // $content directly into SQL; escape or parameterize before re-enabling it.
    /*$sql = @"INSERT INTO `s-save`
VALUES
('$id', '$title','$pubTime','3', 0, 0, 0, '0','1','php','0','1', '1', '1', '$content', '0', '0', 'blank', '0', '', '', '0', '', '0', '', 'Intranet', 'www.baidu.com', '') ";
*/
    //$db = new DbObject();
    //$db->RunNonExecuteQuery($sql);
}

/**
 * Find a byte offset at or after $position where a UTF-8 string may be cut.
 *
 * Scans forward from $position for the first lead byte of a 3-byte (>= 224)
 * or 4-byte (>= 240) UTF-8 sequence and returns the offset just past that
 * character. All smaller byte values are skipped.
 *
 * The previous version recursed once per byte and never checked for the end
 * of the string, so recursion was unbounded whenever no byte >= 224 remained.
 * This version iterates and returns strlen($str) when nothing is found.
 *
 * @param string $str      UTF-8 encoded text.
 * @param int    $position Byte offset to start scanning from.
 * @return int Byte offset in the range 0..strlen($str).
 */
function utfEdge($str, $position) {
    $length = strlen ( $str );
    for($i = max ( 0, (int) $position ); $i < $length; $i ++) {
        $byte = ord ( $str [$i] );
        if ($byte >= 240) {
            // 4-byte sequence: keep all four bytes together.
            return min ( $i + 4, $length );
        }
        if ($byte >= 224) {
            // 3-byte sequence (most CJK characters).
            return min ( $i + 3, $length );
        }
    }
    // No multi-byte lead byte left: the only safe cut is the end of the string.
    return $length;
}
/**
 * Append $str2 to $str1, inserting "[newpage] " after every chunk.
 *
 * Each chunk ends at the offset utfEdge() reports when asked to scan from
 * byte $size. The final remainder (shorter than $size bytes) is appended
 * without a marker.
 *
 * The previous version recursed once per chunk, copying both strings each
 * time, and dropped $size on the recursive call. This version iterates and
 * applies $size to every chunk.
 *
 * @param string $str1 Text already processed (prefix of the result).
 * @param string $str2 Text still to be split.
 * @param int    $size Minimum chunk size in bytes; values below 1 are treated as 1.
 * @return string
 */
function addNewPage($str1, $str2, $size = 10) {
    // A size below 1 could never make progress on an empty remainder.
    $size = max ( 1, (int) $size );
    $paged = $str1;
    $rest = ( string ) $str2;
    while ( strlen ( $rest ) >= $size ) {
        $position = utfEdge ( $rest, $size );
        $paged .= substr ( $rest, 0, $position ) . "[newpage] ";
        // substr() may return false past the end on old PHP versions.
        $rest = ( string ) substr ( $rest, $position );
    }
    return $paged . $rest;
}

/**
 * Insert "[separator]" into $str at the offset utfEdge() reports for $size.
 *
 * @param string $str  UTF-8 text.
 * @param int    $size Byte offset from which utfEdge() starts scanning.
 * @return string
 */
function addSeparator($str, $size = 2) {
    $position = utfEdge ( $str, $size );
    return substr ( $str, 0, $position ) . "[separator]" . substr ( $str, $position );
}

/**
 * Download one article page, extract its title and body, and print the title.
 *
 * The body is taken from the "BookText" div, has inner <div>...</div> blocks
 * removed, is converted from GBK to UTF-8 and is run through addNewPage()
 * and addSeparator(). The prepared body is not stored yet because the
 * insert() call is still disabled.
 *
 * @param string $url Absolute URL of the article page.
 * @return void
 */
function abstractContent($url) {
    $contents = file_get_contents ( $url );
    if ($contents === false) {
        // Report the failed download instead of running the regexes on false.
        trigger_error ( "abstractContent: unable to fetch " . $url, E_USER_WARNING );
        return;
    }

    // Title: first <span class="newstitle">...</span>.
    $titlePreg = "/<span class=\"newstitle\">(.*?)<\\/span>/is"; //title preg
    $articleTitle = "";
    $titleMatch = array ();
    if (preg_match ( $titlePreg, $contents, $titleMatch )) {
        $articleTitle = $titleMatch [1];
    }

    // Body: everything inside the BookText div (greedy, up to the last "</div>\n</div>").
    $strPreg = "/<div id=\"BookText\">(.*)<\\/div>\n<\\/div>/is"; //preg string
    $body = "";
    $bodyMatch = array ();
    if (preg_match ( $strPreg, $contents, $bodyMatch )) {
        $body = $bodyMatch [1];
    }
    // Re-append the closing tag so the trailing inner div can be matched below.
    $result = $body . "</div>";

    // Strip inner <div ...>...</div> blocks that contain no further tags.
    $pattern = "/(<div[^<]+<\\/div>)/is";
    $stripped = preg_replace ( $pattern, "", $result );
    if ($stripped === null) {
        // preg_replace() returns null on a PCRE error; keep the unstripped text.
        $stripped = $result;
    }

    $articleTitle = mb_convert_encoding ( $articleTitle, "UTF-8", "GBK" );
    $articleContent = mb_convert_encoding ( $stripped, "UTF-8", "GBK" );
    $articleContent = addNewPage ( "", $articleContent );
    $articleContent = addSeparator ( $articleContent );

    // NOTE(review): the title is scraped HTML and is echoed unescaped.
    print ($articleTitle) ;
    //insert ( $id, $articleTitle, $articleContent );
}

/**
 * Fetch an index page and return the href of every <li><a href="..." title link.
 *
 * @param string $aimIndex URL (or path) of the index page.
 * @return array List of href values in document order; empty when the page
 *               cannot be read or contains no matching links.
 */
function getIndex($aimIndex) {
    $contents = file_get_contents ( $aimIndex );
    if ($contents === false) {
        return array ();
    }
    $strPreg = "/<li><a href=\"(.*?)\" title/is";
    $matches = array ();
    if (! preg_match_all ( $strPreg, $contents, $matches )) {
        // Covers both "no links" (0) and a PCRE error (false).
        return array ();
    }
    return $matches [1];
}
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
</head>
<body>
<?php
// Driver: read the chapter index, then fetch and process each linked page.
$baseUrl = "http://www.86zw.com/Html/Book/18/3727/";
$lastUrl = "Index.shtm";
$aimIndex = $baseUrl . $lastUrl;
$arrUrl = getIndex ( $aimIndex );
$baseId = 1;

// Debug cap on the number of pages processed per run (was a bare "4").
// Set to 0 to process every link returned by getIndex().
$maxPages = 4;

foreach ( $arrUrl as $key => $value ) {
    if ($maxPages > 0 && $key >= $maxPages) {
        // Stop instead of looping over the remaining links doing nothing.
        break;
    }
    // Row id for the article; only needed once insert() is enabled.
    $id = $baseId + $key;
    $realUrl = $baseUrl . $value;
    print ("<br />") ;
    print ("key:" . $key) ;
    print (" value:" . $realUrl) ;
    print ("<br />") ;
    abstractContent ( $realUrl );
}
?>
</body>
</html>
数据库连接的代码:
<?php
/**
 * Thin wrapper around the legacy ext/mysql functions.
 *
 * NOTE(review): the mysql_* API was removed in PHP 7. It is kept here so
 * callers that pass the returned result resources to mysql_* functions keep
 * working; migrate to mysqli or PDO as a separate change.
 */
class DbObject
{
    public $mHostname;
    public $mUsername;
    public $mPassword;
    public $mDatabase;
    public $mConn = "";
    // Result of the last RunExecuteQuery() call.
    public $result;

    /**
     * Set the hard-coded connection settings and connect immediately.
     */
    function __construct()
    {
        $this->mHostname = "localhost";
        $this->mUsername = "root";
        $this->mPassword = "root";
        $this->mDatabase = "test";
        $this->Connect($this->mHostname, $this->mUsername, $this->mPassword, $this->mDatabase);
    }

    /**
     * Open the connection and select the database.
     *
     * @return resource|false The link returned by mysql_connect().
     */
    function Connect($host, $uid, $pwd, $database)
    {
        $this->mConn = mysql_connect($host, $uid, $pwd);
        if ($this->mConn !== false) {
            // Select on this link explicitly rather than on "the last opened link".
            mysql_select_db($database, $this->mConn);
        }
        return $this->mConn;
    }

    /**
     * Run an INSERT/UPDATE/DELETE statement; terminates the script on failure.
     *
     * @param string $sql
     * @return void
     */
    function RunNonExecuteQuery($sql)
    {
        mysql_query($sql, $this->mConn) or die("executeNonSql error");
    }

    /**
     * Run a SELECT statement and remember its result; terminates on failure.
     *
     * @param string $sql
     * @return resource The result returned by mysql_query().
     */
    function RunExecuteQuery($sql)
    {
        $this->result = mysql_query($sql, $this->mConn) or die("executeSql error");
        return $this->result;
    }

    /**
     * Run a SELECT statement and return all rows; terminates on failure.
     *
     * @param string $sql
     * @return array Rows from mysql_fetch_array(); empty array when nothing
     *               matches (previously an undefined variable was returned).
     */
    function RunQueryCommand($sql)
    {
        $result = mysql_query($sql, $this->mConn) or die("executeSql error");
        $rows = array();
        while ($record = mysql_fetch_array($result)) {
            $rows[] = $record;
        }
        mysql_free_result($result);
        return $rows;
    }

    /**
     * Number of rows in a result set.
     *
     * The previous version called ->num_rows() on a mysql result resource,
     * which is a fatal error, and ignored its argument.
     *
     * @param resource $result Result to count; when it is not a resource the
     *                         result of the last RunExecuteQuery() is used.
     * @return int|false
     */
    function numResults($result)
    {
        $target = is_resource($result) ? $result : $this->result;
        return mysql_num_rows($target);
    }

    /**
     * Look up the formID column of the first row matching $appID.
     *
     * NOTE(review): $formName and $appID are interpolated into the SQL
     * unescaped; callers must only pass trusted values.
     *
     * @return mixed The formID value, or null when no row matches.
     */
    function GetFormID($formName, $appID)
    {
        $rows = $this->RunQueryCommand("select * from $formName where appID=$appID");
        return isset($rows[0]["formID"]) ? $rows[0]["formID"] : null;
    }
}
?>
希望大家能够帮忙,可以自己试一下:循环次数一旦超过一定数量,程序就不行了。我想知道这是为什么,以及应该怎么解决。
mail:[email protected]
QQ:54003398
好了,下面是问题的源代码:
<?php
// Lift PHP's maximum execution time so the script is not aborted by the time limit.
set_time_limit(0);
// Load the database helper; presumably this is the DbObject class shown later in this post.
include_once 'DbObject.php';
/**
 * Placeholder for persisting one scraped article.
 *
 * The database write is currently disabled: the SQL statement and the
 * DbObject calls are commented out, so calling this function stores nothing.
 *
 * @param mixed  $id      Row id intended for the `s-save` table.
 * @param string $title   Article title.
 * @param string $content Article body.
 * @return void
 */
function insert($id, $title, $content) {
    // microtime() returns "msec sec"; index 1 is the Unix timestamp in seconds.
    $timeParts = explode ( " ", microtime () );
    $pubTime = $timeParts [1];
    // NOTE(review): the disabled statement below interpolates $title and
    // $content directly into SQL; escape or parameterize before re-enabling it.
    /*$sql = @"INSERT INTO `s-save`
VALUES
('$id', '$title','$pubTime','3', 0, 0, 0, '0','1','php','0','1', '1', '1', '$content', '0', '0', 'blank', '0', '', '', '0', '', '0', '', 'Intranet', 'www.baidu.com', '') ";
*/
    //$db = new DbObject();
    //$db->RunNonExecuteQuery($sql);
}

/**
 * Find a byte offset at or after $position where a UTF-8 string may be cut.
 *
 * Scans forward from $position for the first lead byte of a 3-byte (>= 224)
 * or 4-byte (>= 240) UTF-8 sequence and returns the offset just past that
 * character. All smaller byte values are skipped.
 *
 * The previous version recursed once per byte and never checked for the end
 * of the string, so recursion was unbounded whenever no byte >= 224 remained.
 * This version iterates and returns strlen($str) when nothing is found.
 *
 * @param string $str      UTF-8 encoded text.
 * @param int    $position Byte offset to start scanning from.
 * @return int Byte offset in the range 0..strlen($str).
 */
function utfEdge($str, $position) {
    $length = strlen ( $str );
    for($i = max ( 0, (int) $position ); $i < $length; $i ++) {
        $byte = ord ( $str [$i] );
        if ($byte >= 240) {
            // 4-byte sequence: keep all four bytes together.
            return min ( $i + 4, $length );
        }
        if ($byte >= 224) {
            // 3-byte sequence (most CJK characters).
            return min ( $i + 3, $length );
        }
    }
    // No multi-byte lead byte left: the only safe cut is the end of the string.
    return $length;
}
/**
 * Append $str2 to $str1, inserting "[newpage] " after every chunk.
 *
 * Each chunk ends at the offset utfEdge() reports when asked to scan from
 * byte $size. The final remainder (shorter than $size bytes) is appended
 * without a marker.
 *
 * The previous version recursed once per chunk, copying both strings each
 * time, and dropped $size on the recursive call. This version iterates and
 * applies $size to every chunk.
 *
 * @param string $str1 Text already processed (prefix of the result).
 * @param string $str2 Text still to be split.
 * @param int    $size Minimum chunk size in bytes; values below 1 are treated as 1.
 * @return string
 */
function addNewPage($str1, $str2, $size = 10) {
    // A size below 1 could never make progress on an empty remainder.
    $size = max ( 1, (int) $size );
    $paged = $str1;
    $rest = ( string ) $str2;
    while ( strlen ( $rest ) >= $size ) {
        $position = utfEdge ( $rest, $size );
        $paged .= substr ( $rest, 0, $position ) . "[newpage] ";
        // substr() may return false past the end on old PHP versions.
        $rest = ( string ) substr ( $rest, $position );
    }
    return $paged . $rest;
}

/**
 * Insert "[separator]" into $str at the offset utfEdge() reports for $size.
 *
 * @param string $str  UTF-8 text.
 * @param int    $size Byte offset from which utfEdge() starts scanning.
 * @return string
 */
function addSeparator($str, $size = 2) {
    $position = utfEdge ( $str, $size );
    return substr ( $str, 0, $position ) . "[separator]" . substr ( $str, $position );
}

/**
 * Download one article page, extract its title and body, and print the title.
 *
 * The body is taken from the "BookText" div, has inner <div>...</div> blocks
 * removed, is converted from GBK to UTF-8 and is run through addNewPage()
 * and addSeparator(). The prepared body is not stored yet because the
 * insert() call is still disabled.
 *
 * @param string $url Absolute URL of the article page.
 * @return void
 */
function abstractContent($url) {
    $contents = file_get_contents ( $url );
    if ($contents === false) {
        // Report the failed download instead of running the regexes on false.
        trigger_error ( "abstractContent: unable to fetch " . $url, E_USER_WARNING );
        return;
    }

    // Title: first <span class="newstitle">...</span>.
    $titlePreg = "/<span class=\"newstitle\">(.*?)<\\/span>/is"; //title preg
    $articleTitle = "";
    $titleMatch = array ();
    if (preg_match ( $titlePreg, $contents, $titleMatch )) {
        $articleTitle = $titleMatch [1];
    }

    // Body: everything inside the BookText div (greedy, up to the last "</div>\n</div>").
    $strPreg = "/<div id=\"BookText\">(.*)<\\/div>\n<\\/div>/is"; //preg string
    $body = "";
    $bodyMatch = array ();
    if (preg_match ( $strPreg, $contents, $bodyMatch )) {
        $body = $bodyMatch [1];
    }
    // Re-append the closing tag so the trailing inner div can be matched below.
    $result = $body . "</div>";

    // Strip inner <div ...>...</div> blocks that contain no further tags.
    $pattern = "/(<div[^<]+<\\/div>)/is";
    $stripped = preg_replace ( $pattern, "", $result );
    if ($stripped === null) {
        // preg_replace() returns null on a PCRE error; keep the unstripped text.
        $stripped = $result;
    }

    $articleTitle = mb_convert_encoding ( $articleTitle, "UTF-8", "GBK" );
    $articleContent = mb_convert_encoding ( $stripped, "UTF-8", "GBK" );
    $articleContent = addNewPage ( "", $articleContent );
    $articleContent = addSeparator ( $articleContent );

    // NOTE(review): the title is scraped HTML and is echoed unescaped.
    print ($articleTitle) ;
    //insert ( $id, $articleTitle, $articleContent );
}

/**
 * Fetch an index page and return the href of every <li><a href="..." title link.
 *
 * @param string $aimIndex URL (or path) of the index page.
 * @return array List of href values in document order; empty when the page
 *               cannot be read or contains no matching links.
 */
function getIndex($aimIndex) {
    $contents = file_get_contents ( $aimIndex );
    if ($contents === false) {
        return array ();
    }
    $strPreg = "/<li><a href=\"(.*?)\" title/is";
    $matches = array ();
    if (! preg_match_all ( $strPreg, $contents, $matches )) {
        // Covers both "no links" (0) and a PCRE error (false).
        return array ();
    }
    return $matches [1];
}
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
</head>
<body>
<?php
// Driver: read the chapter index, then fetch and process each linked page.
$baseUrl = "http://www.86zw.com/Html/Book/18/3727/";
$lastUrl = "Index.shtm";
$aimIndex = $baseUrl . $lastUrl;
$arrUrl = getIndex ( $aimIndex );
$baseId = 1;

// Debug cap on the number of pages processed per run (was a bare "4").
// Set to 0 to process every link returned by getIndex().
$maxPages = 4;

foreach ( $arrUrl as $key => $value ) {
    if ($maxPages > 0 && $key >= $maxPages) {
        // Stop instead of looping over the remaining links doing nothing.
        break;
    }
    // Row id for the article; only needed once insert() is enabled.
    $id = $baseId + $key;
    $realUrl = $baseUrl . $value;
    print ("<br />") ;
    print ("key:" . $key) ;
    print (" value:" . $realUrl) ;
    print ("<br />") ;
    abstractContent ( $realUrl );
}
?>
</body>
</html>
数据库连接的代码:
<?php
/**
 * Thin wrapper around the legacy ext/mysql functions.
 *
 * NOTE(review): the mysql_* API was removed in PHP 7. It is kept here so
 * callers that pass the returned result resources to mysql_* functions keep
 * working; migrate to mysqli or PDO as a separate change.
 */
class DbObject
{
    public $mHostname;
    public $mUsername;
    public $mPassword;
    public $mDatabase;
    public $mConn = "";
    // Result of the last RunExecuteQuery() call.
    public $result;

    /**
     * Set the hard-coded connection settings and connect immediately.
     */
    function __construct()
    {
        $this->mHostname = "localhost";
        $this->mUsername = "root";
        $this->mPassword = "root";
        $this->mDatabase = "test";
        $this->Connect($this->mHostname, $this->mUsername, $this->mPassword, $this->mDatabase);
    }

    /**
     * Open the connection and select the database.
     *
     * @return resource|false The link returned by mysql_connect().
     */
    function Connect($host, $uid, $pwd, $database)
    {
        $this->mConn = mysql_connect($host, $uid, $pwd);
        if ($this->mConn !== false) {
            // Select on this link explicitly rather than on "the last opened link".
            mysql_select_db($database, $this->mConn);
        }
        return $this->mConn;
    }

    /**
     * Run an INSERT/UPDATE/DELETE statement; terminates the script on failure.
     *
     * @param string $sql
     * @return void
     */
    function RunNonExecuteQuery($sql)
    {
        mysql_query($sql, $this->mConn) or die("executeNonSql error");
    }

    /**
     * Run a SELECT statement and remember its result; terminates on failure.
     *
     * @param string $sql
     * @return resource The result returned by mysql_query().
     */
    function RunExecuteQuery($sql)
    {
        $this->result = mysql_query($sql, $this->mConn) or die("executeSql error");
        return $this->result;
    }

    /**
     * Run a SELECT statement and return all rows; terminates on failure.
     *
     * @param string $sql
     * @return array Rows from mysql_fetch_array(); empty array when nothing
     *               matches (previously an undefined variable was returned).
     */
    function RunQueryCommand($sql)
    {
        $result = mysql_query($sql, $this->mConn) or die("executeSql error");
        $rows = array();
        while ($record = mysql_fetch_array($result)) {
            $rows[] = $record;
        }
        mysql_free_result($result);
        return $rows;
    }

    /**
     * Number of rows in a result set.
     *
     * The previous version called ->num_rows() on a mysql result resource,
     * which is a fatal error, and ignored its argument.
     *
     * @param resource $result Result to count; when it is not a resource the
     *                         result of the last RunExecuteQuery() is used.
     * @return int|false
     */
    function numResults($result)
    {
        $target = is_resource($result) ? $result : $this->result;
        return mysql_num_rows($target);
    }

    /**
     * Look up the formID column of the first row matching $appID.
     *
     * NOTE(review): $formName and $appID are interpolated into the SQL
     * unescaped; callers must only pass trusted values.
     *
     * @return mixed The formID value, or null when no row matches.
     */
    function GetFormID($formName, $appID)
    {
        $rows = $this->RunQueryCommand("select * from $formName where appID=$appID");
        return isset($rows[0]["formID"]) ? $rows[0]["formID"] : null;
    }
}
?>
希望大家能够帮忙,可以自己试一下:循环次数一旦超过一定数量,程序就不行了。我想知道这是为什么,以及应该怎么解决。
不知道你指的少于4是什么?子分页?
if($key < 4)$key是arrUrl 数组的下标啊
print_r($arrUrl);一共490个下标呢
你if($key < 4)当然只循环4次了