以下是采集某个网站数据的程序。如果谁能看懂某行代码的话,请帮忙解释一下;另外,如果有更合理的方式的话,就更好了。这个程序采集的原理其实跟网络蜘蛛差不多,只不过我是针对某个具体的网站。我采集的主要是某个游戏下某个服务器的游戏币价格与库存等信息。这个程序运行正确。
<?php
// Pull in the shared scraping base class.
include_once('Gather_Base.php');

/**
 * Scraper for one specific site ("cherry").
 *
 * Walks the list of games found on the home page, downloads each game's
 * "sell" page and the "buy" page linked from it, and stores the parsed
 * per-area rows in $this->areasSell / $this->areasBuy, keyed by game name.
 *
 * All fetching, matching and logging helpers (getGames, get, getMiddle,
 * getMiddles, pregMatch, pregMatchAll, showGame, showArea, log) are
 * inherited from Gather_Base, which is not part of this file.
 */
final class cherry extends Gather_Base
{
    // Entry page that lists the games (host redacted by the original author).
    private $homepage = 'http://***********/pctop.html';
    private $classname = 'cherry';
    private $encoding = 'shift-jis';

    /**
     * Forwards the class name, the page encoding and $n to the parent initialiser.
     *
     * @param mixed $n Passed through to Gather_Base unchanged.
     */
    public function __construct($n)
    {
        // NOTE(review): this calls the parent's PHP4-style initialiser by name;
        // confirm against Gather_Base before switching to parent::__construct().
        parent::Gather_Base($this->classname, $this->encoding, $n);
    }

    /**
     * Collects sell and buy data for every game listed on the home page.
     *
     * @return false|null false when the game list could not be obtained;
     *                    otherwise nothing is returned and the results are
     *                    left in $this->areasSell and $this->areasBuy.
     */
    public function getData()
    {
        $games = $this->getGames($this->homepage);
        if ($games === false) {
            return false;
        }

        // Handle each game in turn; a failed download only skips that game.
        foreach ($games as $name => $http) {
            $httpSell = $http;
            $this->showGame($name);
            $this->log('Game', $name);

            $docSell = $this->get($httpSell);
            if ($docSell === false) {
                continue;
            }

            $price = $this->getSellPrice($docSell);
            if ($price !== false) {
                $this->areasSell[$name] = $price;
            }

            // The buy page is only reachable through a link on the sell page.
            $httpBuy = $this->getHttp($docSell);
            if (!$httpBuy) {
                continue;
            }

            $docBuy = $this->get($httpBuy);
            if ($docBuy === false) {
                continue;
            }

            $price = $this->getBuyPrice($docBuy);
            if ($price !== false) {
                $this->areasBuy[$name] = $price;
            }
        }
    }

    /**
     * Extracts the buy-page URL from a sell page.
     *
     * Looks for a <div> wrapping a link whose image is "kaitori<digits>.gif"
     * and prefixes the captured href with the site base URL.
     *
     * @param string $doc HTML of the sell page.
     * @return string|false Absolute URL, or false when no such link is found.
     */
    public function getHttp($doc)
    {
        // "\d*" accepts exactly the same file names as the former "(?:\d*)+",
        // without the nested quantifier that could backtrack exponentially.
        $reg = '/<div[^>]*><a\s*href\="([^"]*)"[^>]*><img\ssrc\="kaitori\d*\.gif"[^>]*><\/a><\/div>/i';
        $matchs = $this->pregMatch($reg, $doc, true);
        if (!$matchs) {
            return false;
        }

        return 'http://cherry-rmt.xux.jp/' . $matchs[1];
    }

    /**
     * Parses the sell table of a game page.
     *
     * The first <tr> is read as the header row; every following row becomes
     * one entry keyed by area name with 'areaName', 'basePrice' (first
     * numeric cell), 'stock' (last numeric cell) and 'favor' (header value
     * => cell value for the columns between the first and last header).
     *
     * Presumably pregMatchAll returns one entry per match with the captured
     * group at index 1 - verify against Gather_Base.
     *
     * @param string $doc HTML of the sell page.
     * @return array Rows keyed by area name; empty when no rows are found.
     */
    public function getSellPrice($doc)
    {
        $thisGame = array();

        $tbody = $this->getMiddle($doc, '<tbody>', '</tbody>');
        $trs = $this->getMiddles($tbody, '<tr>', '</tr>');
        // Nothing to parse: avoid count() on a non-array and reading $trs[0].
        if (!is_array($trs) || count($trs) === 0) {
            return $thisGame;
        }

        $headers = $this->pregMatchAll('/<th[^>]*><font[^>]*>([^<]*)<\/font><\/th>/i', $trs[0]);
        $headerCount = is_array($headers) ? count($headers) : 0;
        $rowCount = count($trs);

        for ($i = 1; $i < $rowCount; $i++) {
            $tr = $trs[$i];

            // Area name comes from the image alt text, falling back to its file name.
            $name = $this->getMiddle($tr, 'alt="', '"', true);
            if (!$name) {
                $name = $this->getMiddle($tr, 'src="', '.', true);
            }
            if (!$name) {
                continue;
            }

            $this->showArea($name, 'sell');

            $tds = $this->pregMatchAll('/<td[^>]*><font[^>]*>(\d*)[^<]*<\/font><\/td>/i', $tr);
            if (!is_array($tds)) {
                $tds = array();
            }
            $last = count($tds) - 1;

            // Missing cells yield null, as before, but without undefined-index access.
            $thisGame[$name] = array(
                'areaName' => $name,
                'basePrice' => isset($tds[0][1]) ? $tds[0][1] : null,
                'stock' => isset($tds[$last][1]) ? $tds[$last][1] : null,
                'favor' => array(),
            );

            // Header column $j lines up with numeric cell $j - 1.
            for ($j = 1; $j < $headerCount - 1; $j++) {
                $favorStock = intval($headers[$j][1]);
                $favorPrice = isset($tds[$j - 1][1]) ? $tds[$j - 1][1] : null;
                $thisGame[$name]['favor'][$favorStock] = $favorPrice;
            }
        }

        return $thisGame;
    }

    /**
     * Parses the buy table of a game page.
     *
     * Every row after the first becomes one entry keyed by area name with
     * 'areaName', 'basePrice' (always 0) and 'stock' (the first numeric
     * cell, as an integer; 0 when the row has no matching cell).
     *
     * @param string $doc HTML of the buy page.
     * @return array Rows keyed by area name; empty when no rows are found.
     */
    public function getBuyPrice($doc)
    {
        $thisGame = array();

        $tbody = $this->getMiddle($doc, '<tbody>', '</tbody>');
        $trs = $this->getMiddles($tbody, '<tr>', '</tr>');
        // Nothing to parse: avoid count() on a non-array.
        if (!is_array($trs)) {
            return $thisGame;
        }

        $rowCount = count($trs);
        for ($i = 1; $i < $rowCount; $i++) {
            $tr = $trs[$i];

            // Area name comes from the image alt text, falling back to its file name.
            $name = $this->getMiddle($tr, 'alt="', '"', true);
            if (!$name) {
                $name = $this->getMiddle($tr, 'src="', '.', true);
            }
            if (!$name) {
                continue;
            }

            $this->showArea($name, 'buy');

            $tds = $this->pregMatch('/<td[^>]*><font[^>]*>(\d*)[^<]*<\/font><\/td>/i', $tr);

            $thisGame[$name] = array(
                'areaName' => $name,
                'basePrice' => 0,
                'stock' => isset($tds[1]) ? intval($tds[1]) : 0,
            );
        }

        return $thisGame;
    }
}
<?php
// Pull in the shared scraping base class.
include_once('Gather_Base.php');

/**
 * Scraper for one specific site ("cherry").
 *
 * Walks the list of games found on the home page, downloads each game's
 * "sell" page and the "buy" page linked from it, and stores the parsed
 * per-area rows in $this->areasSell / $this->areasBuy, keyed by game name.
 *
 * All fetching, matching and logging helpers (getGames, get, getMiddle,
 * getMiddles, pregMatch, pregMatchAll, showGame, showArea, log) are
 * inherited from Gather_Base, which is not part of this file.
 */
final class cherry extends Gather_Base
{
    // Entry page that lists the games (host redacted by the original author).
    private $homepage = 'http://***********/pctop.html';
    private $classname = 'cherry';
    private $encoding = 'shift-jis';

    /**
     * Forwards the class name, the page encoding and $n to the parent initialiser.
     *
     * @param mixed $n Passed through to Gather_Base unchanged.
     */
    public function __construct($n)
    {
        // NOTE(review): this calls the parent's PHP4-style initialiser by name;
        // confirm against Gather_Base before switching to parent::__construct().
        parent::Gather_Base($this->classname, $this->encoding, $n);
    }

    /**
     * Collects sell and buy data for every game listed on the home page.
     *
     * @return false|null false when the game list could not be obtained;
     *                    otherwise nothing is returned and the results are
     *                    left in $this->areasSell and $this->areasBuy.
     */
    public function getData()
    {
        $games = $this->getGames($this->homepage);
        if ($games === false) {
            return false;
        }

        // Handle each game in turn; a failed download only skips that game.
        foreach ($games as $name => $http) {
            $httpSell = $http;
            $this->showGame($name);
            $this->log('Game', $name);

            $docSell = $this->get($httpSell);
            if ($docSell === false) {
                continue;
            }

            $price = $this->getSellPrice($docSell);
            if ($price !== false) {
                $this->areasSell[$name] = $price;
            }

            // The buy page is only reachable through a link on the sell page.
            $httpBuy = $this->getHttp($docSell);
            if (!$httpBuy) {
                continue;
            }

            $docBuy = $this->get($httpBuy);
            if ($docBuy === false) {
                continue;
            }

            $price = $this->getBuyPrice($docBuy);
            if ($price !== false) {
                $this->areasBuy[$name] = $price;
            }
        }
    }

    /**
     * Extracts the buy-page URL from a sell page.
     *
     * Looks for a <div> wrapping a link whose image is "kaitori<digits>.gif"
     * and prefixes the captured href with the site base URL.
     *
     * @param string $doc HTML of the sell page.
     * @return string|false Absolute URL, or false when no such link is found.
     */
    public function getHttp($doc)
    {
        // "\d*" accepts exactly the same file names as the former "(?:\d*)+",
        // without the nested quantifier that could backtrack exponentially.
        $reg = '/<div[^>]*><a\s*href\="([^"]*)"[^>]*><img\ssrc\="kaitori\d*\.gif"[^>]*><\/a><\/div>/i';
        $matchs = $this->pregMatch($reg, $doc, true);
        if (!$matchs) {
            return false;
        }

        return 'http://cherry-rmt.xux.jp/' . $matchs[1];
    }

    /**
     * Parses the sell table of a game page.
     *
     * The first <tr> is read as the header row; every following row becomes
     * one entry keyed by area name with 'areaName', 'basePrice' (first
     * numeric cell), 'stock' (last numeric cell) and 'favor' (header value
     * => cell value for the columns between the first and last header).
     *
     * Presumably pregMatchAll returns one entry per match with the captured
     * group at index 1 - verify against Gather_Base.
     *
     * @param string $doc HTML of the sell page.
     * @return array Rows keyed by area name; empty when no rows are found.
     */
    public function getSellPrice($doc)
    {
        $thisGame = array();

        $tbody = $this->getMiddle($doc, '<tbody>', '</tbody>');
        $trs = $this->getMiddles($tbody, '<tr>', '</tr>');
        // Nothing to parse: avoid count() on a non-array and reading $trs[0].
        if (!is_array($trs) || count($trs) === 0) {
            return $thisGame;
        }

        $headers = $this->pregMatchAll('/<th[^>]*><font[^>]*>([^<]*)<\/font><\/th>/i', $trs[0]);
        $headerCount = is_array($headers) ? count($headers) : 0;
        $rowCount = count($trs);

        for ($i = 1; $i < $rowCount; $i++) {
            $tr = $trs[$i];

            // Area name comes from the image alt text, falling back to its file name.
            $name = $this->getMiddle($tr, 'alt="', '"', true);
            if (!$name) {
                $name = $this->getMiddle($tr, 'src="', '.', true);
            }
            if (!$name) {
                continue;
            }

            $this->showArea($name, 'sell');

            $tds = $this->pregMatchAll('/<td[^>]*><font[^>]*>(\d*)[^<]*<\/font><\/td>/i', $tr);
            if (!is_array($tds)) {
                $tds = array();
            }
            $last = count($tds) - 1;

            // Missing cells yield null, as before, but without undefined-index access.
            $thisGame[$name] = array(
                'areaName' => $name,
                'basePrice' => isset($tds[0][1]) ? $tds[0][1] : null,
                'stock' => isset($tds[$last][1]) ? $tds[$last][1] : null,
                'favor' => array(),
            );

            // Header column $j lines up with numeric cell $j - 1.
            for ($j = 1; $j < $headerCount - 1; $j++) {
                $favorStock = intval($headers[$j][1]);
                $favorPrice = isset($tds[$j - 1][1]) ? $tds[$j - 1][1] : null;
                $thisGame[$name]['favor'][$favorStock] = $favorPrice;
            }
        }

        return $thisGame;
    }

    /**
     * Parses the buy table of a game page.
     *
     * Every row after the first becomes one entry keyed by area name with
     * 'areaName', 'basePrice' (always 0) and 'stock' (the first numeric
     * cell, as an integer; 0 when the row has no matching cell).
     *
     * @param string $doc HTML of the buy page.
     * @return array Rows keyed by area name; empty when no rows are found.
     */
    public function getBuyPrice($doc)
    {
        $thisGame = array();

        $tbody = $this->getMiddle($doc, '<tbody>', '</tbody>');
        $trs = $this->getMiddles($tbody, '<tr>', '</tr>');
        // Nothing to parse: avoid count() on a non-array.
        if (!is_array($trs)) {
            return $thisGame;
        }

        $rowCount = count($trs);
        for ($i = 1; $i < $rowCount; $i++) {
            $tr = $trs[$i];

            // Area name comes from the image alt text, falling back to its file name.
            $name = $this->getMiddle($tr, 'alt="', '"', true);
            if (!$name) {
                $name = $this->getMiddle($tr, 'src="', '.', true);
            }
            if (!$name) {
                continue;
            }

            $this->showArea($name, 'buy');

            $tds = $this->pregMatch('/<td[^>]*><font[^>]*>(\d*)[^<]*<\/font><\/td>/i', $tr);

            $thisGame[$name] = array(
                'areaName' => $name,
                'basePrice' => 0,
                'stock' => isset($tds[1]) ? intval($tds[1]) : 0,
            );
        }

        return $thisGame;
    }
}
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货