写一个php脚本,希望把几个表内的数据通过查询写入一个指定表内。执行次数量大,大致有10W左右,每次循环查询语句有5个,写入是批量写入,每有100条写一次。运行中,无用的变量数据都有置空为null。现在就是php脚本运行过程中,占用内存一直变大,直至耗尽内存。
用浏览器页面运行,发现占用内存竟然小于php-cli命令行执行。现在问题:
1、php脚本运行过程中,释放内存、置空null后,还是需要gc来操作(这个是操作系统执行?);sql每次查询后,使用完查询结果就释放查询结果,但目前内存依然是越来越大。 2、php命令行执行要比页面执行脚本耗内存吗?
/**
 * Build the per-day industry cache tables (intended to be run from the command line).
 *
 * Flow, as implemented below:
 *  1. Read the latest `end_time` of the type-4 rows in the CacheTime table; this is the
 *     first day to process. Processing stops at local midnight of the current day.
 *  2. For every day in that range, insert a CacheTime row (type 4) and then, for each
 *     top-level industry x source site x detailed industry, run five aggregate queries
 *     against `industry_data_keywords_map` (total / positive / neutral / negative counts
 *     and the average sentiment value).
 *  3. Insert the resulting rows into `cache_industry_day_<table_suffix>` in small batches.
 *
 * Progress is echoed and written to the dated `industryByDay` log file.
 *
 * NOTE(review): nothing in this method accumulates across iterations except the
 * `$log_num` counter; pending rows are flushed every `$batch_size` rows. If memory
 * still grows during a long run, the growth is presumably inside the framework
 * (e.g. buffered SQL/debug logging) — confirm against the framework version in use.
 *
 * @return void
 */
public function makeIndustryCacheByDayCli() {
    ini_set("memory_limit","-1");
    ignore_user_abort(true);
    set_time_limit(0);
    // ob_end_clean() raises a notice when no output buffer is active, which is the
    // normal situation under php-cli, so only call it when there is a buffer to drop.
    if (ob_get_level() > 0) {
        ob_end_clean();
    }

    // isset() avoids an undefined-index notice when the argument was not supplied.
    if (!isset($_GET['argc']) || !($_GET['argc'] > 0)) {
        return;
    }

    // Find the end of the last processed period (type 4); processing resumes from there.
    $m = M('CacheTime');
    $start_time = $m->where('`type`=4')->order('`end_time` DESC')->getField('`end_time`');

    echo "开始执行\n";
    flush();

    // Without a previous type-4 record the day loop below would start at the Unix
    // epoch and run for decades of days, so stop here instead.
    if (!$start_time) {
        echo "未找到type=4的已处理时间记录,无法确定起始时间,终止执行\n";
        flush();
        return;
    }

    // Upper bound: local midnight of the current day (exclusive).
    $end_time = strtotime(date('Y-m-d', time()));

    // All top-level industries.
    $industry = $this->_getIndustryTop();
    // All source sites.
    $sites = $this->_getSourceSite();

    echo "时间:".date('Y-m-d', $start_time)."-".date('Y-m-d', $end_time)."开始执行\n";
    flush();
    Log::write("时间:".date('Y-m-d', $start_time)."-".date('Y-m-d', $end_time)." 开始执行>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

    $server_m = new ServerModel();
    $mm = M();
    $log_num = 0;
    // Number of generated rows collected before they are written with one addAll().
    $batch_size = 4;

    // Outer loop: one iteration per day.
    for ($t = $start_time; $t < $end_time; $t += 86400) {
        $start_time_inner = $t;
        $end_time_inner = $t + 86400;

        // Record the period in the CacheTime table. A fresh array is built each day
        // so no value can leak over from the previous iteration.
        $cache_time_data = array(
            'type'            => 4,
            'start_time'      => $start_time_inner,
            'end_time'        => $end_time_inner,
            'time_name'       => date('Y年m月d日', $start_time_inner),
            'status_industry' => 1,
        );
        $m->add($cache_time_data);

        echo "时间:".date('Y-m-d', $start_time_inner)." 一天内的数据 开始生成\n";
        flush();

        $data = array();
        // Conditions shared by every query of this day.
        $where_common = " pubtime_stamp >= ".$start_time_inner
            ." AND pubtime_stamp < ".$end_time_inner
            ." AND is_similar = 0 AND sentiment_status = 1";

        // Loop over the top-level industries.
        foreach ($industry as $v_industry) {
            // Target cache table for this industry.
            $cache_table_name = 'cache_industry_day_' . $v_industry['table_suffix'];
            // Switch to the database server that holds this industry's comment data.
            $dbconfig = $server_m->getDataServerInfoById($v_industry['server_id']);
            $mm->db(1, $dbconfig);

            // All detailed industries below this top-level industry.
            $industry_detail = $this->_getIndustry($v_industry['id'], 0);

            // Outer loop: source sites; inner loop: detailed industries.
            foreach ($sites as $v_site) {
                if ($v_site['id'] != '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag']
                        ." AND site_id = ".$v_site['id'];
                } elseif ($v_site['id'] == '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag'];
                } else {
                    $where_site = '';
                }

                foreach ($industry_detail as $v_industry_detail) {
                    $where_industry = " AND industry_level_".$v_industry_detail['level']
                        ." = ".$v_industry_detail['id'];
                    $where = $where_common . $where_site . $where_industry;
                    $where_good = $where." AND sentiment_value > 0";
                    $where_neuter = $where." AND sentiment_value = 0";
                    $where_bad = $where." AND sentiment_value < 0";

                    // NOTE(review): a keyword_num of 0 would make the divisions below
                    // fail — confirm the data guarantees a non-zero value.
                    $keyword_num = $v_industry_detail['keyword_num'];

                    // Key order is kept identical for every row of a batch.
                    $row = array();
                    $row['source_id'] = $v_site['id'];
                    $row['source_type'] = $v_site['flag'];
                    $row['industry_id'] = $v_industry_detail['id'];
                    $row['date_day'] = $start_time_inner;
                    $row['comment_total_num'] = $mm->table('industry_data_keywords_map')
                        ->where($where)
                        ->count() / $keyword_num;
                    $row['comment_good_num'] = $mm->table('industry_data_keywords_map')
                        ->where($where_good)
                        ->count() / $keyword_num;
                    $row['comment_neuter_num'] = $mm->table('industry_data_keywords_map')
                        ->where($where_neuter)
                        ->count() / $keyword_num;
                    $row['comment_bad_num'] = $mm->table('industry_data_keywords_map')
                        ->where($where_bad)
                        ->count() / $keyword_num;
                    $row['comment_score'] = $mm->table('industry_data_keywords_map')
                        ->where($where)
                        ->avg('sentiment_value');
                    $data[] = $row;
                    unset($row);

                    // Progress output and log entry.
                    $log_num++;
                    echo $log_num .' 行业:'.$v_industry['name'].' 来源站点:'.$v_site['name'].' 具体行业:'. $v_industry_detail['name'].' 缓存数据生成'."\n";
                    flush();
                    Log::write($log_num .' 行业:'.$v_industry['name'].' 来源站点:'.$v_site['name'].' 具体行业:'. $v_industry_detail['name'].' 缓存数据生成', 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

                    // Write a full batch to the cache table and start a new one.
                    if (count($data) >= $batch_size) {
                        $mm->table($cache_table_name)->addAll($data);
                        $data = array();
                    }
                }
            }

            // Write whatever is left for this industry before switching tables.
            if ($data) {
                $mm->table($cache_table_name)->addAll($data);
                $data = array();
            }
        }

        echo "时间:".date('Y-m-d', $start_time_inner)." 一天内的数据 开始结束\n";
        flush();
    }

    Log::write("时间:".date('Y-m-d', $start_time)."-".date('Y-m-d', $end_time)." 一共{$log_num}条数据生成结束!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');
    echo "一共{$log_num}条缓存数据生成!\n";
}

解决方案 »

  1.   

    代码太长了,没心思看。cli 没必要就不要echo了,记入log更好。
    但这也不是重点,很奇怪为何越来越大
    做一些必要的unset吧
      

  2.   

    本来是unset的,现在用 $data = null; 了。其他的一些变量如 $industry、$sites 也没多大。同时,每次循环重新赋值 $a 的话,$a 是否有必要unset?
    另外unset和置空为null 效果是否一致
    谢谢了~.
      

  3.   

    cli 的话用新版本php会比较好,记忆中以前版本垃圾回收不太好,不过我也很久没玩php cli了。以前旧版,遇到大的数组,多次循环每次重新赋值,本以为可以冲掉,但发现内存还是提高。
    后来每次进循环前用了unset,改善了,但也是两三年前的事了,现在版本应该好多了吧?
    另外,一些频繁使用的计算不妨写成function,比起全局计算会少用很多内存的
      

  4.   


    谢谢回答,现在用的是 PHP 5.2.6-1+lenny16 with Suhosin-Patch 0.9.6.2 (cli) (built: Feb  3 2012 08:19:55)。准备全部改写成函数,内部变量在每次循环后全部删除,看看效果。