写一个php脚本,希望把几个表内的数据通过查询写入一个指定表内。执行次数量大,大致有10W左右;每次循环查询语句有5个,写入是批量写入,每有100条写一次。运行中,无用的变量数据都已置空为null。现在的问题是:php脚本运行过程中,占用内存一直变大,直至耗尽内存。
用浏览器页面运行,发现占用内存竟然小于php-cli命令行执行。现在问题:
1、php脚本运行过程中,要释放内存,变量置空为null后,是否还是需要gc来操作(这个是由操作系统执行的吗?)。sql每次查询后,使用完查询结果就释放查询结果,但目前内存依然是越来越大。2、php命令行执行要比页面执行脚本更耗内存吗?
/**
 * Build the per-day industry cache rows for every day that has not been processed yet.
 *
 * Reads the newest `end_time` stored in CacheTime for type 4 and walks forward
 * one day (86400 seconds) at a time up to midnight of the current day. For each
 * day, top-level industry, source site and detailed industry it runs five
 * aggregate queries against `industry_data_keywords_map` and batch-inserts the
 * results into `cache_industry_day_<table_suffix>`.
 *
 * Does nothing unless $_GET['argc'] is greater than zero.
 *
 * NOTE(review): apart from $data (flushed every 4 rows) this method keeps no
 * per-iteration data; if memory still grows during long runs, check whether the
 * framework buffers per-query log entries (e.g. in debug mode) — confirm.
 *
 * @return void
 */
public function makeIndustryCacheByDayCli()
{
    ini_set("memory_limit", "-1");
    ignore_user_abort(true);
    set_time_limit(0);

    // ob_end_clean() raises a notice when no output buffer is active,
    // so only call it when there is a buffer to discard.
    if (ob_get_level() > 0) {
        ob_end_clean();
    }

    // isset() avoids an undefined-index notice when the argument is absent.
    if (!isset($_GET['argc']) || !($_GET['argc'] > 0)) {
        return;
    }

    // Resume from the newest end_time already recorded for type 4.
    $m = M('CacheTime');
    $start_time = $m->where('`type`=4')->order('`end_time` DESC')->getField('`end_time`');

    echo "开始执行\n";
    flush();

    // Without a previous end_time the day loop below would start at the Unix
    // epoch and iterate over every day since 1970, so stop here instead.
    if (empty($start_time)) {
        echo "未找到type=4的已处理时间记录,终止执行\n";
        flush();
        return;
    }

    // Midnight of the current day; only days before it are processed.
    $end_time = strtotime(date('Y-m-d', time()));

    // All top-level industries and all source sites.
    $industry = $this->_getIndustryTop();
    $sites = $this->_getSourceSite();

    $range_label = "时间:".date('Y-m-d', $start_time)."-".date('Y-m-d', $end_time);
    echo $range_label."开始执行\n";
    flush();
    // The log file name is rebuilt on every write so it follows the current date.
    Log::write($range_label." 开始执行>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

    $server_m = new ServerModel();
    $mm = M();
    $log_num = 0;
    // Number of rows collected before they are written with one addAll() call.
    $batch_size = 4;

    // One iteration per day.
    for ($t = $start_time; $t < $end_time; $t += 86400) {
        $start_time_inner = $t;
        $end_time_inner = $t + 86400;
        $day_label = date('Y-m-d', $start_time_inner);

        // Record the day in the time-range table before generating its rows.
        $cache_time_data = array(
            'type'            => 4,
            'start_time'      => $start_time_inner,
            'end_time'        => $end_time_inner,
            'time_name'       => date('Y年m月d日', $start_time_inner),
            'status_industry' => 1,
        );
        $m->add($cache_time_data);

        echo "时间:".$day_label." 一天内的数据 开始生成\n";
        flush();

        $data = array();
        $where_common = " pubtime_stamp >= ".$start_time_inner
            ." AND pubtime_stamp < ".$end_time_inner
            ." AND is_similar = 0 AND sentiment_status = 1";

        // Top-level industries.
        foreach ($industry as $v_industry) {
            // Target cache table for this industry.
            $cache_table_name = 'cache_industry_day_' . $v_industry['table_suffix'];

            // Switch the model to the data server configured for this industry.
            $dbconfig = $server_m->getDataServerInfoById($v_industry['server_id']);
            $mm->db(1, $dbconfig);

            $data_num = 0;
            // All detailed industries below this top-level industry.
            $industry_detail = $this->_getIndustry($v_industry['id'], 0);

            // Outer loop: source sites; inner loop: detailed industries.
            foreach ($sites as $v_site) {
                if ($v_site['id'] != '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag']
                        ." AND site_id = ".$v_site['id'];
                } elseif ($v_site['id'] == '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag'];
                } else {
                    $where_site = '';
                }

                foreach ($industry_detail as $v_industry_detail) {
                    $where_industry = " AND industry_level_".$v_industry_detail['level']
                        ." = ".$v_industry_detail['id'];
                    $where = $where_common . $where_site . $where_industry;
                    $where_good = $where." AND sentiment_value > 0";
                    $where_neuter = $where." AND sentiment_value = 0";
                    $where_bad = $where." AND sentiment_value < 0";

                    // NOTE(review): keyword_num is used as a divisor below; a zero
                    // value would cause a division by zero — confirm it is always positive.
                    $keyword_num = $v_industry_detail['keyword_num'];

                    $data[$data_num] = array(
                        'source_id'          => $v_site['id'],
                        'source_type'        => $v_site['flag'],
                        'industry_id'        => $v_industry_detail['id'],
                        'date_day'           => $start_time_inner,
                        'comment_total_num'  => $mm->table('industry_data_keywords_map')
                            ->where($where)
                            ->count() / $keyword_num,
                        'comment_good_num'   => $mm->table('industry_data_keywords_map')
                            ->where($where_good)
                            ->count() / $keyword_num,
                        'comment_neuter_num' => $mm->table('industry_data_keywords_map')
                            ->where($where_neuter)
                            ->count() / $keyword_num,
                        'comment_bad_num'    => $mm->table('industry_data_keywords_map')
                            ->where($where_bad)
                            ->count() / $keyword_num,
                        'comment_score'      => $mm->table('industry_data_keywords_map')
                            ->where($where)
                            ->avg('sentiment_value'),
                    );

                    $log_num++;
                    $data_num++;

                    // Progress output and log entry share the same message.
                    $progress = $log_num .' 行业:'.$v_industry['name'].' 来源站点:'.$v_site['name'].' 具体行业:'. $v_industry_detail['name'].' 缓存数据生成';
                    echo $progress."\n";
                    flush();
                    Log::write($progress, 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

                    // Write the collected rows once a full batch is available.
                    if ($data_num % $batch_size == 0) {
                        $mm->table($cache_table_name)->addAll($data);
                        $data = array();
                        $data_num = 0;
                    }
                }
            }

            // Write whatever is left over for this industry.
            if ($data) {
                $mm->table($cache_table_name)->addAll($data);
                $data = array();
            }
        }

        echo "时间:".$day_label." 一天内的数据 开始结束\n";
        flush();
    }

    Log::write($range_label." 一共{$log_num}条数据生成结束!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');
    echo "一共{$log_num}条缓存数据生成!\n";
}
用浏览器页面运行,发现占用内存竟然小于php-cli命令行执行。现在问题:
1、php脚本运行过程中,要释放内存,变量置空为null后,是否还是需要gc来操作(这个是由操作系统执行的吗?)。sql每次查询后,使用完查询结果就释放查询结果,但目前内存依然是越来越大。2、php命令行执行要比页面执行脚本更耗内存吗?
/**
 * Build the per-day industry cache rows for every day that has not been processed yet.
 *
 * Reads the newest `end_time` stored in CacheTime for type 4 and walks forward
 * one day (86400 seconds) at a time up to midnight of the current day. For each
 * day, top-level industry, source site and detailed industry it runs five
 * aggregate queries against `industry_data_keywords_map` and batch-inserts the
 * results into `cache_industry_day_<table_suffix>`.
 *
 * Does nothing unless $_GET['argc'] is greater than zero.
 *
 * NOTE(review): apart from $data (flushed every 4 rows) this method keeps no
 * per-iteration data; if memory still grows during long runs, check whether the
 * framework buffers per-query log entries (e.g. in debug mode) — confirm.
 *
 * @return void
 */
public function makeIndustryCacheByDayCli()
{
    ini_set("memory_limit", "-1");
    ignore_user_abort(true);
    set_time_limit(0);

    // ob_end_clean() raises a notice when no output buffer is active,
    // so only call it when there is a buffer to discard.
    if (ob_get_level() > 0) {
        ob_end_clean();
    }

    // isset() avoids an undefined-index notice when the argument is absent.
    if (!isset($_GET['argc']) || !($_GET['argc'] > 0)) {
        return;
    }

    // Resume from the newest end_time already recorded for type 4.
    $m = M('CacheTime');
    $start_time = $m->where('`type`=4')->order('`end_time` DESC')->getField('`end_time`');

    echo "开始执行\n";
    flush();

    // Without a previous end_time the day loop below would start at the Unix
    // epoch and iterate over every day since 1970, so stop here instead.
    if (empty($start_time)) {
        echo "未找到type=4的已处理时间记录,终止执行\n";
        flush();
        return;
    }

    // Midnight of the current day; only days before it are processed.
    $end_time = strtotime(date('Y-m-d', time()));

    // All top-level industries and all source sites.
    $industry = $this->_getIndustryTop();
    $sites = $this->_getSourceSite();

    $range_label = "时间:".date('Y-m-d', $start_time)."-".date('Y-m-d', $end_time);
    echo $range_label."开始执行\n";
    flush();
    // The log file name is rebuilt on every write so it follows the current date.
    Log::write($range_label." 开始执行>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

    $server_m = new ServerModel();
    $mm = M();
    $log_num = 0;
    // Number of rows collected before they are written with one addAll() call.
    $batch_size = 4;

    // One iteration per day.
    for ($t = $start_time; $t < $end_time; $t += 86400) {
        $start_time_inner = $t;
        $end_time_inner = $t + 86400;
        $day_label = date('Y-m-d', $start_time_inner);

        // Record the day in the time-range table before generating its rows.
        $cache_time_data = array(
            'type'            => 4,
            'start_time'      => $start_time_inner,
            'end_time'        => $end_time_inner,
            'time_name'       => date('Y年m月d日', $start_time_inner),
            'status_industry' => 1,
        );
        $m->add($cache_time_data);

        echo "时间:".$day_label." 一天内的数据 开始生成\n";
        flush();

        $data = array();
        $where_common = " pubtime_stamp >= ".$start_time_inner
            ." AND pubtime_stamp < ".$end_time_inner
            ." AND is_similar = 0 AND sentiment_status = 1";

        // Top-level industries.
        foreach ($industry as $v_industry) {
            // Target cache table for this industry.
            $cache_table_name = 'cache_industry_day_' . $v_industry['table_suffix'];

            // Switch the model to the data server configured for this industry.
            $dbconfig = $server_m->getDataServerInfoById($v_industry['server_id']);
            $mm->db(1, $dbconfig);

            $data_num = 0;
            // All detailed industries below this top-level industry.
            $industry_detail = $this->_getIndustry($v_industry['id'], 0);

            // Outer loop: source sites; inner loop: detailed industries.
            foreach ($sites as $v_site) {
                if ($v_site['id'] != '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag']
                        ." AND site_id = ".$v_site['id'];
                } elseif ($v_site['id'] == '0' && $v_site['flag'] != '0') {
                    $where_site = " AND content_type = ".$v_site['flag'];
                } else {
                    $where_site = '';
                }

                foreach ($industry_detail as $v_industry_detail) {
                    $where_industry = " AND industry_level_".$v_industry_detail['level']
                        ." = ".$v_industry_detail['id'];
                    $where = $where_common . $where_site . $where_industry;
                    $where_good = $where." AND sentiment_value > 0";
                    $where_neuter = $where." AND sentiment_value = 0";
                    $where_bad = $where." AND sentiment_value < 0";

                    // NOTE(review): keyword_num is used as a divisor below; a zero
                    // value would cause a division by zero — confirm it is always positive.
                    $keyword_num = $v_industry_detail['keyword_num'];

                    $data[$data_num] = array(
                        'source_id'          => $v_site['id'],
                        'source_type'        => $v_site['flag'],
                        'industry_id'        => $v_industry_detail['id'],
                        'date_day'           => $start_time_inner,
                        'comment_total_num'  => $mm->table('industry_data_keywords_map')
                            ->where($where)
                            ->count() / $keyword_num,
                        'comment_good_num'   => $mm->table('industry_data_keywords_map')
                            ->where($where_good)
                            ->count() / $keyword_num,
                        'comment_neuter_num' => $mm->table('industry_data_keywords_map')
                            ->where($where_neuter)
                            ->count() / $keyword_num,
                        'comment_bad_num'    => $mm->table('industry_data_keywords_map')
                            ->where($where_bad)
                            ->count() / $keyword_num,
                        'comment_score'      => $mm->table('industry_data_keywords_map')
                            ->where($where)
                            ->avg('sentiment_value'),
                    );

                    $log_num++;
                    $data_num++;

                    // Progress output and log entry share the same message.
                    $progress = $log_num .' 行业:'.$v_industry['name'].' 来源站点:'.$v_site['name'].' 具体行业:'. $v_industry_detail['name'].' 缓存数据生成';
                    echo $progress."\n";
                    flush();
                    Log::write($progress, 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');

                    // Write the collected rows once a full batch is available.
                    if ($data_num % $batch_size == 0) {
                        $mm->table($cache_table_name)->addAll($data);
                        $data = array();
                        $data_num = 0;
                    }
                }
            }

            // Write whatever is left over for this industry.
            if ($data) {
                $mm->table($cache_table_name)->addAll($data);
                $data = array();
            }
        }

        echo "时间:".$day_label." 一天内的数据 开始结束\n";
        flush();
    }

    Log::write($range_label." 一共{$log_num}条数据生成结束!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n", 'industryByDay缓存日志', 3, LOG_PATH.date('y_m_d').'.industryByDay.log');
    echo "一共{$log_num}条缓存数据生成!\n";
}
但这也不是重点,很奇怪为何越来越大
做一些必要的unset吧
另外,unset和置空为null效果是否一致?
谢谢了~.
后来每次进循环前用了unset,改善了,但也是两三年前的事了,现在版本应该好多了吧?
另外,一些频繁使用的计算不妨写成function,比起全局计算会少用很多内存的
谢谢回答。现在用的是PHP 5.2.6-1+lenny16 with Suhosin-Patch 0.9.6.2 (cli) (built: Feb 3 2012 08:19:55)。准备把全局代码全部写成函数,内部变量在每次循环后全部删除,看看效果。