类成员函数cache机制抽象

经常会遇到这种情况，比如一个读文件内容的类，有个比较耗时的方法 getContent，假设这个方法对于固定参数返回的内容是固定的，比如：
/**
 * Example class from the question: a reader with one expensive method.
 * The method body is the post's pseudo-code placeholder, not runnable PHP.
 */
class FileReader {
/**
 * Return the content for $file.
 */
public function getContent($file) {
$content = // open -> read -> do some decode
return $content;
}
}
// Repeated calls with the same argument; each one repeats the expensive read.
// The class above defines getContent(), so that is the method called here.
$r = new FileReader();
$r->getContent('a.txt');
$r->getContent('a.txt');
$r->getContent('a.txt');
这样重复调用开销比较大的，然后就加了个cache：
/**
 * Reader that remembers what it has already loaded, per instance.
 */
class FileReader {
    /** @var array Content already loaded, keyed by file name. */
    private $cache = array();

    /**
     * Return the content of $file, loading it at most once per instance.
     *
     * @param string $file Path of the file to read.
     * @return mixed The content stored for $file.
     */
    public function getContent($file) {
        // The cache is an instance property and must be reached through $this;
        // a bare $cache is a fresh, always-empty local variable.
        // array_key_exists() also treats falsy content ('' or '0') as a hit.
        if (!array_key_exists($file, $this->cache)) {
            // file_get_contents() stands in for: open -> read -> do some decode
            $this->cache[$file] = file_get_contents($file);
        }
        return $this->cache[$file];
    }
}
这样好很多，问题是
1. 破坏了getContent的功能单一性
2. 可读性
3. 还加了个类成员变量$cache
像这种有什么巧妙的方法能把cache机制抽象出来吗？类似js的memoize

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

用session/cookie试试，要保持单一性可以把判断写在外面或者在类中再写一个方法，或者共享内存。应该很多种方法。session_start();
/**
 * Reader variant from the reply: every result is recorded in $_SESSION.
 */
class FileReader {
    // The per-instance cache array is left disabled in this variant:
    //private $cache = array();
    private $a;

    /**
     * Produce the content for $file and store it in $_SESSION under that key.
     *
     * @param string $file Key used for the session entry.
     * @return string The content that was stored.
     */
    public function readContent($file) {
        // open -> read -> do some decode ('hello' is the reply's placeholder result)
        $content = 'hello';
        $_SESSION[$file] = $content;
        return $_SESSION[$file];
    }
}
// $file was never assigned in this snippet, so the isset() lookup could not
// match the key that readContent('a.txt') writes. Name the file once and use
// it for both the session lookup and the read.
$file = 'a.txt';
if (isset($_SESSION[$file])) {
    // Served from the session entry written by an earlier readContent() call.
    $content = $_SESSION[$file];
} else {
    $r = new FileReader();
    $content = $r->readContent($file);
}
echo $content;
private $a;
    /**
     * Repeated copy of the reply's readContent(): assigns the placeholder
     * content, stores it in $_SESSION under the $file key and returns it.
     */
    public function readContent($file) {
        $a='hello';
      // open -> read -> do some decode
      $_SESSION[$file]=$a;
      return $a;
    }
我没有表达清楚，其实临时的cache放到哪里并不是关键，这种还是破坏了函数自身，我的本意是做一个完全通用的，适用于普通函数和类成员函数，非侵入式的 cache 机制，
比如js中的 memoization，可以参考这里：http://blog.csdn.net/aj3423/article/details/6213858考虑到php有eval，create_function之类灵活的函数，可能可以用写巧妙的方法写出类似cache机制
google到唯一一篇关于php memoize的帖子，貌似很少有人讨论这个
http://stackoverflow.com/questions/3540403/caching-function-results-in-php
里面的机制应该适用普通函数，正在尝试解决类成员函数
看看，看看，楼主这才是有思想的好同学啊楼上的要向人家学习。。楼主可以参考一下这个扩展：
http://www.php.net/manual/en/function.runkit-function-redefine.php估计反射也是需要用到的：
http://www.php.net/manual/en/book.reflection.php
似乎（我说了似乎）可以搞出和js里类似的实现.
不过缺点是runkit不是php绑定的，需要从pecl里安装，所以不够通用。不管如何，楼主搞出点成果来欢迎回来通知一下，大家都能用得着。
如果能开源建个项目就更好，那什么，我就跟你混了
一般说来，将Cache相关操作封装为一个类即可，耦合是有的，关键是如何降低到最低，参考：
/**
 * Reader that hands its result to the shared Cache helper.
 */
class FileReader {
    /**
     * Return the content stored for $file.
     *
     * NOTE: $data is produced before the cache is consulted, so this wrapper
     * returns the first value stored for $file but does not skip the
     * expensive work on later calls.
     *
     * @param string $file Path of the file to read.
     * @return mixed The first content stored for $file.
     */
    public function getContent($file) {
        // file_get_contents() stands in for: open -> read -> do some decode
        $data = file_get_contents($file);
        // Static call: the snippet never creates a Cache instance.
        return Cache::CacheOperation($file, $data);
    }
}

/**
 * Minimal key/value store that keeps the first value seen for each key.
 */
class Cache {
    /** @var array Stored values keyed by cache key. */
    private static $_cache = array();

    /**
     * Store $data under $key unless the key is already present, then return
     * whatever is stored for $key.
     *
     * @param string $key  Cache key.
     * @param mixed  $data Value to store when the key is new.
     * @return mixed The value stored for $key.
     */
    public static function CacheOperation($key, $data) {
        // The store is a class property; a bare $_cache would be an empty local.
        // array_key_exists() keeps falsy values ('' or 0) from being overwritten.
        if (!array_key_exists($key, self::$_cache)) {
            self::$_cache[$key] = $data;
        }
        return self::$_cache[$key];
    }
}
多谢，这貌似还有个扩展能透明实现memoize：
https://github.com/arraypad/php-memoize公司不让用扩展的说-_-!! 只能用php实现了，反射也许用得上，想办法中，哎。。
这样从原来的类里精简掉了一个$cache变量，但还是需要改函数本身，其实最初的需求是项目中发现有性能问题，原因是反复调用一些大开销的函数，所以想搞一套cache机制，最好是能不修改原有代码。。
第一版：
include_once 'd:/debug.php';

/**
 * Build the modifier keywords for a class declaration from a ReflectionClass.
 *
 * @param ReflectionClass $c Class being inspected.
 * @return string "abstract" and/or "final" joined by a space; '' when neither applies.
 */
function get_class_prefix($c) {
    // if($c->isInterface)
    // if($c->isNamespace)
    $flags = array(
        'abstract' => $c->isAbstract(),
        'final'    => $c->isFinal(),
    );
    // Keep only the keywords whose flag is set, in declaration order.
    return implode(' ', array_keys(array_filter($flags)));
}

/**
 * Build the modifier keywords for a property declaration from a ReflectionProperty.
 *
 * @param ReflectionProperty $_p Property being inspected.
 * @return string Visibility keyword, followed by "static" when applicable.
 */
function get_property_prefix($_p) {
    // if($m->isDefault // what is this?
    $flags = array(
        'private'   => $_p->isPrivate(),
        'public'    => $_p->isPublic(),
        'protected' => $_p->isProtected(),
        'static'    => $_p->isStatic(),
    );
    return implode(' ', array_keys(array_filter($flags)));
}
/**
 * Build the modifier keywords for a method declaration from a ReflectionMethod.
 *
 * @param ReflectionMethod $m Method being inspected.
 * @return string Visibility keyword, followed by "abstract", "final" and
 *                "static" when they apply, joined by single spaces.
 */
function get_method_prefix($m) {
    $p = array();
    // if($m->isConstructor
    // if($m->isDestructor
    if ($m->isPrivate()) {
        $p[] = 'private';
    }
    if ($m->isPublic()) {
        $p[] = 'public';
    }
    if ($m->isProtected()) {
        $p[] = 'protected';
    }
    if ($m->isAbstract()) {
        $p[] = 'abstract';
    }
    if ($m->isFinal()) {
        $p[] = 'final';
    }
    // Without this keyword a static method would be re-declared as an
    // instance method, matching how get_property_prefix() treats properties.
    if ($m->isStatic()) {
        $p[] = 'static';
    }
    return implode(' ', $p);
}
// Default id generator: [method name + arguments] => unique id.
// For complex arguments, supply a custom id generator instead.
class DefaultHasher {
    /**
     * Build a cache key from a function name and its argument list.
     *
     * @param string|array $fun_name Function name (cast to an array of parts).
     * @param array        $args     Call arguments; each one is serialize()d.
     * @return string md5 of the name and serialized arguments joined by "|".
     */
    public static function encode($fun_name, $args = array()) {
        $serialized = array_map("serialize", $args);
        $parts = array_merge((array)$fun_name, $serialized);
        return md5(implode("|", $parts));
    }
}
/**
 * Generate (via eval) a class named $new_class that copies the constants,
 * properties and methods of $base. Methods listed in $memo_funcs are wrapped
 * so their results are cached per argument list; all other methods simply
 * forward to the base implementation.
 *
 * NOTE(review): the generated wrappers call "Base::method(...)" from a class
 * that does not extend Base. That depends on how the runtime treats
 * static-style calls to non-static methods (PHP 8 is understood to reject
 * them) - confirm against the target PHP version.
 *
 * @param string $new_class  Name of the class to generate.
 * @param string $base       Name of the existing class to copy.
 * @param array  $memo_funcs Names of the methods whose results are cached.
 * @param string $hasher     Class whose static encode($name, $args) builds cache keys.
 * @return void
 */
function create_memoized_class($new_class, $base, $memo_funcs, $hasher = 'DefaultHasher') {
    $ref = new ReflectionClass($base);
    $class_prefix = get_class_prefix($ref);

    // Start generating the new class.
    $code = array(
        "$class_prefix class $new_class {",
        "private \$cache = array();",    // holds every value computed so far
        "private \$hasher = '$hasher';", // uid generator class
    );

    // Constants: var_export() emits valid PHP literals, so string constants
    // are quoted instead of being pasted into the source bare.
    foreach ($ref->getConstants() as $name => $value) {
        $code[] = "const $name = " . var_export($value, true) . ";";
    }

    // Properties: defaults are read from reflection, so $base is never
    // instantiated and its constructor may take arguments or do real work.
    $defaults = $ref->getDefaultProperties();
    foreach ($ref->getProperties() as $p) {
        $name = $p->getName();
        $prop_prefix = get_property_prefix($p);
        if (array_key_exists($name, $defaults)) {
            // var_export() also handles nested arrays, which json_encode()
            // output cannot express as PHP source.
            $code[] = "$prop_prefix \$$name = " . var_export($defaults[$name], true) . ";";
        } else {
            $code[] = "$prop_prefix \$$name;";
        }
    }

    // Member functions.
    foreach ($ref->getMethods() as $m) {
        $name = $m->getName();
        $method_prefix = get_method_prefix($m);

        // Generated snippet that forwards the call to the base implementation.
        // The argument list is built element by element so that a call with
        // no arguments yields "Base::method()" instead of the invalid "$args[]".
        $invoke = "\$refs = array();
foreach (array_keys(\$args) as \$i) {
    \$refs[] = '\$args[' . \$i . ']';
}
eval('\$ret = {$base}::{$name}(' . implode(', ', \$refs) . ');');";

        if (in_array($name, $memo_funcs, true)) {
            $code[] = "$method_prefix function $name() {
\$args = func_get_args();
\$hasher = \$this->hasher;
\$uid = \$hasher::encode('$name', \$args) . '';
if (array_key_exists(\$uid, \$this->cache)) {
    return \$this->cache[\$uid];
}
$invoke
\$this->cache[\$uid] = \$ret;
return \$ret;
}";
        } else {
            $code[] = "$method_prefix function $name() {
\$args = func_get_args();
$invoke
return \$ret;
}";
        }
    }

    $code[] = "}";
    eval(implode("\n", $code));
}
//测试
/**
 * Test fixture: a class with a constant, properties of several visibilities
 * and one slow method, used to try out create_memoized_class().
 */
class Person {
    public $job = "coding";
    const const_iq = 200;
    public $pubv = 'public_var';
    // A nested-array default is left disabled here:
    // private $nested = array(array('flag'=>true));
    private $age = 123;
    private $married = false;
    public static $pubstat = 'pub static';

    /**
     * Slow method: logs, sleeps for one second, logs again.
     *
     * @param string $time Label included in the first log line.
     * @return string Always "going home".
     */
    public function work($time) {
        debug("now: $time, sleep 1 hour first...");
        sleep(1);
        debug("Hi, I'm $this->job");
        return "going home";
    }
}
// Demo: generate LazyPerson from Person with work() memoized, then call it
// repeatedly with two different arguments.
create_memoized_class("LazyPerson", "Person", array("work"));$b = new LazyPerson();
$b->work('9:00');//1 second delay
$b->work('9:00');//no delay
$b->work('9:00');//no delay
$b->work('9:00');//no delay
$b->work('10:00');//1 second delay
$b->work('10:00');//no delay
$b->work('10:00');//no delay
$b->work('10:00');//no delay
有无简单方法能解决上面2个注释处的问题？
注3：以上是通过复制一个新的class，copy所有默认的属性值，然后hook某些函数，没找到直接改原有class的办法，应该没有办法能改一个class的定义吧，比如改掉某个函数？
貌似用memoize可以代替原singleton的写法
注1：
http://www.php.net/manual/en/reflectionclass.getdefaultproperties.php注3：
除了用扩展（上面提的runkit），应该是没有php层的方法改变函数
拿到的是 ReflectionProperty 对象，貌似不实例化 $base 也是拿不到 value 的，这是问题1
问题2：上面的注2，要写一个通用的属性生成器，好复杂。。
问题3，4，5.n多问题其实memoize的问题已经转成了如何clone一个类(不生成实例的情况下)，ReflectionClass::getFileName拿到class所在的文件路径 -> 读文件内容 -> 用正则提取class定义 -> 用正则替换目标函数 -> eval一个新class
貌似能解决之前的所有问题，就是需要个牛x的正则，研究正则中。。
不是啊,这个拿到的是一个数组,key是属性名,value是default值.
嗯,这个改源码的方法应该是能实现....
( 似乎可以将原函数改名(在确保无冲突情况下,可以考虑namespace), 然后新建同名函数调用原已改名的函数 )
不得不说此方法很诡异........
欢迎继续....
private情况下好像是拿不到value。。
namespace这个倒是给忘了考虑了。。
哦是用 key=>value这样，倒没试过。
本帖最后由 xuzuning 于 2011-09-19 14:00:05 编辑
哈是一种巧妙的办法，
extend + call_user_func_array(array("parent"
貌似解决了namespace问题，唯一的限制不能cache private方法
是的，不能cache private方法
但 private方法同样不能从外部访问，应该没有必要cache的
很好奇这样reflection + eval的实现效率会低多少
恩这点应该不是问题，这种方法貌似是最接近完美的php层实现。试了：
// Class declared normally.
class A {
    private $xx = 2;

    public function foo($a) {
        return $this->xx;
    }
}

// The same class declared through eval().
eval("class B {
private \$xx = 2; public function foo(\$a) {
return \$this->xx;
}
}");

$times = 100000;

// Each timing run starts on its own line; in the pasted text the next
// statement had been swallowed by the preceding // comment.
$a = new A();
debugTime();
for($i=0; $i<$times; $i++) $a->foo(2);
debugTimeEnd('a'); // output a: 0.31262493133545 seconds

$b = new B();
debugTime();
for($i=0; $i<$times; $i++) $b->foo(2);
debugTimeEnd('b'); // output b: 0.31435298919678 seconds

$c = tack_cache('A');
debugTime();
for($i=0; $i<$times; $i++) $c->foo(2);
debugTimeEnd('c'); // output c: 2.3384540081024 seconds
eval的和普通的时间上没有区别，memoize比普通函数时间上是8倍，应该都是花在了那个if(cache)检查吧
eval会有安全上的隐忧，比如类实例方法有的参数是接收用户输入(get,post)的话，注意php原生代码一定要禁止。
关于执行时间问题,个人感觉应该不是if(cache)那里，你的测试没有单独测试reflection调用和普通调用性能上的差别，可能是这上面的原因。
假如一个页面有2，3个类方法需要cache结果，那执行时间又走什么样的曲线，如果本身比较关心效率的话，建议多测试测试。
本帖最后由 xuzuning 于 2011-09-19 17:30:49 编辑
不是这个问题，你每次调用一个类，是不是都要$ref = new ReflectionClass($classname);一次?
我是说要关注一下
$c = tack_cache('A');
$b = tack_cache('B');
$d = tack_cache('D');

。。
性能的区别
tack_cache 只是一种写法吧，具体使用可以在一个class定义之后紧接着一句类似 generate_cached_class('new_class_name', 'old_class_name', functions_to_cache)
接下来都用 new new_class_name() 了
对，同一个类名只reflection一次.所以我比较关注缓存多个不同的类::method的性能.
放代码讲清楚我的问题。
比如一个页面要cache住下面三个类的getContent返回的结果,那么reflection会不会有效率问题，如果更多呢？
// Three separate reader classes, each with its own getContent() method;
// the method bodies are the post's pseudo-code placeholders.
class FileReader {
    public function getContent($file) {
        $content = // open -> read -> do some decode
        return $content;
    }
}class urlReader {
    public function getContent($url) {
        $content = // open -> read -> do some decode
        return $content;
    }
}class dbReader {
    public function getContent($id) {
        $content = // open -> read -> do some decode
        return $content;
    }
}
本帖最后由 xuzuning 于 2011-09-19 18:56:46 编辑
本帖最后由 xuzuning 于 2011-09-19 19:01:59 编辑
不知道我的理解对不对，从代码上看你是认为3个类的cache是放在一个地方的，cache的类越多，会导致那个地方过于庞大
如果是这样的话，这3个类的cache是独立的
// The original class, named with a leading underscore; the method body is
// the post's pseudo-code placeholder.
class _FileReader {
    public function getContent($file) {
        $content = // open -> read -> do some decode
        return $content;
    }
}
// Requests a class named FileReader built from _FileReader with getContent() memoized.
create_memoized_class("FileReader", "_FileReader", array("getContent"));
// That statement generates a new class, equivalent to:
class FileReader {
    /** @var array Content already loaded, keyed by file name. */
    private $cache = array();

    /**
     * Return the content of $file, loading it at most once per instance.
     *
     * @param string $file Path of the file to read.
     * @return mixed The content stored for $file.
     */
    public function getContent($file) {
        // The cache is an instance property and must be reached through $this;
        // a bare $cache is an empty local and cache[$file] is not a variable at all.
        if (array_key_exists($file, $this->cache)) {
            return $this->cache[$file];
        }
        // file_get_contents() stands in for: open -> read -> do some decode
        $content = file_get_contents($file);
        $this->cache[$file] = $content;
        return $content;
    }
}
// That statement runs when this file is included and uses ReflectionClass once;
// after that, creating instances with new has nothing to do with ReflectionClass.
// Each instance has its own cache storage,
// so this should not cause an efficiency problem.

// The class header below sits on its own line; in the pasted text it had been
// swallowed by the preceding // comment.
class _urlReader {
    public function getContent($url) {
        $content = // open -> read -> do some decode
        return $content;
    }
}
create_memoized_class("urlReader", "_urlReader", array("getContent"));

class _dbReader {
    public function getContent($id) {
        $content = // open -> read -> do some decode
        return $content;
    }
}
create_memoized_class("dbReader", "_dbReader", array("getContent"));
/**
 * Sketch of replacing a hand-written singleton: the factory always builds a
 * new object, and memoizing getInstance() makes repeated calls with the same
 * argument return the same one.
 */
class _Singleton {
  /**
   * @param mixed $xxx Whatever the constructor needs (placeholder in the post).
   * @return Singleton A new instance of the generated class.
   */
  public static function getInstance($xxx) {
    return new Singleton($xxx);
  }
}
// Class names are passed without trailing spaces: "_Singleton " would not
// match the class declared above when it is looked up by name.
create_memoized_class("Singleton", "_Singleton", array("getInstance"));
发个能用的
include_once 'Person.class.php';
/**
 * Generate (via eval) a subclass of $base named $new_class in which the
 * methods listed in $memo_funcs cache their results per argument list.
 *
 * Instance methods are routed through a generated __call() with a
 * per-instance cache; static methods go through a generated __callStatic()
 * with a per-class cache.
 *
 * NOTE(review): the generated code uses array('parent', $method) callables,
 * which newer PHP releases are understood to deprecate - confirm against the
 * target PHP version.
 *
 * @param string       $new_class  Name of the class to generate.
 * @param string       $base       Name of the class to extend.
 * @param array|string $memo_funcs Method name, or list of method names, to memoize.
 * @return void
 * @throws Exception When a listed method does not exist on $base.
 */
function create_memoized_class($new_class, $base, $memo_funcs) {
    if (is_string($memo_funcs)) {
        return create_memoized_class($new_class, $base, array($memo_funcs));
    }

    static $NON_STATIC_FUNC = 1; // 01b
    static $STATIC_FUNC = 2;     // 10b

    // These two statements must run: in the pasted text they had been
    // swallowed by the trailing comment of the line above.
    $ref = new ReflectionClass($base);
    $type = 0;

    // Work out which kinds of method (instance, static or both) are memoized.
    foreach ($memo_funcs as $f) {
        if (!$ref->hasMethod($f)) {
            throw new Exception("function '$f' not found in class '$base'");
        }
        $m = $ref->getMethod($f);
        $type = $type | ($m->isStatic() ? $STATIC_FUNC : $NON_STATIC_FUNC);
    }

    $code = array("class $new_class extends $base {");

    if ($type & $NON_STATIC_FUNC) {
        // The leftover exit() and debug() calls are removed: exit() ended the
        // script on the first memoized instance call.
        $code[] = "private \$cache = array();
function __call(\$m, \$v) {
\$k = md5(serialize(func_get_args()));
if(!array_key_exists(\$k, \$this->cache)) {
\$this->cache[\$k] = call_user_func_array(array('parent', \$m), \$v);
}
return \$this->cache[\$k];
}";
    }

    if ($type & $STATIC_FUNC) {
        $code[] = "private static \$s_cache = array();
public static function __callStatic(\$m, \$v) {
\$k = md5(serialize(func_get_args()));
if(!array_key_exists(\$k, self::\$s_cache)) {
self::\$s_cache[\$k] = call_user_func_array(array('parent', \$m), \$v);
}
return self::\$s_cache[\$k];
}";
    }

    // One thin override per memoized method, keeping the base visibility.
    foreach ($memo_funcs as $f) {
        $m = $ref->getMethod($f);
        $prefix = $m->isPrivate() ? 'private' : ($m->isPublic() ? 'public' : 'protected');
        if ($m->isStatic()) {
            $code[] = "$prefix static function $f() {
return self::__callStatic('$f', func_get_args());
}";
        } else {
            $code[] = "$prefix function $f() {
return \$this->__call('$f', func_get_args());
}";
        }
    }

    $code[] = "}";
    eval(implode("\n", $code));
}
//测试
/**
 * Test fixture: a person with a slow instance method and a static factory.
 */
class Person {
    var $job = "coding";

    /**
     * @param string $name Name included in the creation log line.
     */
    function __construct($name) {
        debug("a person created: [$name]");
    }

    /**
     * Slow method: logs, sleeps for one second, logs again.
     *
     * @param string $time Label included in the first log line.
     * @return string Always "going home".
     */
    function work($time) {
        debug("now: $time, sleep 1 hour first...");
        sleep(1);
        debug("Hi, I'm $this->job");
        return "going home";
    }

    /**
     * Factory for the class it is called on.
     *
     * Uses late static binding instead of a hard-coded "new Person", so a
     * call made through a subclass yields an instance of that subclass and
     * keeps whatever that subclass overrides.
     *
     * @param string $name Name passed to the constructor.
     * @return static
     */
    public static function getInstance($name) {
        return new static($name);
    }
}
// Demo for the second version: memoize both an instance and a static method.
create_memoized_class("LazyPerson", "Person", array("work", "getInstance"));
//non-static method: no problem
$z = new LazyPerson('zz');
$z->work('9:00');
$z->work('9:00');//cached successfully
//static method: no problem
$x = LazyPerson::getInstance('aj');
$y = LazyPerson::getInstance('aj');
debug($x===$y); //true
//combining the two above is where the problem shows up
debug(LazyPerson::getInstance('aj')->work('9:00'));
debug(LazyPerson::getInstance('aj')->work('9:00'));//not cached; the original work() is called twice
//the cause is that getInstance creates an original Person, not a LazyPerson.
//to be continued tomorrow.
忘了删上面代码里的 debug 和一个 exit
不考虑效率那就把专注力放到功能实现吧.\$k = md5(serialize(func_get_args()));
------------------------
这个key生成有个问题，就是如果参数是对象的情况，会发生什么问题?会不会不同的实例化对象也生成相同的key?