InitialContacts = [{"guid":"","contactId":"1","contactName":"\u7684\u8bf4\u7684","email":"helloqhq1@222.com","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqhq1%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false},{"guid":"","contactId":"2","contactName":"\u997f\u4eba","email":"helloqh2@222.com","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqh2%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false}];
怎么把上面抓取到的数据中的:
"contactName":"\u7684\u8bf4\u7684" 里的 \u7684\u8bf4\u7684 转换回中文?这些内容原本是中文,抓取下来之后却变成了这种 \uXXXX 形式的编码。

解决方案 »

  1.   

    所有代码都贴出来吧,这是抓取 yahoo.cn 邮箱联系人的程序:
    <?php
        define( "COOKIEJAR", tempnam( ini_get( "upload_tmp_dir" ), "cookie" ) );
        //定义COOKIES存放的路径,要有操作的权限
        define( "TIMEOUT", 1000 );
        //超时设定
        class YAHOO
        {
            private function login($username, $password)
            {
                //第一步:模拟抓取登录页面的数据,并记下cookies
                $cookies = array(); $matches = array();
                //获取表单
                $login_url = "https://login.yahoo.com/config/login?.src=fpctx&.intl=us&.done=http%3A%2F%2Fwww.yahoo.com%2F";
                $ch = curl_init($login_url);
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
                $contents = curl_exec($ch); curl_close($ch);
                //构造参数
                $name =
                array('tries','src','md5','hash','js','last','promo','intl','bypass','partner','u','v','challenge','yplus','emailCode','pkg','stepid','ev','hasMsgr','chkP','done','pd','p
                ad','aad');
                $postfiles = array();
                $matches = array();
                foreach($name as $v)
                {
                    preg_match('/<input\s*type="hidden"\s*name=".'.$v.'"\s*value="(.*?)"\s*>/i', $contents, $matches);
                    if(!empty($matches))
                    {
                        $postfiles['.'.$v] = $matches[1];
                        $matches = array();
                        }
                    if($v == 'pd')
                    {
                        $postfiles['.'.$v] = urlencode($postfiles['.'.$v]);
                    }
                 
                 }
                 $postfiles['pad'] = 5;
                 $postfiles['aad'] = 6;
                 $postfiles['login'] = urlencode($username);
                 $postfiles['passwd'] = $password;
                 $postfiles['.persistent'] = 'y';
                 $postfiles['save'] = '';
                 $postfiles['.done'] = urlencode($postfiles['.done']);
                 //$postfiles['.pd'] = urlencode($postfiles['.pd']);
                 $postargs = '';
                 foreach($postfiles as $k => $v){ $postargs .= $k.'='.$v.'&'; }
                 $postargs = substr($postargs,0,-1); $request = "https://login.yahoo.com/config/login?";
    //             print_r($postargs);exit;
                 //开始登录
                 $ch = curl_init();
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 curl_setopt($ch, CURLOPT_URL, $request);
                 curl_setopt($ch, CURLOPT_POST, 1);
                 curl_setopt($ch, CURLOPT_POSTFIELDS, $postargs);
                 curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
                 curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 $contents = curl_exec($ch);
                 curl_close($ch);
    //             echo $contents;exit;
                 if (stripos($contents,'submit') != FALSE)
                 {
                     return 0;
                 }
                 return 1;
             
            }
            //获取邮箱通讯录-地址
            public function getAddressList($username, $password)
            {
                if (!$this->login($username, $password))
                {
                 return '登陆失败';
                }
                //开始进入模拟抓取
                //get mail list from the page information username && emailaddress
                $url = "http://address.mail.yahoo.com/";
                $data = array( );
                if ( !$data = $this->hanlde_date( $url, $names, $emails) )
                {
                    return FALSE;
                }
                
                
                return $data;
            }
            function hanlde_date( $url, &$names, &$emails)
            {
                 $ch = curl_init( );
                 curl_setopt( $ch, CURLOPT_COOKIEFILE, COOKIEJAR );
                 curl_setopt( $ch, CURLOPT_URL, $url );
                 curl_setopt( $ch, CURLOPT_TIMEOUT, TIMEOUT );
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 $contents = curl_exec($ch);
                 
    //             echo "<<< EOT". $contents ."EOT";
    //             iconv("utf-8","gbk",$contents);s
                 curl_close($ch); $temparr = array();
                 preg_match_all('/InitialContacts\s*=\s*(.*?);/i',$contents,$temparr);
                 echo "<pre>";
                 print_r($temparr);
                 echo "</pre>";
                 preg_match_all('/"email":"(.*?)"/i',$temparr[1][0],$temparr1);
                 preg_match_all('/"contactName":"(.*?)"/i',$temparr[1][0],$temparr2);
    //          echo "<script type='text/javascript'>unescape(".$temparr2[1][0].");</script>";
    //          echo $temparr2[1][0];
    //             echo "<pre>";
    //             print_r($temparr2);
    //             echo "</pre>";
                 //print_R($temparr1);exit;
                 return $temparr1[1];
                 //匹配出JSON对象数组
            }
                 
        }
        $yahoo = new YAHOO;
        $res = $yahoo->getAddressList('***@yahoo.cn','****');
        print_R($res);
    ?>
      

  2.   

    再补充一句:好像是 JSON 格式造成的。应该怎么处理这种格式,才能让其中的中文在浏览器中输出时不再显示为 \uXXXX 编码或乱码?
      

  3.   

    /**
     * Downloads $url with the stored cookies, decodes the JSON array assigned to
     * "InitialContacts" in the page and prints every decoded contact.
     *
     * The array is decoded in a single json_decode() call. Splitting it on "},"
     * and re-appending "}" to each piece turns the last piece into "{...}}",
     * which is invalid JSON, so the last contact was always lost.
     *
     * Reads the COOKIEJAR and TIMEOUT constants.
     *
     * @param string $url    Address-book page to fetch.
     * @param array  $names  Filled with the decoded contact names.
     * @param array  $emails Filled with the contact e-mail addresses, in the same
     *                       order as $names.
     * @return array The e-mail addresses; empty when the request fails or the
     *               page holds no decodable contact list.
     */
    function hanlde_date( $url, &$names, &$emails)
    {
        $names = array();
        $emails = array();

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $contents = curl_exec($ch);
        curl_close($ch);
        if ($contents === false) {
            return array();
        }

        // Capture the whole [...] literal up to the closing "];" so that a ";"
        // inside a contact field does not truncate the JSON.
        $match = array();
        if (!preg_match('/InitialContacts\s*=\s*(\[.*?\])\s*;/is', $contents, $match)) {
            return array();
        }

        // json_decode() also converts the \uXXXX escapes into UTF-8 text.
        $contacts = json_decode($match[1]);
        if (!is_array($contacts)) {
            return array();
        }

        foreach ($contacts as $contact) {
            echo "<pre>";
            print_r($contact);
            echo "</pre>";

            if (!is_object($contact)) {
                continue;
            }
            $names[] = isset($contact->contactName) ? (string) $contact->contactName : '';
            $emails[] = isset($contact->email) ? (string) $contact->email : '';
        }

        return $emails;
    }
    修改了 hanlde_date 方法,这里提供思路,不一定是好的方法
      

  4.   

    <?php
    /**
     * Converts JSON-style \uXXXX escape sequences in a string into UTF-8 text.
     *
     * Everything that is not an escape sequence (spaces, punctuation, "@", ...)
     * is passed through unchanged. A high/low surrogate pair such as
     * \ud83d\ude00 is decoded as one character. A sequence that cannot be
     * decoded (for example a lone surrogate) is left as it was.
     *
     * This is not a full JSON string decoder: other escapes such as \/ or \\
     * are not interpreted. Use json_decode() on the complete JSON document when
     * one is available.
     *
     * @param string $name Text that may contain \uXXXX escapes.
     * @return string The text with the escapes replaced by UTF-8 characters.
     */
    function unicode_decode($name)
    {
        // Match a surrogate pair first so both halves are decoded together,
        // otherwise a single \uXXXX unit with exactly four hex digits.
        $pattern = '/\\\\u[dD][89abAB][0-9a-fA-F]{2}\\\\u[dD][c-fC-F][0-9a-fA-F]{2}'
            . '|\\\\u[0-9a-fA-F]{4}/';

        $decoded = preg_replace_callback(
            $pattern,
            function ($match) {
                // Let the JSON parser do the code-point to UTF-8 conversion; it
                // is independent of platform byte order.
                $char = json_decode('"' . $match[0] . '"');
                return is_string($char) ? $char : $match[0];
            },
            (string) $name
        );

        // preg_replace_callback() returns null on a PCRE error; fall back to the input.
        return $decoded === null ? (string) $name : $decoded;
    }
      

  5.   


    // json_decode() can be applied directly. The contact objects must be wrapped
    // in [ ] so that the text is one valid JSON array; two objects merely joined
    // by a comma are not valid JSON and make json_decode() return null.
    $string = '[{"guid":"","contactId":"1","contactName":"\u7684\u8bf4\u7684","email":"helloqhq1@222.com","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqhq1%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false},{"guid":"","contactId":"2","contactName":"\u997f\u4eba","email":"helloqh2@222.com","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqh2%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false}]';
    $string = json_decode($string);
    // The \uXXXX escapes have been decoded, so the contact names print as Chinese text.
    print_r($string);
      

  6.   

    哦..貌似\u7684\u8bf4\u7684这些编码不是utf8的..怪不得上面的大虾要转码...