InitialContacts = [{"guid":"","contactId":"1","contactName":"\u7684\u8bf4\u7684","email":"[email protected]","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqhq1%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false},{"guid":"","contactId":"2","contactName":"\u997f\u4eba","email":"[email protected]","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqh2%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false}];
怎么将上面抓取到的代码中的:
"contactName":"\u7684\u8bf4\u7684"里的\u7684\u8bf4\u7684的编码转回来,本来是中文的,可现在成了这种代码
// Path of the cookie file shared by the cURL requests below. It is created
// with tempnam() inside the configured upload_tmp_dir, so the PHP process
// needs permission to read and write there.
define('COOKIEJAR', tempnam(ini_get('upload_tmp_dir'), 'cookie'));
// Timeout setting; the value is handed to CURLOPT_TIMEOUT (seconds).
define('TIMEOUT', 1000);
/**
 * Reads the e-mail addresses stored in a Yahoo! address book by replaying the
 * web login form with cURL and parsing the contact list embedded in the page.
 *
 * The class relies on two constants defined by the surrounding script:
 * COOKIEJAR (path of a writable cookie file) and TIMEOUT (cURL timeout).
 */
class YAHOO
{
    /**
     * Submits the Yahoo! login form for the given account.
     *
     * The login page is fetched first so that its cookies are stored in
     * COOKIEJAR and its hidden form fields can be copied into the POST.
     *
     * @param string $username Account name typed into the "login" field.
     * @param string $password Account password typed into the "passwd" field.
     * @return int 1 when the response to the POST does not contain the word
     *             "submit" (taken to mean the form was not shown again),
     *             0 otherwise or when a transfer fails.
     */
    private function login($username, $password)
    {
        // Step 1: fetch the login page and record its cookies.
        $login_url = "https://login.yahoo.com/config/login?.src=fpctx&.intl=us&.done=http%3A%2F%2Fwww.yahoo.com%2F";
        $ch = curl_init($login_url);
        // NOTE(review): certificate verification is switched off, so the
        // credentials sent below are exposed to a man-in-the-middle. Kept as
        // in the original; enable CURLOPT_SSL_VERIFYPEER once a CA bundle is
        // available on the host.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $contents = curl_exec($ch);
        curl_close($ch);
        if ($contents === false) {
            // Without the form there is nothing to submit.
            return 0;
        }

        // Step 2: copy the hidden inputs (their names all start with a dot).
        $fieldNames = array(
            'tries', 'src', 'md5', 'hash', 'js', 'last', 'promo', 'intl',
            'bypass', 'partner', 'u', 'v', 'challenge', 'yplus', 'emailCode',
            'pkg', 'stepid', 'ev', 'hasMsgr', 'chkP', 'done', 'pd', 'pad', 'aad',
        );
        $postfields = array();
        foreach ($fieldNames as $field) {
            $found = array();
            // The dot is escaped so it matches literally instead of acting as
            // a regex wildcard.
            $pattern = '/<input\s*type="hidden"\s*name="\.' . preg_quote($field, '/')
                . '"\s*value="(.*?)"\s*>/i';
            if (preg_match($pattern, $contents, $found)) {
                $postfields['.' . $field] = $found[1];
            }
        }
        // ".pd" and ".done" are always part of the request, empty when the
        // page did not provide them.
        foreach (array('.pd', '.done') as $required) {
            if (!isset($postfields[$required])) {
                $postfields[$required] = '';
            }
        }
        $postfields['pad'] = 5;
        $postfields['aad'] = 6;
        $postfields['login'] = $username;
        $postfields['passwd'] = $password;
        $postfields['.persistent'] = 'y';
        $postfields['save'] = '';

        // http_build_query() URL-encodes every value, so a password or token
        // containing "&", "=" or "+" can no longer corrupt the request body.
        // The separator is passed explicitly because arg_separator.output may
        // be configured as "&amp;".
        $postargs = http_build_query($postfields, '', '&');
        $request = "https://login.yahoo.com/config/login?";

        // Step 3: submit the form.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_URL, $request);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postargs);
        // Send back the cookies stored by step 1 and keep storing new ones.
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
        curl_setopt($ch, CURLOPT_COOKIEJAR, COOKIEJAR);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $contents = curl_exec($ch);
        curl_close($ch);
        if ($contents === false) {
            return 0;
        }

        // Strict comparison: a match at offset 0 is still a match.
        if (stripos($contents, 'submit') !== false) {
            return 0;
        }
        return 1;
    }

    /**
     * Logs in and returns the e-mail addresses of the account's contacts.
     *
     * @param string $username Yahoo! account name.
     * @param string $password Yahoo! account password.
     * @return array|string|bool List of e-mail address strings; the string
     *                           '登陆失败' when the login is rejected; FALSE
     *                           when no address could be extracted.
     */
    public function getAddressList($username, $password)
    {
        if (!$this->login($username, $password)) {
            return '登陆失败';
        }

        // Fetch the address book page and pull the contact data out of it.
        $url = "http://address.mail.yahoo.com/";
        $names = array();
        $emails = array();
        $data = $this->hanlde_date($url, $names, $emails);
        if (!$data) {
            return FALSE;
        }
        return $data;
    }

    /**
     * Downloads $url with the stored session cookies and extracts the
     * contacts from the "InitialContacts = [...];" assignment in the page.
     *
     * The payload is parsed with json_decode(), which also turns \uXXXX
     * escapes (e.g. in "contactName") back into UTF-8 text.
     *
     * @param string $url    Address book page to download.
     * @param array  $names  Filled with the decoded "contactName" values,
     *                       index-aligned with $emails.
     * @param array  $emails Filled with the "email" values.
     * @return array List of e-mail address strings; empty when the download
     *               fails or the page contains no parsable contact list.
     */
    public function hanlde_date($url, &$names, &$emails)
    {
        $names = array();
        $emails = array();

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_COOKIEFILE, COOKIEJAR);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_TIMEOUT, TIMEOUT);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $contents = curl_exec($ch);
        curl_close($ch);
        if ($contents === false) {
            return $emails;
        }

        // Capture the whole JSON array up to the "];" that closes the
        // assignment, so a ";" inside a contact value does not cut it short.
        $found = array();
        if (!preg_match('/InitialContacts\s*=\s*(\[.*?\])\s*;/is', $contents, $found)) {
            return $emails;
        }

        $contacts = json_decode($found[1], true);
        if (!is_array($contacts)) {
            return $emails;
        }

        foreach ($contacts as $contact) {
            if (!is_array($contact) || !isset($contact['email']) || !is_string($contact['email'])) {
                continue;
            }
            $emails[] = $contact['email'];
            $names[] = (isset($contact['contactName']) && is_string($contact['contactName']))
                ? $contact['contactName']
                : '';
        }

        return $emails;
    }
}
// Demo run: log in with the (masked) credentials and dump whatever
// getAddressList() returns.
$client = new YAHOO();
$addresses = $client->getAddressList('***@yahoo.cn', '****');
print_r($addresses);
?>
{
// Fragment: replacement body for hanlde_date; the function header is not part
// of this snippet. It downloads $url with the stored cookies, cuts the
// "InitialContacts = ...;" payload out of the page, and prints every contact
// object after decoding it with json_decode(). Every return statement below
// is commented out, so the body only prints and returns nothing.
$ch = curl_init( );
curl_setopt( $ch, CURLOPT_COOKIEFILE, COOKIEJAR );
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_TIMEOUT, TIMEOUT );
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$contents = curl_exec($ch);
// echo "<<< EOT". $contents ."EOT";
// iconv("utf-8","gbk",$contents);s
curl_close($ch); $temparr = array();
// Capture everything between "InitialContacts =" and the first ";".
preg_match_all('/InitialContacts\s*=\s*(.*?);/i',$contents,$temparr);
//print_r($temparr);
// Strip the enclosing "[" and "]" of the array literal.
// NOTE(review): $temparr[1][0] is read without checking that the pattern matched.
$temp_arr = trim($temparr[1][0], '[]');
// Split into one piece per object; explode() consumes the "}," separator, so
// every piece except the last loses its closing brace.
$t_s = explode('},',$temp_arr);
$c = count($t_s);
foreach($t_s as $k=>$v){
// Re-append the brace removed by explode().
// NOTE(review): when there are several pieces the brace is also appended to
// the last one, which still has its own "}" -- that piece becomes invalid
// JSON and json_decode() returns null for it. A "}," inside a string value
// would also split an object in the wrong place.
$v .= ($c > 1) ? '}' : '';
$trs = json_decode($v);
echo "<pre>";
print_r($trs);
echo "</pre>";
}
//preg_match_all('/"email":"(.*?)"/i',$temparr[1][0],$temparr1);
//preg_match_all('/"contactName":"(.*?)"/i',$temparr[1][0],$temparr2);
// echo "<script type='text/javascript'>unescape(".$temparr2[1][0].");</script>";
// echo $temparr2[1][0];
// echo "<pre>";
// print_r($temparr2);
// echo "</pre>";
//print_R($temparr1);exit;
//print_r($temparr1);
//print_r($temparr2);
//return $temp_arr;
//return $temparr1[1];
// Match out the JSON object array
}
修改了 hanlde_date 方法,这里提供思路,不一定是好的方法
/**
 * Converts JavaScript/JSON style \uXXXX escapes in a string to UTF-8 text.
 *
 * Everything that is not an escape sequence (spaces, punctuation, existing
 * UTF-8 bytes) is copied through unchanged. Consecutive escapes are decoded
 * together so that a UTF-16 surrogate pair such as \ud83d\ude00 yields one
 * character.
 *
 * Example: unicode_decode('\u7684\u8bf4\u7684') returns "的说的".
 *
 * @param string $name Text that may contain \uXXXX escapes.
 * @return string The text with every decodable escape replaced by its UTF-8
 *                encoding; escapes that cannot be converted are left as-is.
 */
function unicode_decode($name)
{
    // Split on runs of \uXXXX; with PREG_SPLIT_DELIM_CAPTURE the result
    // alternates plain text (even index) and escape runs (odd index).
    $parts = preg_split(
        '/((?:\\\\u[0-9a-fA-F]{4})+)/',
        $name,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );
    if ($parts === false) {
        return $name;
    }

    $decoded = '';
    foreach ($parts as $index => $part) {
        if ($index % 2 === 0) {
            $decoded .= $part;
            continue;
        }
        // Drop the "\u" markers, turn the remaining hex digits into raw
        // big-endian UTF-16 bytes, then convert. The byte order is named
        // explicitly because plain "UCS-2" is platform dependent in iconv.
        $utf16 = pack('H*', str_replace('\\u', '', $part));
        $utf8 = @iconv('UTF-16BE', 'UTF-8', $utf16);
        $decoded .= ($utf8 === false) ? $part : $utf8;
    }

    return $decoded;
}
直接$string = '{"guid":"","contactId":"1","contactName":"\u7684\u8bf4\u7684","email":"[email protected]","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqhq1%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false},{"guid":"","contactId":"2","contactName":"\u997f\u4eba","email":"[email protected]","emaillink":"http:\/\/mrd.mail.yahoo.com\/compose?To=helloqh2%40222.com","isConnection":false,"connection":"","displayImg":null,"msgrID":"","msgrStatus":"","isMsgrBuddy":false}';
$string = json_decode($string);
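print_r($string);就变成中文了。注意:上面的字符串包含多个用逗号分隔的对象,必须先用 [ ] 包起来成为合法的 JSON 数组(例如 json_decode('[' . $string . ']')),否则 json_decode 会返回 null。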