如何从抓取的页面中匹配下面的内容？
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
同时取出 href 的内容。必须同时匹配 rel 的值 alternate 和 type 的值 application/rss+xml。
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
同时取出 href 的内容。必须同时匹配 rel 的值 alternate 和 type 的值 application/rss+xml。
// Find every <link> tag whose rel is "alternate" and whose type is
// "application/rss+xml", and capture its href value.
//
// "#" is used as the pattern delimiter so that the "/" inside the MIME type
// does not terminate the pattern, and the "+" is escaped so it is matched
// literally instead of acting as a quantifier.
//
// Result: $matches[0] holds the full matched tags, $matches[1] the href values.
// Limitation: rel must come first, directly followed by type, and attribute
// values must be double-quoted; other attributes may sit between type and href.
// NOTE(review): $strimg is presumably the fetched page HTML - confirm at the caller.
preg_match_all(
    '#<link\s+rel="alternate"\s+type="application/rss\+xml"[^>]*?\shref="([^"]*)"[^>]*>#i',
    $strimg,
    $matches
);
var_dump($matches);
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
<<<
// Match only RSS feed <link> tags and capture their attributes.
//
// Capture groups keep their previous positions: 1 = rel, 2 = type, 3 = href.
// rel and type are now pinned to the required values ("alternate" and
// "application/rss+xml"), and the href capture is bounded by [^"]* so a match
// can no longer run across several tags the way a greedy (.*) with the "s"
// flag does.
// Limitation: attributes must appear in the order rel, type, href and be
// double-quoted.
preg_match_all(
    '/<link\s+rel="(alternate)"\s+type="(application\/rss\+xml)"\s+href="([^"]*)"[^>]*>/i',
    $string,
    $matches
);
print_r($matches);
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
<link rel="stylesheet" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog.css" />
content;如果要进行匹配的内容是 string，我只想匹配 rel=alternate、type=application/rss+xml 的标签，同时取得 href 的值。
$string = <<<content
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
<link rel="stylesheet" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog.css" />
content;

// Collect the attributes of every <link> tag into $info, grouped by attribute
// name: $info['rel'][], $info['type'][], $info['href'][], ...
//
// The attributes are read directly with regular expressions rather than being
// rewritten into a query string for parse_str(): parse_str() URL-decodes its
// input, which turns the "+" of "application/rss+xml" into a space and would
// also mangle values containing "=", "&" or "%".
// Limitation: only double-quoted attribute values are recognised.
$info = array();
preg_match_all('/<link\s([^>]*)>/i', $string, $tags);
foreach ($tags[1] as $attributes) {
    // Each pair is array(full match, attribute name, attribute value).
    preg_match_all('/([\w:-]+)\s*=\s*"([^"]*)"/', $attributes, $pairs, PREG_SET_ORDER);
    foreach ($pairs as $pair) {
        $info[$pair[1]][] = $pair[2];
    }
}
print_r($info);
Array
(
[rel] => Array
(
[0] => alternate
[1] => stylesheet
) [type] => Array
(
[0] => application/rss xml
[1] => text/css
) [href] => Array
(
[0] => http://blog.sina.com.cn/rss/lilianjie.xml
[1] => http://simg.sinajs.cn/blog7style/css/blog/newblog.css
) [title] => Array
(
[0] => RSS
))
// Example: use a basic tag selector from simple_html_dom to read attribute
// values out of an HTML fragment.
include '../simple_html_dom.php';

$string = <<<content
<link rel="alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie.xml" title="RSS" />
<ink rel="stylesheet" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog2.css" />
<br>
<div></div>
<lin REL="stylesheet" href="http://simg.sinajs.cn/blog7style/css/blog/newblog3.css" type="text/css"/>
<link Rel="Alternate" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog4.css" />
<link type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie24.xml" title="RSS" rel="alternate"/>
<link rel="Alternate" type="application/rss+xml" href="http://blog.sina.com.cn/rss/lilianjie22.xml" title="RSS" />
<link rel="alternate" href=http://blog.sina.com.cn/rss/lilianjie21.xml title="RSS" type='application/rss+xml'/>
<ink rel="stylesheet" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog5.css" />
<br>
<div></div>
<lin rel="stylesheet" type="text/css" href="http://simg.sinajs.cn/blog7style/css/blog/newblog6.css" />
content;

$html = str_get_html($string);

// Visit every element selected by 'link' and print the href of those whose
// rel and type, compared case-insensitively, are "alternate" and
// "application/rss+xml".
foreach ($html->find('link') as $link) {
    if (strtolower($link->rel) !== 'alternate') {
        continue;
    }
    if (strtolower($link->type) !== 'application/rss+xml') {
        continue;
    }
    echo $link->href . '<br>';
}