本帖最后由 bupabupa166 于 2014-10-31 22:08:43 编辑

解决方案 »

  1.   

    <?php
    // Sample markup: twelve anchors separated by single spaces, each carrying its
    // keyword in a data-text attribute. The literal is split per anchor purely for
    // readability; the concatenated value is one single-line string.
    $str = '<a  title="women bags" data-text="women bags" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=women+bags&amp;_frs=1"><b>women bags</b></a>'
        . ' <a  title="backpack" data-text="backpack" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=backpack&amp;_frs=1"><b>backpack</b></a>'
        . ' <a  title="wallet" data-text="wallet" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=wallet&amp;_frs=1"><b>wallet</b></a>'
        . ' <a  title="bag men" data-text="bag men" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+men&amp;_frs=1">bag <b>men</b></a>'
        . ' <a  title="keysool bag" data-text="keysool bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=keysool+bag&amp;_frs=1"><b>keysool</b> bag</a>'
        . ' <a  title="dress" data-text="dress" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=dress&amp;_frs=1"><b>dress</b></a>'
        . ' <a  title="handbag" data-text="handbag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=handbag&amp;_frs=1"><b>handbag</b></a>'
        . ' <a  title="shoulder bag" data-text="shoulder bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoulder+bag&amp;_frs=1"><b>shoulder</b> bag</a>'
        . ' <a  title="purse" data-text="purse" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=purse&amp;_frs=1"><b>purse</b></a>'
        . ' <a  title="shoes" data-text="shoes" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoes&amp;_frs=1"><b>shoes</b></a>'
        . ' <a  title="bag man" data-text="bag man" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+man&amp;_frs=1">bag <b>man</b></a>'
        . ' <a  title="leather bag" data-text="leather bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=leather+bag&amp;_frs=1"><b>leather</b> bag</a>';

    // The U modifier makes (.+) lazy, so each capture stops at the first closing
    // quote instead of running on to the last quote in the string.
    preg_match_all('/data-text="(.+)"/U', $str, $matches, PREG_PATTERN_ORDER);

    // With PREG_PATTERN_ORDER, $matches[1] lists what group 1 captured in every match.
    var_dump($matches[1]);

    /* Output:
    array(12) {
      [0]=>
      string(10) "women bags"
      [1]=>
      string(8) "backpack"
      [2]=>
      string(6) "wallet"
      [3]=>
      string(7) "bag men"
      [4]=>
      string(11) "keysool bag"
      [5]=>
      string(5) "dress"
      [6]=>
      string(7) "handbag"
      [7]=>
      string(12) "shoulder bag"
      [8]=>
      string(5) "purse"
      [9]=>
      string(5) "shoes"
      [10]=>
      string(7) "bag man"
      [11]=>
      string(11) "leather bag"
    }
    */
      

  2.   

    // Sample markup: twelve anchors separated by single spaces; the literal is split
    // per anchor for readability and concatenates to one single-line string.
    $str = '<a  title="women bags" data-text="women bags" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=women+bags&amp;_frs=1"><b>women bags</b></a>'
        . ' <a  title="backpack" data-text="backpack" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=backpack&amp;_frs=1"><b>backpack</b></a>'
        . ' <a  title="wallet" data-text="wallet" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=wallet&amp;_frs=1"><b>wallet</b></a>'
        . ' <a  title="bag men" data-text="bag men" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+men&amp;_frs=1">bag <b>men</b></a>'
        . ' <a  title="keysool bag" data-text="keysool bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=keysool+bag&amp;_frs=1"><b>keysool</b> bag</a>'
        . ' <a  title="dress" data-text="dress" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=dress&amp;_frs=1"><b>dress</b></a>'
        . ' <a  title="handbag" data-text="handbag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=handbag&amp;_frs=1"><b>handbag</b></a>'
        . ' <a  title="shoulder bag" data-text="shoulder bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoulder+bag&amp;_frs=1"><b>shoulder</b> bag</a>'
        . ' <a  title="purse" data-text="purse" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=purse&amp;_frs=1"><b>purse</b></a>'
        . ' <a  title="shoes" data-text="shoes" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoes&amp;_frs=1"><b>shoes</b></a>'
        . ' <a  title="bag man" data-text="bag man" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+man&amp;_frs=1">bag <b>man</b></a>'
        . ' <a  title="leather bag" data-text="leather bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=leather+bag&amp;_frs=1"><b>leather</b> bag</a>';

    // Group 1 wraps the quotes as well, so every capture keeps its surrounding
    // double quotes; U makes .+ lazy so it ends at the nearest closing quote.
    preg_match_all("/data-text=(\".+\")/U", $str, $match);

    // Dump the full match array (whole matches and group 1) after an opening <pre> tag.
    echo "<pre>" . print_r($match, true);
      

  3.   


    // Sample markup: twelve anchors separated by single spaces; the literal is split
    // per anchor for readability and concatenates to one single-line string.
    $str = '<a  title="women bags" data-text="women bags" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=women+bags&amp;_frs=1"><b>women bags</b></a>'
        . ' <a  title="backpack" data-text="backpack" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=backpack&amp;_frs=1"><b>backpack</b></a>'
        . ' <a  title="wallet" data-text="wallet" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=wallet&amp;_frs=1"><b>wallet</b></a>'
        . ' <a  title="bag men" data-text="bag men" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+men&amp;_frs=1">bag <b>men</b></a>'
        . ' <a  title="keysool bag" data-text="keysool bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=keysool+bag&amp;_frs=1"><b>keysool</b> bag</a>'
        . ' <a  title="dress" data-text="dress" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=dress&amp;_frs=1"><b>dress</b></a>'
        . ' <a  title="handbag" data-text="handbag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=handbag&amp;_frs=1"><b>handbag</b></a>'
        . ' <a  title="shoulder bag" data-text="shoulder bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoulder+bag&amp;_frs=1"><b>shoulder</b> bag</a>'
        . ' <a  title="purse" data-text="purse" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=purse&amp;_frs=1"><b>purse</b></a>'
        . ' <a  title="shoes" data-text="shoes" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=shoes&amp;_frs=1"><b>shoes</b></a>'
        . ' <a  title="bag man" data-text="bag man" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=bag+man&amp;_frs=1">bag <b>man</b></a>'
        . ' <a  title="leather bag" data-text="leather bag" href="http://www.xxx.com/keys/i.html?_sacat=0&amp;_nkw=leather+bag&amp;_frs=1"><b>leather</b> bag</a>';

    // (.*?) is lazy, so each capture ends at the nearest closing quote.
    // i = case-insensitive attribute name, s = let "." also match newlines.
    preg_match_all('/data-text="(.*?)"/is', $str, $matches);

    // $matches[1] lists the text captured between the quotes for every match.
    print_r($matches[1]);

    /* Output:
    Array
    (
        [0] => women bags
        [1] => backpack
        [2] => wallet
        [3] => bag men
        [4] => keysool bag
        [5] => dress
        [6] => handbag
        [7] => shoulder bag
        [8] => purse
        [9] => shoes
        [10] => bag man
        [11] => leather bag
    )
    */
      

  4.   

    写了一个比较直观的:先提取单个链接,然后在单个链接中提取 data-text="...." 引号中的信息。
    //先匹配超链接的前半部分,比如<a href="......" >这一部分
    /**
     * Finds every opening <a ...> tag in the given markup and passes each tag,
     * one at a time, to filterData().
     *
     * Prints '匹配失败' when preg_match_all() reports no match; a failed match
     * (it returns false, e.g. for a subject that is not valid UTF-8 while the
     * u modifier is set) takes the same branch.
     *
     * @param string $data HTML fragment to scan for anchor tags.
     * @return void
     */
    function filterLink($data)
    {
        // Require whitespace right after "<a" so that other tags whose names merely
        // start with "a" (<abbr>, <area>, <article>, ...) are not treated as links.
        $pat = '#<a\s[^>]*>#ui';

        if (!preg_match_all($pat, $data, $match)) {
            echo '匹配失败';
            return;
        }

        // $match[0] holds the full text of every matched opening tag.
        foreach ($match[0] as $link) {
            filterData($link);
        }
    }
    // Next step: extract the wanted information from each single <a data-text="..." href="..."> tag.
    /**
     * Prints the value of every data-text="..." attribute found in the given
     * text, each followed by "<br />".
     *
     * Prints '匹配失败' instead when no non-empty data-text attribute is found.
     *
     * @param string $data Text to search, e.g. a single opening <a ...> tag.
     * @return void
     */
    function filterData($data)
    {
        $found = preg_match_all('#data-text="([^"]+?)"#i', $data, $hits);

        // 0 (no match) and false (regex failure) both take the failure branch.
        if (!$found) {
            echo '匹配失败';
            return;
        }

        // $hits[1] holds the captured attribute values; terminate each with <br />.
        echo implode("<br />", $hits[1]) . "<br />";
    }
    // Run the two-stage extraction. $str is not defined in this snippet; it is
    // expected to already hold the anchor markup to scan.
    filterLink($str);