//需要的是这部分的信息-------------------------------------
<h1 class="firstHeading">PDF阅读器</h1>
        <div id="bodyContent">
            <h3 id="siteSub">取自 AEED</h3>
            <div id="contentSub"></div>
                                    <div id="jump-to-nav">跳转到: <a href="#column-one">导航</a>, <a href="#searchInput">搜索</a></div>            <!-- start content -->
            <div id="divPdf0" title="../2003Z/AdbeRdr812_zh_CN.zip" class="downpdf" style="cursor:hand;">PDF阅读器 Adobe Reader 8.1.2 简体中文版</div>
//---------------------------------------------------------<!-- Saved in parser cache with key wikidb:pcache:idhash:9031-0!1!0!!zh-sg!2 and timestamp 20120327075108 -->
<div class="printfooter">
取自"<a href="http://localhost/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8">http://localhost/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8</a>"</div>
                        <!-- end content -->
            <div class="visualClear"></div>
        </div>
    </div>
        </div>
        <div id="column-one">
    <div id="p-cactions" class="portlet">
        <h5>查看</h5>
        <div class="pBody">
            <ul>
                         <li id="ca-nstab-main" class="selected"><a href="/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8">条目</a></li>
                         <li id="ca-talk" class="new"><a href="/index.php?title=Talk:PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=edit">讨论</a></li>
                         <li id="ca-edit"><a href="/index.php?title=PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=edit">编辑</a></li>
                         <li id="ca-history"><a href="/index.php?title=PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=history">历史</a></li>
                </ul>
        </div>
    </div>
    <div class="portlet" id="p-personal">
        <h5>个人工具</h5>
        <div class="pBody">
            <ul>
                <li id="pt-login"><a href="/index.php?title=Special:Userlogin&amp;returnto=PDF%E9%98%85%E8%AF%BB%E5%99%A8">登录/注册</a></li>
            </ul>
        </div>
    </div>
    <script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
        <div class='portlet' id='p-.E7.9F.A5.E8.AF.86.E5.BA.93'>
        <div class='pBody'>
        </div>
    </div>
        <div id="p-search" class="portlet">
        <h5><label for="searchInput">搜索</label></h5>
        <div id="searchBody" class="pBody">
            <form action="/index.php/Special:Search" id="searchform"><div>
                <input id="searchInput" name="search" type="text" accesskey="f" value="" />
                <input type='submit' name="go" class="searchButton" id="searchGoButton"    value="进入" />&nbsp;
                <input type='submit' name="fulltext" class="searchButton" id="mw-searchButton" value="搜索" />
            </div></form>
        </div>
    </div>
    <div class="portlet" id="p-tb">
        <h5>工具箱</h5>
        <div class="pBody">
        </div>
    </div>
        </div><!-- end of the left (by default at least) column -->
            <div class="visualClear"></div>
            <div id="footer">
                <div id="f-poweredbyico"><a href="http://www.mediawiki.org/"><img src="/skins/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" /></a></div>
                <div id="f-copyrightico"><a href="http://www.gnu.org/copyleft/fdl.html"><img src="/skins/common/images/gnu-fdl.png" alt='GNU Free Documentation License 1.2' /></a></div>
        </div>        
        <script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
</div>
<!-- Served by localhost in 0.230 secs. --></body></html>

需要的就是上面标识出来的那部分信息,该怎么用正则来写呢?

解决方案 »

  1.   


    // Sample page source used to demonstrate the extraction.
    // The heredoc's closing marker is kept on a line of its own: PHP versions
    // before 7.3 do not accept further code after it on the same line (they
    // also require the marker at column 0; the indent here is listing layout).
    $html = <<<html
    <h1 class="firstHeading">PDF阅读器</h1>
            <div id="bodyContent">
                <h3 id="siteSub">取自 AEED</h3>
                <div id="contentSub"></div>
                                        <div id="jump-to-nav">跳转到: <a href="#column-one">导航</a>, <a href="#searchInput">搜索</a></div>            <!-- start content -->
                <div id="divPdf0" title="../2003Z/AdbeRdr812_zh_CN.zip" class="downpdf" style="cursor:hand;">PDF阅读器 Adobe Reader 8.1.2 简体中文版</div>
    <!-- Saved in parser cache with key wikidb:pcache:idhash:9031-0!1!0!!zh-sg!2 and timestamp 20120327075108 -->
    <div class="printfooter">
    取自"<a href="http://localhost/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8">http://localhost/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8</a>"</div>
                            <!-- end content -->
                <div class="visualClear"></div>
            </div>
        </div>
            </div>
            <div id="column-one">
        <div id="p-cactions" class="portlet">
            <h5>查看</h5>
            <div class="pBody">
                <ul>
                             <li id="ca-nstab-main" class="selected"><a href="/index.php/PDF%E9%98%85%E8%AF%BB%E5%99%A8">条目</a></li>
                             <li id="ca-talk" class="new"><a href="/index.php?title=Talk:PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=edit">讨论</a></li>
                             <li id="ca-edit"><a href="/index.php?title=PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=edit">编辑</a></li>
                             <li id="ca-history"><a href="/index.php?title=PDF%E9%98%85%E8%AF%BB%E5%99%A8&amp;action=history">历史</a></li>
                    </ul>
            </div>
        </div>
        <div class="portlet" id="p-personal">
            <h5>个人工具</h5>
            <div class="pBody">
                <ul>
                    <li id="pt-login"><a href="/index.php?title=Special:Userlogin&amp;returnto=PDF%E9%98%85%E8%AF%BB%E5%99%A8">登录/注册</a></li>
                </ul>
            </div>
        </div>
        <script type="text/javascript"> if (window.isMSIE55) fixalpha(); </script>
            <div class='portlet' id='p-.E7.9F.A5.E8.AF.86.E5.BA.93'>
            <div class='pBody'>
            </div>
        </div>
            <div id="p-search" class="portlet">
            <h5><label for="searchInput">搜索</label></h5>
            <div id="searchBody" class="pBody">
                <form action="/index.php/Special:Search" id="searchform"><div>
                    <input id="searchInput" name="search" type="text" accesskey="f" value="" />
                    <input type='submit' name="go" class="searchButton" id="searchGoButton"    value="进入" />&nbsp;
                    <input type='submit' name="fulltext" class="searchButton" id="mw-searchButton" value="搜索" />
                </div></form>
            </div>
        </div>
        <div class="portlet" id="p-tb">
            <h5>工具箱</h5>
            <div class="pBody">
            </div>
        </div>
            </div><!-- end of the left (by default at least) column -->
                <div class="visualClear"></div>
                <div id="footer">
                    <div id="f-poweredbyico"><a href="http://www.mediawiki.org/"><img src="/skins/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" /></a></div>
                    <div id="f-copyrightico"><a href="http://www.gnu.org/copyleft/fdl.html"><img src="/skins/common/images/gnu-fdl.png" alt='GNU Free Documentation License 1.2' /></a></div>
            </div>
            <script type="text/javascript">if (window.runOnloadHook) runOnloadHook();</script>
    </div>
    <!-- Served by localhost in 0.230 secs. --></body></html>
    html;

    // Capture from the <h1> heading up to (not including) the parser-cache
    // comment. Flags: i = case-insensitive, s = "." also matches newlines,
    // U = quantifiers are lazy by default, so ".*" stops at the first marker.
    // Print only on a successful match; $match[1] is not set otherwise.
    if (preg_match('/(<h1 class="firstHeading">.*)<\!-- Saved in parser/isU', $html, $match)) {
        echo $match[1];
    }
      

  2.   


    // Match from the page heading through the closing tag of the divPdf0
    // download block ($str is expected to hold the page HTML).
    // Flags: i = case-insensitive, s = "." also matches newlines.
    // Print only on a successful match; $arr[0] is not set otherwise.
    if (preg_match('/<h1 class="firstHeading".+<div id="divPdf0".+?<\/div>/is', $str, $arr)) {
        echo $arr[0];
    }
      

  3.   

    这种是一种类型,还有其他类似的这种页面,能不能以<h1>开始,到<!-- saved 之前结束呢?
    //需要的是这部分的信息-------------------------------------
    <h1 class="firstHeading">PDF阅读器</h1>
            <div id="bodyContent">
                <h3 id="siteSub">取自 AEED</h3>
                <div id="contentSub"></div>
                                        <div id="jump-to-nav">跳转到: <a href="#column-one">导航</a>, <a href="#searchInput">搜索</a></div>            <!-- start content -->
                <div id="divPdf0" title="../2003Z/AdbeRdr812_zh_CN.zip" class="downpdf" style="cursor:hand;">PDF阅读器 Adobe Reader 8.1.2 简体中文版</div>
    //---------------------------------------------------------<!-- Saved in parser cache with key wikidb:pcache:idhash:9031-0!1!0!!zh-sg!2 and timestamp 20120327075108 -->
    <div class="printfooter">
      

  4.   

    <!-- Saved in parser cache with key wikidb:pcache:idhash:9031-0!1!0!!zh-sg!2 and timestamp 20120327075108 -->
    <div class="printfooter">这2句在这类页面里都是唯一的,到<div class="printfooter">结束可以这样写吗?: preg_match_all('/<h1.*<div class="printfooter">/iUs', $contents, $match);
      

  5.   


    // Group 1 receives everything from the first-heading <h1> up to, but not
    // including, the print-footer <div>. Flags: i = case-insensitive,
    // s = "." also matches newlines, U = quantifiers are lazy by default.
    preg_match(
        '/(<h1 class="firstHeading">.*)<div class="printfooter">/isU',
        $html,
        $match
    );
      

  6.   

    可以。preg_match_all('/<h1.*<div class="printfooter">/iUs', $contents, $match);