本帖最后由 lingfu516 于 2009-09-26 23:23:14 编辑

解决方案 »

  1.   

    楼主可以不用正则表达式来实现此功能。不多说,去网上查看一下 HTMLParser 的一些例子,想必对你能有帮助,它是专门用来解析网页的。
      

  2.   


    package close.regex.test;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    /**
     * Demonstrates extracting "image links" from an HTML fragment with a regular expression.
     *
     * <p>An image link here is an {@code <a ...>} element whose only content is a single
     * {@code <img ...>} tag, optionally surrounded by whitespace.
     */
    public class RegexTest {

        /**
         * Matches one anchor that wraps exactly one image tag.
         *
         * <p>Each tag is bounded with {@code [^>]*} instead of a greedy {@code .*}, so a match can
         * never run past the end of its own tag and swallow neighbouring links. The {@code \s}
         * after {@code <a} and the {@code \b} after {@code <img} keep tags that merely start with
         * the same letters from matching.
         */
        private static final Pattern IMAGE_LINK =
                Pattern.compile("<a\\s[^>]*>\\s*<img\\b[^>]*>\\s*</a>");

        /**
         * Returns every image link found in the given markup, in document order.
         *
         * @param html the markup to scan; must not be {@code null}
         * @return the matched {@code <a ...><img ...></a>} substrings; empty when nothing matches
         */
        public static List<String> findImageLinks(String html) {
            List<String> links = new ArrayList<String>();
            Matcher matcher = IMAGE_LINK.matcher(html);
            while (matcher.find()) {
                links.add(matcher.group());
            }
            return links;
        }

        /**
         * Prints each image link found in a sample product-listing fragment, one per line.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String str = "<li><h4><a  target=\"_blank\" href=\"http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm\">" +
                    "新款韩式书柜电脑桌组合/书柜/电脑桌/组合/写字台/办公桌</a></h4>" +
                    "<div class=\"item\">" +
                    "<div class=\"pic\">" +
                    "<a href=\"http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm\" target=\"_blank\">" +
                    "<img src=\"http://img08.taobaocdn.com/bao/uploaded/i8/T1mw4iXkeejtPYWVja_120003.jpg_160x160.jpg\" />" +
                    "</a>" +
                    "</div>" +
                    "<div class=\"desc\">" +
                    "<a  target=\"_blank\" href=\"http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm\" class=\"permalink\">" +
                    "新款韩式书柜电脑桌组合/书柜/电脑桌/组合/写字台/办公桌" +
                    "</a>" +
                    "</div>" +
                    "<div class=\"price\">" +
                    "<span>一口价</span>" +
                    "<strong>1880.00 元</strong>" +
                    "</div>" +
                    "<div class=\"remain-date\">剩余 6天</div>" +
                    "</div>" +
                    "</li>";

            for (String link : findImageLinks(str)) {
                System.out.println(link);
            }
        }
    }
      

  3.   


    import java.util.regex.Matcher;
    import java.util.regex.Pattern;public class Regtest {
    public static void main(String[] args)
    {
    String a = "<div class='item'>\n<a href='http://item.taobao.com/' target='_blank'>\r\n<img src='http'/>\r\n</a>\r\nccc";
    System.out.println(a);
    Pattern p = Pattern.compile("<a.*[\r|\n]*<img.*[\r|\n]*</a>");
    Matcher m = p.matcher(a);
    while(m.find())
    {
    String abc = m.group();
    System.out.println(abc);//System.out.println(abc);
    }
    }
    }

    简单地测试了一下,是可以过滤出来的,你自己再试一下吧。我用的是 Java,想要 JS 版本的话你就自己改一下吧。
      

  4.   


    /**
     * For every link on the page that contains an image, alerts the markup of the
     * link's parent element (which includes the link tag itself).
     */
    function test() {
        var anchors = document.getElementsByTagName("a");
        for (var i = 0; i < anchors.length; i++) {
            // Ask the DOM whether this anchor holds an <img> instead of searching the
            // serialized HTML for "<IMG": innerHTML emits lowercase tag names in
            // standards-compliant browsers, so the uppercase search only worked in
            // old IE, and it also fired for images that were merely siblings of the link.
            if (anchors[i].getElementsByTagName("img").length > 0) {
                alert(anchors[i].parentNode.innerHTML);
            }
        }
    }
    // Run the scan once the whole page has loaded.
    window.onload = test;
      

  5.   

    用 JS 取:
    // Collect every <div> element in the document, then alert the inner markup
    // of the one at index 1 (the second <div> in document order).
    var divObj= document.getElementsByTagName('div');
    alert(divObj[ 1 ].innerHTML);
      

  6.   


    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
    <HTML>
     <HEAD>
      <TITLE> New Document </TITLE>
      <META NAME="Generator" CONTENT="EditPlus">
      <META NAME="Author" CONTENT="">
      <META NAME="Keywords" CONTENT="">
      <META NAME="Description" CONTENT="">
     </HEAD>
     <BODY>
    <!-- Sample listing item. The image link inside div.pic is what the script below extracts. -->
    <ul>
    <li>
    <h4>
    <a  target="_blank" href="http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm">
                                    新款韩式书柜电脑桌组合/书柜/电脑桌/组合/写字台/办公桌
    </a>
    </h4>
    <div class="item">
    <div class="pic">
    <a href="http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm" target="_blank">
        <img src="http://img08.taobaocdn.com/bao/uploaded/i8/T1mw4iXkeejtPYWVja_120003.jpg_160x160.jpg" />
    </a>
    </div>
    <div class="desc">
    <a  target="_blank" href="http://item.taobao.com/auction/item_detail-0db2-da5f7fbc424a36acdf75ab6f8dbfa997.htm" class="permalink">
                                        新款韩式书柜电脑桌组合/书柜/电脑桌/组合/写字台/办公桌
    </a>
    </div>
    <div class="price">
        <span>一口价</span>
        <strong>1880.00 元</strong>
    </div>
    <div class="remain-date">剩余 6天</div>
    </div>
    </li>
    </ul>
    <!-- Receives a copy of the extracted image link when the page is double-clicked. -->
    <div id="a"></div>
    <!-- The script lives inside BODY; markup after the closing HTML tag is invalid. -->
    <SCRIPT TYPE="text/javascript">
    <!--
    // On double-click, find the first link that wraps an image and copy its markup
    // into the element with id "a".
    document.ondblclick = function () {
        // [^>]* stops each tag at its own closing bracket, so several tags on one
        // line cannot be merged into a single match. The i flag covers upper- and
        // lowercase tag names.
        var regex = /<a\s[^>]*>\s*<img\b[^>]*>\s*<\/a>/i;
        var m = regex.exec(document.body.innerHTML);
        // exec returns null when the page has no image link; do nothing in that case.
        if (m) {
            document.getElementById("a").innerHTML = m[0];
        }
    };
    //-->
    </SCRIPT>
     </BODY>
    </HTML>