import java.io.*;
import java.net.HttpURLConnection;
import jxl.*;
import java.net.*;import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;public class URLUtil {
// Class-level scratch state. None of these fields is read or written by
// getHtml or main in this class.
// NOTE(review): they are package-private, so other classes could touch them — confirm before removing.
static int x;
static int y;
static String ip=null;
static String input1=null;
static String input2=null;
public static String getHtml(String urlString){
try{
StringBuffer html= new StringBuffer();
URL url=new URL(urlString);
HttpURLConnection conn=(HttpURLConnection)url.openConnection();
InputStreamReader isr=new InputStreamReader(conn.getInputStream());
BufferedReader br = new BufferedReader(isr);
String temp;
while((temp = br.readLine())!=null){
html.append(temp).append("\n");
}
br.close();
isr.close();
return html.toString();
}catch (Exception e){
e.printStackTrace();
return null;
}

}


public static void main(String[] args){
 try{
        Workbook book=Workbook.getWorkbook(new File("产品分类.map.xls"));
        Sheet sheet=book.getSheet(0);
        for(int i=1;i<sheet.getRows();i++){
            Cell c=sheet.getCell(5,i);
System.out.println(URLUtil.getHtml(sheet.getCell(5,i).getContents()));
}book.close();
    }catch(Exception e){
        
        System.out.println(e);
    }
    
    
}
}    
    坐等高人请教!!!小弟不胜感激

解决方案 »

  1.   

    楼主去看看 htmlparser 吧。对于抓取回来的内容,并不推荐使用 indexOf,正则表达式是个不错的解决方案。
      

  2.   

    这两个都是解决方案。你可以先去看一下,然后尝试使用,遇到问题了我才能帮你分析。htmlparser 是专门用来做 HTML 内容解析的;至于正则表达式,你 Google 一下就可以了。
      

  3.   

    htmlparser 里面有些对象,类似于过滤条件之类的功能,去找找资料吧
      

  4.   

    这个 indexOf 怎么写?写在哪里呢?小弟愚笨,求教。
      

  5.   

    link type="text/css" rel="stylesheet" href="/Public/stylesheets/mstyle.css" />
        <script type="text/javascript" src="/Public/scripts/jquery-1.4.4.min.js"></script>
        <link rel="stylesheet" href="/new/resources/jquery/themes/base/jquery.ui.all.css">
        <link rel="stylesheet" type="text/css" href="/new/resources/styles/newtop.css" />
        <!--[if IE 6]>
    <link rel="stylesheet" type="text/css" href="/new/resources/styles/ie6.css"/>
    <![endif]-->
        <script src="/new/resources/jquery/jquery.js" type="text/javascript"></script>
        <script src="/new/resources/scripts/header.js"></script>    <link type="text/css" rel="Stylesheet" href="/Public/stylesheets/south-street/jquery-ui.css" />
        <script type="text/javascript" src="/Public/scripts/jquery.form.js"></script>
        <script type="text/javascript" src="/Public/scripts/jquery-ui-1.8.7.custom.min.js"></script>
        <script type="text/javascript">
            //添加到收藏夹
            function AddFav(i, obj) {
                if (!IsLogin) {
                    alert('请登陆后使用收藏夹功能');
                    return false;
                };
                $.post('/Json/AddFavorites', { 'pid': i, 'fav': 1, 'r': Math.random() }, function (data) {
                    if (data != 0) {
                        alert('收藏成功');
                    }
                });
            }
            function AddInq(i, obj) {
    //            if (!IsLogin) {
    //                alert('请登陆后使用购物车功能');
    //                return false;
    //            };
                $.post('/Json/AddInquirys', { 'pid': i }, function (data) {
                    if (data == 0) {
                        alert('添加失败');
                    }
                    else {
                        $('#PD_Dialog').html('<span>成功添加到购物车,目前购物车共有<small>' + data + '</small>件商品。<a href="/orders/mycart">查看购物车</a>').dialog('open');
                    };
                });
            }
            $(function () {
                $('#PD_Dialog').dialog({
                    autoOpen: false,
                    width: 300,
                    height: 190,
                    resizable: false,
                    buttons: {
                        '确定': function () {
                            $(this).dialog('close');
                        }
                    }
                });            $('#PL_LT span>a[class]').click(
    function () {
        if ($(this).hasClass('plmax')) {
            this.className = 'plmin'
            this.parentNode.nextSibling.style.display = 'none';
        }
        else {
            this.className = 'plmax';
            this.parentNode.nextSibling.style.display = 'block';
        }
    }
    );
            });
        </script>
    </head>
    <body>
            <div class="container C_newtop">
        <div class="header">
            <input type="hidden" value="navShouye" id="navindex">
            <ul class="new_top_10"><li class="logo"><a href="/" title="顾登商城">
                        <img src="/new/resources/images/logo.gif" width="145" height="63" alt="顾登商城" /></a></li>
                <li class="top_free"></li>
                <li class="a800">
                    <img src="/new/resources/images/top400.gif">
                </li>
                <li class="user_fun_10">
                        <dl id="logindiv" class="userOn">
                            <dd>
                                <img src="/new/resources/images/dot_10.gif"><a href="/account/" class="fontred">登录</a></dd>
                            <dd>
                                <img src="/new/resources/images/dot_10.gif"><a href="/account/register" class="fontred">注册</a>
                            </dd>
                        </dl>
                </li>
            </ul>
            <div class="clear">
            </div>
            <div id="nav">
                <p class="car_txt">
                    <a href="/orders/mycart">我的购物车</a></p>
                <ul>
                    <li id="navShouye"><a href="/"></a></li>
                    <li id="navNanxie"><a href="/brand"></a></li>
                    <li id="navNvxie"><a href="/product/class.shtml"></a></li>
                    <li id="navTongxie"><a href="/news"></a></li>
                    <li id="navFuzhuang"><a href="/about"></a></li>
                    <li id="navBao"><a href="/About/Html/7.shtml"></a></li>
                </ul>这些是运行出来的,然后我想从这里面提取url链接并且保存下来,怎么写啊?求大哥们请教给我写一下谢谢啊
      

  6.   

    link type="text/css" rel="stylesheet" href="/Public/stylesheets/mstyle.css" />
        <script type="text/javascript" src="/Public/scripts/jquery-1.4.4.min.js"></script>
        <link rel="stylesheet" href="/new/resources/jquery/themes/base/jquery.ui.all.css">
        <link rel="stylesheet" type="text/css" href="/new/resources/styles/newtop.css" />
        <!--[if IE 6]>
    <link rel="stylesheet" type="text/css" href="/new/resources/styles/ie6.css"/>
    <![endif]-->
        <script src="/new/resources/jquery/jquery.js" type="text/javascript"></script>
        <script src="/new/resources/scripts/header.js"></script>    <link type="text/css" rel="Stylesheet" href="/Public/stylesheets/south-street/jquery-ui.css" />
        <script type="text/javascript" src="/Public/scripts/jquery.form.js"></script>
        <script type="text/javascript" src="/Public/scripts/jquery-ui-1.8.7.custom.min.js"></script>
        <script type="text/javascript">
            //添加到收藏夹
            function AddFav(i, obj) {
                if (!IsLogin) {
                    alert('请登陆后使用收藏夹功能');
                    return false;
                };
                $.post('/Json/AddFavorites', { 'pid': i, 'fav': 1, 'r': Math.random() }, function (data) {
                    if (data != 0) {
                        alert('收藏成功');
                    }
                });
            }
                 <ul>
                    <li id="navShouye"><a href="/"></a></li>
                    <li id="navNanxie"><a href="/brand"></a></li>
                    <li id="navNvxie"><a href="/product/class.shtml"></a></li>
                    <li id="navTongxie"><a href="/news"></a></li>
                    <li id="navFuzhuang"><a href="/about"></a></li>
                    <li id="navBao"><a href="/About/Html/7.shtml"></a></li>
                </ul>比如这些是运行结果,我想从结果里面提取url链接,并且保存下来。小弟不会写啊,求大哥们指点啊,小弟不胜感激,坐等大哥们
      

  7.   

    String url=request.getHeader("Referer"); 获取URL
      

  8.   


     Parser parser = new Parser("http://****");
     NodeFilter filter = new AndFilter(new TagNameFilter("a"),new HasParentFilter(new TagNameFilter("li"))) ; NodeList nodes = parser.extractAllNodesThatMatch(filter);
    int resultNum = nodes.size();
    if (resulNum > 0) {
    for (NodeIterator ni = nodes.elements(); ni.hasMoreNodes();) {
    System.out.println(ni.nextNode().toHtml());
    }
    }