[跪求]如何抓取检索结果页面源代码里的内容？

现在要将一批数据依次输入到web上某个搜索网页（非google或baidu等搜索引擎）中进行检索，然后获取检索结果页面源代码中某两个字段间的内容。
比如说我在A页面进行输入检索，然后提交后自动跳转到B页面，要抓取B页面源代码中某两个字段间的内容。
具体该如何实现？哪位大虾能给出类似的源代码，跪谢！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

用 HTMLParser（org.htmlparser）技术可以获取网页中的结果。
//以前做的一个例子，仅供参考：要先把要查看的页面源文件下载到本地，然后根据页面结构解析标签的内容就可以了。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.util.NodeList;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.tags.Font;
import org.htmlparser.tags.P;
import org.htmlparser.tags.B;
import org.htmlparser.tags.H3;
import java.io.OutputStreamWriter;
import java.io.FileOutputStream;

/**
 * Sample showing how to pull fragments out of a locally saved HTML page with
 * the HTMLParser ({@code org.htmlparser}) library.
 *
 * <p>The page is read from disk, every {@code table} tag is collected, one of
 * those tables is written to a fixed output file, and the positions of all
 * tables containing a specific link marker are printed to standard output.
 */
public class test4 {
  /** Charset used to decode the input page and to encode the saved fragment. */
  private static final String ENCODING = "gb2312";

  /** Zero-based position, among the matched table tags, of the table that is saved. */
  private static final int TARGET_TABLE_INDEX = 12;

  /** Query-string fragment identifying the tables whose positions are reported. */
  private static final String LINK_MARKER = "layout=displayIssue&publication_id=1770000777";

  /** Destination file for the saved table fragment. */
  private static final String OUTPUT_PATH = "D:/华迪计算机有限公司/解析html页面测试/test.htm";

  /**
   * Parses the given HTML file, saves one table from it and reports matching tables.
   *
   * <p>The table at position {@link #TARGET_TABLE_INDEX} is written to
   * {@link #OUTPUT_PATH}. Afterwards the index of every table whose HTML
   * contains {@link #LINK_MARKER} is printed, one per line.
   *
   * @param resource path of a local HTML file; it is opened through
   *     {@link #readTextFile(String, String)}, so a URL is not accepted
   * @throws IllegalStateException if the page contains too few tables for
   *     {@link #TARGET_TABLE_INDEX} to exist
   * @throws Exception if the HTML parser fails
   */
  public static void test5(String resource) throws Exception {
    String content = readTextFile(resource, ENCODING);
    Parser myParser = Parser.createParser(content, ENCODING);

    // Register extra tag prototypes with the parser's node factory.
    // NOTE(review): presumably so these tags are parsed as dedicated nodes — confirm
    // against the HTMLParser documentation.
    PrototypicalNodeFactory factory = new PrototypicalNodeFactory();
    factory.registerTag(new Font());
    factory.registerTag(new P());
    factory.registerTag(new H3());
    factory.registerTag(new B());
    myParser.setNodeFactory(factory);

    // Collect every <table> tag (the original author labelled this step
    // "pre-processing, remove ads").
    NodeFilter filter = new TagNameFilter("table");
    NodeList nodeList = myParser.extractAllNodesThatMatch(filter);

    // Fail with a clear message instead of an opaque NPE / index error when the
    // page layout differs from what this sample expects.
    if (nodeList.size() <= TARGET_TABLE_INDEX) {
      throw new IllegalStateException(
          "Expected at least " + (TARGET_TABLE_INDEX + 1) + " table tags in "
              + resource + " but found " + nodeList.size());
    }
    writeToFile(nodeList.elementAt(TARGET_TABLE_INDEX).toHtml());

    for (int i = 0; i < nodeList.size(); i++) {
      String html = nodeList.elementAt(i).toHtml();
      if (html.indexOf(LINK_MARKER) >= 0) {
        System.out.println(i);
      }
    }
  }

  /**
   * Reads a whole text file into a string, terminating every line with CRLF.
   *
   * <p>Failures are reported on standard output and do not propagate; whatever
   * was read before the failure (possibly nothing) is returned.
   *
   * @param sFileName path of the file to read
   * @param sEncode name of the charset used to decode the file
   * @return the file content with each line followed by {@code "\r\n"}
   */
  public static String readTextFile(String sFileName, String sEncode) {
    StringBuilder sbStr = new StringBuilder();
    // The raw stream is declared as its own resource so that it is still closed
    // when the charset name is rejected while constructing the reader.
    try (FileInputStream in = new FileInputStream(new File(sFileName));
        BufferedReader ins = new BufferedReader(new InputStreamReader(in, sEncode))) {
      String dataLine;
      while ((dataLine = ins.readLine()) != null) {
        sbStr.append(dataLine);
        sbStr.append("\r\n");
      }
    } catch (Exception e) {
      System.out.println("read Text File Error" + e.toString());
    }
    return sbStr.toString();
  }

  /**
   * Writes the given text to {@link #OUTPUT_PATH}, replacing any existing file.
   *
   * <p>The text is encoded with {@link #ENCODING}, the same charset used to
   * decode the input page, rather than the platform default. Failures are
   * reported on standard error and do not propagate.
   *
   * @param content text to write
   */
  public static void writeToFile(String content) {
    try (FileOutputStream out = new FileOutputStream(OUTPUT_PATH);
        OutputStreamWriter writer = new OutputStreamWriter(out, ENCODING)) {
      writer.write(content);
    } catch (Exception e) {
      System.err.println("write file error (" + OUTPUT_PATH + "): " + e);
    }
  }

  /**
   * Runs the sample against {@code Angolareport.html} in the working directory.
   *
   * @param args ignored
   * @throws Exception if parsing fails
   */
  public static void main(String[] args) throws Exception {
    test5("Angolareport.html");
  }
}