JAVA 网页爬虫小程序

package mytest;import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;public class RetrivePage {
private static HttpClient httpClient = new HttpClient();
// 设置代理服务器
static{
//设置代理服务器的IP地址和端口
// httpClient.getHostConfiguration().setProxy("127.0.", 8080);
}
public static boolean downloadPage(String path) throws HttpException,
IOException {
InputStream input = null;
OutputStream output = null;
// 得到post方法
PostMethod postMethod = new PostMethod(path);
//设置post方法的参数
NameValuePair[] postData = new NameValuePair[2];
postData[0] = new NameValuePair("name","lietu");
postData[1] = new NameValuePair("password","******");
postMethod.addParameters(postData);
//执行，反悔状态码
int statusCode = httpClient.executeMethod(postMethod);
//针对状态码进行处理（简单起见，只处理返回值为200的状态码）
if(statusCode == HttpStatus.SC_OK){
input = postMethod.getResponseBodyAsStream();
//得到文件名
int ind = path.lastIndexOf('/')+1;
String filename = path.substring(0,path.lastIndexOf('/')+1);
//获得文件输出流
output = new FileOutputStream(filename);
//输出到文件
int tempByte = -1;
while((tempByte = input.read()) > 0){
output.write(tempByte);
}
//关闭输入输出流
if(input != null){
input.close();
}
if(output != null){
output.close();
}
return true;
}
return false;
} /**
* 测试
* @param args
*/
public static void main(String[] args) {
// 抓去 lietu首页，输出
try{
RetrivePage.downloadPage("http://bbs.it168.com/");
}catch (HttpException e) {
// TODO: handle exception
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}}大侠们你们好我想自己动手做一个类似网页爬虫的小程序，在百度上搜了一个文档
参考他们的代码，但是却行不通。
问题出在获得文件输出流的那句代码上，请问该怎么解决这个问题啊？

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

String filename = path.substring(0,path.lastIndexOf('/')+1);如果楼主要保存到本地，好歹也写一个本地路径，这样写，地址还是http://bbs.it168.com/。怎么存到本地呢？
另外这样也是把网址上面网页代码复制到本地，没有保存什么文件，不知道楼主是不是要达到这个目的。
String filename = path.substring(0,path.lastIndexOf('/')+1);把这句改了，改成一个本地的路径，比如String filename = "d://getsit/info.txt";省得麻烦，就先在本地建一个这样的info.txt文件吧。然后，看了下，http://bbs.it168.com/ 这个地址，好像也不需要用户名，密码。把下面这几行去掉吧。//设置post方法的参数
NameValuePair[] postData = new NameValuePair[2];
postData[0] = new NameValuePair("name","lietu");
postData[1] = new NameValuePair("password","******");
postMethod.addParameters(postData);
这也算爬虫么？就等于在网页上右键，另存为，要用htmlparser