想抓取一个网站上的页面内容，但是那个网页需要登录才能打开。我有用户名和密码，用下面的方式可以登录成功，代码如下：
URLConnection connection = new URL("http://localhost/login.jsp?user=test&pswd=123").openConnection();
BufferedReader reader = new BufferedReader(new java.io.InputStreamReader(connection.getInputStream()));
// Echo everything available from `reader` to stdout, one line at a time,
// until readLine() returns null (end of stream).
String line = "";
while ((line = reader.readLine()) != null){
System.out.println(line);
}
登录成功之后，用户的 session 被保留在服务器端。
===================================================
但是，再用同样的方式去取需要权限的网页时，得到的还是未登录的信息。代码如下：
// Open a brand-new connection to user.jsp and print its response line by line.
// NOTE(review): no "Cookie" request property is set on this connection, so any
// session cookie issued by an earlier response is not sent back — presumably
// this is why the server treats the request as not logged in.
connection = new URL("http://localhost/user.jsp").openConnection();reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String line = "";
while ((line = reader.readLine()) != null){
System.out.println(line);
}
读取失败,也就是说服务器那端没有用户session.
我猜可能是URLConnection对象被重新初始化了,所以才这样.那怎么才能将上面登录之后的session保留下来继续使用呢.
或者有别的什么办法可以突破验证抓取网页.
// Echo everything available from `reader` to stdout, one line at a time,
// until readLine() returns null (end of stream).
String line = "";
while ((line = reader.readLine()) != null){
System.out.println(line);
}
登录成功之后，用户的 session 被保留在服务器端。
===================================================
但是，再用同样的方式去取需要权限的网页时，得到的还是未登录的信息。代码如下：
// Open a brand-new connection to user.jsp and print its response line by line.
// NOTE(review): no "Cookie" request property is set on this connection, so any
// session cookie issued by an earlier response is not sent back — presumably
// this is why the server treats the request as not logged in.
connection = new URL("http://localhost/user.jsp").openConnection();reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String line = "";
while ((line = reader.readLine()) != null){
System.out.println(line);
}
读取失败,也就是说服务器那端没有用户session.
我猜可能是URLConnection对象被重新初始化了,所以才这样.那怎么才能将上面登录之后的session保留下来继续使用呢.
或者有别的什么办法可以突破验证抓取网页.
这个看看
import java.io.*;
import java.net.URL;
import java.net.HttpURLConnection;
import java.net.URLEncoder;
import java.io.BufferedReader;
import java.io.InputStreamReader;

/**
 * Obtains a session cookie from one page of a site and then requests the
 * login URL with that cookie attached, so that the same cookie can be reused
 * for later requests (for example by passing it to another HTTP client class).
 *
 * <p>The cookie is located by header <em>name</em> ({@code Set-Cookie}), not by
 * header position: the position of a header in the response differs between
 * servers, so a fixed index such as 5 works on one server and fails on another.
 */
public class GetCookie {
    /** Login URL; user name and password are sent as URL-encoded query parameters. */
    private String url =
        "http://www.aaaa.net/USER/user_login.asp?logid=" + encode("nihao321") + "&pswd=" + encode("nihao321");

    /** Page requested first, only to make the server issue a session cookie. */
    private String url1 = "http://www.aaaa.net/user/per_data.asp";

    public GetCookie() {
        //get();
    }

    /**
     * Fetches the cookies issued by {@code url1}, then calls the login URL with
     * those cookies attached.
     *
     * <p>Failures are reported on stderr and never thrown, as callers have never
     * had to handle exceptions from this method.
     *
     * @return the cookies as a {@code Cookie} request-header value
     *         ({@code name=value; name2=value2}); an empty string if the server
     *         issued none or the first request failed
     */
    public String get() {
        String cookie = "";
        try {
            cookie = fetchCookies(this.url1);

            // The login URL is deliberately not printed: it contains the password.
            HttpURLConnection login = (HttpURLConnection) new URL(this.url).openConnection();
            try {
                if (cookie.length() > 0) {
                    login.addRequestProperty("Cookie", cookie);
                }
                login.connect();
                // Read the response to the end so the login request is fully processed.
                drain(login.getInputStream());
            } finally {
                login.disconnect();
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return cookie;
    }

    /**
     * Requests {@code address} and collects every {@code Set-Cookie} response
     * header into a single {@code Cookie} request-header value.
     */
    private static String fetchCookies(String address) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(address).openConnection();
        try {
            conn.connect();
            StringBuffer cookies = new StringBuffer();
            // Walk the headers by index; index 0 may be the status line (null key),
            // and the list ends when both key and value are null.
            for (int i = 0; ; i++) {
                String name = conn.getHeaderFieldKey(i);
                String value = conn.getHeaderField(i);
                if (name == null && value == null) {
                    break;
                }
                if ("Set-Cookie".equalsIgnoreCase(name)) {
                    String pair = toCookiePair(value);
                    if (pair.length() > 0) {
                        if (cookies.length() > 0) {
                            cookies.append("; ");
                        }
                        cookies.append(pair);
                    }
                }
            }
            return cookies.toString();
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Reduces a {@code Set-Cookie} header value to its {@code name=value} part,
     * dropping attributes such as {@code path} or {@code expires} that must not
     * be echoed back in a {@code Cookie} request header.
     *
     * @param setCookieValue raw header value, may be {@code null}
     * @return the trimmed text before the first {@code ';'}; empty for {@code null}
     */
    static String toCookiePair(String setCookieValue) {
        if (setCookieValue == null) {
            return "";
        }
        int semicolon = setCookieValue.indexOf(';');
        String pair = semicolon >= 0 ? setCookieValue.substring(0, semicolon) : setCookieValue;
        return pair.trim();
    }

    /** Reads {@code in} to the end, discarding the data, and closes it. */
    private static void drain(InputStream in) throws IOException {
        try {
            byte[] buffer = new byte[4096];
            while (in.read(buffer) != -1) {
                // discard
            }
        } finally {
            in.close();
        }
    }

    /** URL-encodes {@code value} as UTF-8 (the one-argument overload is deprecated). */
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is required to be supported by every Java platform.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }
}
import java.net.URL;
import java.net.HttpURLConnection;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URLDecoder;public class GetWeb {
private String url = "";
private String Cookie = "";
private String parameter = "";
public GetWeb(String url) {
this.url = url;
};
public GetWeb(String url, String Cookie) {
this.url = url;
this.Cookie = Cookie;
}; public GetWeb(String url, String Cookie, String parameter) {
this.url = url;
this.Cookie = Cookie;
this.parameter = parameter;
}; public StringBuffer Getpage() {
String sCurrentLine;
StringBuffer sTotalString;
sCurrentLine = "";
sTotalString = new StringBuffer("");
try {
java.io.InputStream l_urlStream;
java.io.BufferedReader l_reader = null;
java.net.HttpURLConnection l_connection;
java.net.URL l_url = new java.net.URL(this.url);
l_connection = (java.net.HttpURLConnection)
l_url.openConnection();
l_connection.addRequestProperty("Cookie", this.Cookie);
l_connection.connect();
l_urlStream = l_connection.getInputStream();
l_reader = new java.io.BufferedReader(new java.io.
InputStreamReader(l_urlStream));
while ( (sCurrentLine = l_reader.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine));
}
l_connection.disconnect();
sCurrentLine = null;
l_urlStream = null;
l_reader = null;
l_connection = null;
}
catch (Exception ex) {
//ex.printStackTrace();
}
//System.out.println(sTotalString);
return sTotalString;
} public StringBuffer Postpage() {
String sCurrentLine;
StringBuffer sTotalString;
sCurrentLine = "";
sTotalString = new StringBuffer("");
//System.out.println(URLDecoder.decode(this.parameter));
try {
java.net.HttpURLConnection l_connection;
java.net.URL l_url = new java.net.URL(this.url);
l_connection = (java.net.HttpURLConnection)l_url.openConnection();
l_connection.setRequestMethod("POST");
l_connection.setDoOutput(true);
PrintWriter out = new PrintWriter(l_connection.getOutputStream());
out.write(this.parameter);
out.flush();
out.close();
BufferedReader in
= new BufferedReader(new InputStreamReader(l_connection.getInputStream()));
while ( (sCurrentLine = in.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine));
}
l_connection.disconnect();
sCurrentLine = null;
l_connection = null;
}
catch (Exception ex) {
//ex.printStackTrace();
}
//System.out.println(sTotalString);
return sTotalString;
}}很久以前写的程序片段
可以借鉴一下
主要是第一次请求抓Cookie
第二次请求把Cookie放上去就OK了
我用 `Cookie = l_connection.getHeaderField(7);` 才行，
用 `Cookie = l_connection.getHeaderField(5);` 就不行，这是怎么回事呢？