private void getData(HttpURLConnection conn) { try { BufferedReader in = new BufferedReader( new InputStreamReader(conn.getInputStream(), "UTF-8")); FileWriter out = new FileWriter( //output file new File("E:/data.html")); int len = 0; String line = "";
public static void main(String[] args) { // which html you want to get String url = "http://hi.baidu.com/sun_giser/blog/item/f74a5f31682e3d91a9018ee3.html"; new DataCollection(url); } }这个是获取页面信息的方法,还需要解析具体内容.顶一下。
import java.net.*;
import java.io.*;

/**
 * Downloads the page at an HTTP URL and saves its text to a local file.
 *
 * <p>The download runs from the constructor: the connection is opened with a
 * GET request, the response body is decoded as UTF-8, and the text is written
 * (again as UTF-8, with line terminators preserved) to the output file. I/O
 * failures are reported on standard error via {@code printStackTrace()}; a
 * response status other than 200 raises a {@link RuntimeException}.
 */
public class DataCollection {

    /** Output file used when the caller does not supply a path. */
    private static final String DEFAULT_OUTPUT_PATH = "E:/data.html";

    /** Charset name used both to decode the response and to encode the saved file. */
    private static final String CHARSET = "UTF-8";

    /** Size of the character buffer used while copying the response body. */
    private static final int BUFFER_SIZE = 8192;

    /** Path of the file the downloaded page is written to. */
    private final String outputPath;

    /**
     * Downloads {@code url} and saves it to the default output file
     * ({@code E:/data.html}).
     *
     * @param url address of the page to download
     * @throws RuntimeException if the server answers with a status other than 200
     */
    public DataCollection(String url) {
        this(url, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Downloads {@code url} and saves it to {@code outputPath}.
     *
     * @param url        address of the page to download
     * @param outputPath path of the file to write the page to
     * @throws IllegalArgumentException if {@code outputPath} is {@code null}
     * @throws RuntimeException if the server answers with a status other than 200
     */
    public DataCollection(String url, String outputPath) {
        if (outputPath == null) {
            throw new IllegalArgumentException("outputPath must not be null");
        }
        this.outputPath = outputPath;
        getData(getConnection(url));
    }

    /**
     * Opens a GET connection to {@code spec} and checks that the server
     * answered with HTTP 200.
     *
     * @param spec address to connect to
     * @return the open connection, or {@code null} if the URL is malformed or
     *         an I/O error occurred (the error is printed to standard error)
     * @throws RuntimeException if the response status is not 200
     */
    private HttpURLConnection getConnection(String spec) {
        HttpURLConnection conn = null;
        try {
            URL url = new URL(spec);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            // Compare the numeric status code: the reason phrase is free text
            // and is not guaranteed to be "OK" for a successful response.
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                conn.disconnect();
                throw new RuntimeException("Connection failed!");
            }
            return conn;
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
            if (conn != null) {
                conn.disconnect();
            }
        }
        return null;
    }

    /**
     * Copies the response body of {@code conn} to the output file and then
     * disconnects. Does nothing when {@code conn} is {@code null}.
     *
     * @param conn connection returned by {@link #getConnection(String)}; may be {@code null}
     */
    private void getData(HttpURLConnection conn) {
        if (conn == null) {
            // getConnection has already reported why no connection is available.
            return;
        }
        // try-with-resources closes both streams even if the copy fails part-way.
        try (Reader in = new BufferedReader(
                     new InputStreamReader(conn.getInputStream(), CHARSET));
             Writer out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(outputPath), CHARSET))) {
            // Copy raw characters rather than readLine() results so that the
            // page's line terminators are written to the file unchanged.
            char[] buffer = new char[BUFFER_SIZE];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Downloads a fixed sample page to the default output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // which html you want to get
        String url = "http://hi.baidu.com/sun_giser/blog/item/f74a5f31682e3d91a9018ee3.html";
        new DataCollection(url);
    }
}
// Note from the original post: this is the method for fetching the page
// content; the specific content still needs to be parsed. (Bump.)
java要用特殊的容器控件,获取document解析,而这个解析式必须得