本人有一段代码:
import java.io.*;
import java.net.*;

/**
 * Small scraper: downloads the Sina domestic-news index page, follows every
 * headline link found in one specific table cell, cuts the article text out of
 * each linked page and saves it as a numbered file (1.htm, 2.htm, ...) using a
 * relative path.
 *
 * All parsing is plain substring searching for hard-coded HTML markers, so it
 * only works while the remote pages keep exactly that markup.
 *
 * Bytes are converted to and from text with the platform default charset
 * (ByteArrayOutputStream.toString() / String.getBytes()); nothing here reads
 * the charset declared by the server.
 */
public class Test5 {

    /**
     * Downloads the resource behind a URL and returns its body as text.
     *
     * @param href absolute URL to fetch
     * @return the response body decoded with the platform default charset
     * @throws Exception if the URL is malformed or the transfer fails
     */
    static String get_from_internet(String href) throws Exception {
        URL url = new URL(href);
        URLConnection url_con = url.openConnection();
        // get_content closes the stream, including when reading fails.
        return get_content(url_con.getInputStream());
    }

    /**
     * Reads a whole local file into a string.
     *
     * @param path path of the file to read
     * @return the file content decoded with the platform default charset
     * @throws Exception if the file cannot be opened or read
     */
    static String get_from_file(String path) throws Exception {
        return get_content(new FileInputStream(path));
    }

    /**
     * Writes a string to a file, replacing any previous content.
     *
     * @param path    path of the file to create or overwrite
     * @param content text to store, encoded with the platform default charset
     * @throws Exception if the file cannot be opened or written
     */
    static void write_to_file(String path, String content) throws Exception {
        FileOutputStream output = new FileOutputStream(path);
        try {
            output.write(content.getBytes());
        } finally {
            // Release the file handle even when write() throws.
            output.close();
        }
    }

    /**
     * Entry point: fetches the index page, walks the headline links and stores
     * one HTML file per article whose body could be extracted.
     *
     * @param args unused
     * @throws Exception if a download or file write fails, or if the headline
     *                   table cannot be located in the index page
     */
    public static void main(String[] args) throws Exception {
        String str = get_from_internet("http://news.sina.com.cn/china/");

        // Narrow the page down to the one table cell that holds the headlines.
        String start_tag = "\t<table width=320 cellspacing=0 style='margin:7px 0 7px 0'>\r\n\t<tr><td class='linkBlue f14 lh22'>";
        int tag_pos = str.indexOf(start_tag);
        if (tag_pos == -1) {
            throw new IllegalStateException("headline table marker not found in index page");
        }
        int start = tag_pos + start_tag.length();
        int end = str.indexOf("</td>", start);
        if (end == -1) {
            throw new IllegalStateException("closing </td> of headline cell not found in index page");
        }
        str = str.substring(start, end);

        // From here on 'start' is a cursor inside the headline cell.
        start = 0;
        String tag = "·<a href=";
        int file_count = 0;
        while ((start = str.indexOf(tag, start)) != -1) {
            start += tag.length();

            // The link target is everything up to the target attribute.
            int href_end = str.indexOf("target=_blank>", start);
            if (href_end == -1) {
                break; // truncated anchor: nothing more can be parsed
            }
            // The slice stops right before "target=_blank>", so it can carry
            // the separating whitespace; drop it.
            String href = str.substring(start, href_end).trim();

            // The title sits between the end of the opening tag and </a>.
            start = str.indexOf(">", start) + 1;
            int title_end = str.indexOf("</a>", start);
            if (title_end == -1) {
                break;
            }
            String title = str.substring(start, title_end);

            String content = extract_article_body(get_from_internet(href));
            if (content == null) {
                // One page with unexpected markup should not abort the run.
                System.err.println("Skipping " + href + ": article body markers not found");
                continue;
            }

            content = "<html><head><title>" + title + "</title></head><body>\r\n<h1>" + title + "</h1>\r\n" + content + "</body></html>";
            file_count++;
            write_to_file(file_count + ".htm", content);
        }
    }

    /**
     * Cuts the article text out of a downloaded article page.
     *
     * The text is the span between the "zoom" font tag and the trailing
     * "br clear=all" row. If that span contains the NEWSZW_HZH_BEGIN comment
     * followed by a closing table tag, everything from the comment through the
     * last closing table tag is removed; otherwise the span is returned as is.
     *
     * @param page full HTML of the article page
     * @return the article fragment, or null when the delimiting markers are missing
     */
    private static String extract_article_body(String page) {
        String body_open = "<font id=\"zoom\" class=f14>";
        int open_pos = page.indexOf(body_open);
        if (open_pos == -1) {
            return null;
        }
        int body_start = open_pos + body_open.length();
        int body_end = page.indexOf("<br clear=all>\r\n\t</td></tr>", body_start);
        if (body_end == -1) {
            return null;
        }
        String body = page.substring(body_start, body_end);

        String table_close = "</table>";
        int cut_from = body.indexOf("<!--NEWSZW_HZH_BEGIN-->");
        int last_table = body.lastIndexOf(table_close);
        // Strip only when both ends exist in the right order; cutting with a
        // missing or misplaced end would drop or duplicate article text.
        if (cut_from != -1 && last_table > cut_from) {
            body = body.substring(0, cut_from) + body.substring(last_table + table_close.length());
        }
        return body;
    }

    /**
     * Drains a stream completely and returns what was read as text.
     * The stream is always closed, whether or not reading succeeds.
     *
     * @param input stream to read; closed by this method
     * @return all bytes of the stream decoded with the platform default charset
     * @throws Exception if reading from the stream fails
     */
    static String get_content(InputStream input) throws Exception {
        try {
            byte[] b = new byte[1024];
            ByteArrayOutputStream bout = new ByteArrayOutputStream();
            int i;
            while ((i = input.read(b)) != -1) {
                bout.write(b, 0, i);
            }
            return bout.toString();
        } finally {
            input.close();
        }
    }
}
这是抓取新浪(sina)国内新闻数据的代码。
我还有很多地方不明白,
麻烦帮我讲解一下~~~~~~~~~~
import java.io.*;
import java.net.*;

/**
 * Small scraper: downloads the Sina domestic-news index page, follows every
 * headline link found in one specific table cell, cuts the article text out of
 * each linked page and saves it as a numbered file (1.htm, 2.htm, ...) using a
 * relative path.
 *
 * All parsing is plain substring searching for hard-coded HTML markers, so it
 * only works while the remote pages keep exactly that markup.
 *
 * Bytes are converted to and from text with the platform default charset
 * (ByteArrayOutputStream.toString() / String.getBytes()); nothing here reads
 * the charset declared by the server.
 */
public class Test5 {

    /**
     * Downloads the resource behind a URL and returns its body as text.
     *
     * @param href absolute URL to fetch
     * @return the response body decoded with the platform default charset
     * @throws Exception if the URL is malformed or the transfer fails
     */
    static String get_from_internet(String href) throws Exception {
        URL url = new URL(href);
        URLConnection url_con = url.openConnection();
        // get_content closes the stream, including when reading fails.
        return get_content(url_con.getInputStream());
    }

    /**
     * Reads a whole local file into a string.
     *
     * @param path path of the file to read
     * @return the file content decoded with the platform default charset
     * @throws Exception if the file cannot be opened or read
     */
    static String get_from_file(String path) throws Exception {
        return get_content(new FileInputStream(path));
    }

    /**
     * Writes a string to a file, replacing any previous content.
     *
     * @param path    path of the file to create or overwrite
     * @param content text to store, encoded with the platform default charset
     * @throws Exception if the file cannot be opened or written
     */
    static void write_to_file(String path, String content) throws Exception {
        FileOutputStream output = new FileOutputStream(path);
        try {
            output.write(content.getBytes());
        } finally {
            // Release the file handle even when write() throws.
            output.close();
        }
    }

    /**
     * Entry point: fetches the index page, walks the headline links and stores
     * one HTML file per article whose body could be extracted.
     *
     * @param args unused
     * @throws Exception if a download or file write fails, or if the headline
     *                   table cannot be located in the index page
     */
    public static void main(String[] args) throws Exception {
        String str = get_from_internet("http://news.sina.com.cn/china/");

        // Narrow the page down to the one table cell that holds the headlines.
        String start_tag = "\t<table width=320 cellspacing=0 style='margin:7px 0 7px 0'>\r\n\t<tr><td class='linkBlue f14 lh22'>";
        int tag_pos = str.indexOf(start_tag);
        if (tag_pos == -1) {
            throw new IllegalStateException("headline table marker not found in index page");
        }
        int start = tag_pos + start_tag.length();
        int end = str.indexOf("</td>", start);
        if (end == -1) {
            throw new IllegalStateException("closing </td> of headline cell not found in index page");
        }
        str = str.substring(start, end);

        // From here on 'start' is a cursor inside the headline cell.
        start = 0;
        String tag = "·<a href=";
        int file_count = 0;
        while ((start = str.indexOf(tag, start)) != -1) {
            start += tag.length();

            // The link target is everything up to the target attribute.
            int href_end = str.indexOf("target=_blank>", start);
            if (href_end == -1) {
                break; // truncated anchor: nothing more can be parsed
            }
            // The slice stops right before "target=_blank>", so it can carry
            // the separating whitespace; drop it.
            String href = str.substring(start, href_end).trim();

            // The title sits between the end of the opening tag and </a>.
            start = str.indexOf(">", start) + 1;
            int title_end = str.indexOf("</a>", start);
            if (title_end == -1) {
                break;
            }
            String title = str.substring(start, title_end);

            String content = extract_article_body(get_from_internet(href));
            if (content == null) {
                // One page with unexpected markup should not abort the run.
                System.err.println("Skipping " + href + ": article body markers not found");
                continue;
            }

            content = "<html><head><title>" + title + "</title></head><body>\r\n<h1>" + title + "</h1>\r\n" + content + "</body></html>";
            file_count++;
            write_to_file(file_count + ".htm", content);
        }
    }

    /**
     * Cuts the article text out of a downloaded article page.
     *
     * The text is the span between the "zoom" font tag and the trailing
     * "br clear=all" row. If that span contains the NEWSZW_HZH_BEGIN comment
     * followed by a closing table tag, everything from the comment through the
     * last closing table tag is removed; otherwise the span is returned as is.
     *
     * @param page full HTML of the article page
     * @return the article fragment, or null when the delimiting markers are missing
     */
    private static String extract_article_body(String page) {
        String body_open = "<font id=\"zoom\" class=f14>";
        int open_pos = page.indexOf(body_open);
        if (open_pos == -1) {
            return null;
        }
        int body_start = open_pos + body_open.length();
        int body_end = page.indexOf("<br clear=all>\r\n\t</td></tr>", body_start);
        if (body_end == -1) {
            return null;
        }
        String body = page.substring(body_start, body_end);

        String table_close = "</table>";
        int cut_from = body.indexOf("<!--NEWSZW_HZH_BEGIN-->");
        int last_table = body.lastIndexOf(table_close);
        // Strip only when both ends exist in the right order; cutting with a
        // missing or misplaced end would drop or duplicate article text.
        if (cut_from != -1 && last_table > cut_from) {
            body = body.substring(0, cut_from) + body.substring(last_table + table_close.length());
        }
        return body;
    }

    /**
     * Drains a stream completely and returns what was read as text.
     * The stream is always closed, whether or not reading succeeds.
     *
     * @param input stream to read; closed by this method
     * @return all bytes of the stream decoded with the platform default charset
     * @throws Exception if reading from the stream fails
     */
    static String get_content(InputStream input) throws Exception {
        try {
            byte[] b = new byte[1024];
            ByteArrayOutputStream bout = new ByteArrayOutputStream();
            int i;
            while ((i = input.read(b)) != -1) {
                bout.write(b, 0, i);
            }
            return bout.toString();
        } finally {
            input.close();
        }
    }
}
这是抓取新浪(sina)国内新闻数据的代码。
我还有很多地方不明白,
麻烦帮我讲解一下~~~~~~~~~~
import java.io.*;
import java.net.*;

/**
 * Small scraper: downloads the Sina domestic-news index page, follows every
 * headline link found in one specific table cell, cuts the article text out of
 * each linked page and saves it as a numbered file (1.htm, 2.htm, ...) using a
 * relative path.
 *
 * All parsing is plain substring searching for hard-coded HTML markers, so it
 * only works while the remote pages keep exactly that markup.
 *
 * Bytes are converted to and from text with the platform default charset
 * (ByteArrayOutputStream.toString() / String.getBytes()); nothing here reads
 * the charset declared by the server.
 */
public class Test5 {

    /**
     * Opens a connection to a URL and returns the downloaded body as text.
     *
     * @param href absolute URL to fetch
     * @return the response body decoded with the platform default charset
     * @throws Exception if the URL is malformed or the transfer fails
     */
    static String get_from_internet(String href) throws Exception {
        URL url = new URL(href);
        URLConnection url_con = url.openConnection();
        // get_content closes the stream, including when reading fails.
        return get_content(url_con.getInputStream());
    }

    /**
     * Reads a whole local file and returns its content as text.
     *
     * @param path path of the file to read
     * @return the file content decoded with the platform default charset
     * @throws Exception if the file cannot be opened or read
     */
    static String get_from_file(String path) throws Exception {
        return get_content(new FileInputStream(path));
    }

    /**
     * Writes text to a file, replacing any previous content.
     *
     * @param path    path of the file to create or overwrite
     * @param content text to store, encoded with the platform default charset
     * @throws Exception if the file cannot be opened or written
     */
    static void write_to_file(String path, String content) throws Exception {
        FileOutputStream output = new FileOutputStream(path);
        try {
            output.write(content.getBytes());
        } finally {
            // Release the file handle even when write() throws.
            output.close();
        }
    }

    /**
     * Entry point: fetches the index page, walks the headline links and stores
     * one HTML file per article whose body could be extracted.
     *
     * @param args unused
     * @throws Exception if a download or file write fails, or if the headline
     *                   table cannot be located in the index page
     */
    public static void main(String[] args) throws Exception {
        String str = get_from_internet("http://news.sina.com.cn/china/");

        // Step 1: locate the table cell that holds the headlines and keep only
        // the text between its opening marker and the next </td>.
        String start_tag = "\t<table width=320 cellspacing=0 style='margin:7px 0 7px 0'>\r\n\t<tr><td class='linkBlue f14 lh22'>";
        int tag_pos = str.indexOf(start_tag);
        if (tag_pos == -1) {
            throw new IllegalStateException("headline table marker not found in index page");
        }
        int start = tag_pos + start_tag.length();
        int end = str.indexOf("</td>", start);
        if (end == -1) {
            throw new IllegalStateException("closing </td> of headline cell not found in index page");
        }
        str = str.substring(start, end);

        // Step 2: reset the cursor; from here on 'start' walks through the
        // headline cell, one anchor per loop iteration.
        start = 0;
        String tag = "·<a href=";
        int file_count = 0;
        while ((start = str.indexOf(tag, start)) != -1) {
            start += tag.length();

            // The link target is everything up to the target attribute.
            int href_end = str.indexOf("target=_blank>", start);
            if (href_end == -1) {
                break; // truncated anchor: nothing more can be parsed
            }
            // The slice stops right before "target=_blank>", so it can carry
            // the separating whitespace; drop it.
            String href = str.substring(start, href_end).trim();

            // The title sits between the end of the opening tag and </a>.
            start = str.indexOf(">", start) + 1;
            int title_end = str.indexOf("</a>", start);
            if (title_end == -1) {
                break;
            }
            String title = str.substring(start, title_end);

            String content = extract_article_body(get_from_internet(href));
            if (content == null) {
                // One page with unexpected markup should not abort the run.
                System.err.println("Skipping " + href + ": article body markers not found");
                continue;
            }

            content = "<html><head><title>" + title + "</title></head><body>\r\n<h1>" + title + "</h1>\r\n" + content + "</body></html>";
            file_count++;
            write_to_file(file_count + ".htm", content);
        }
    }

    /**
     * Cuts the article text out of a downloaded article page.
     *
     * The text is the span between the "zoom" font tag and the trailing
     * "br clear=all" row. If that span contains the NEWSZW_HZH_BEGIN comment
     * followed by a closing table tag, everything from the comment through the
     * last closing table tag is removed; otherwise the span is returned as is.
     *
     * @param page full HTML of the article page
     * @return the article fragment, or null when the delimiting markers are missing
     */
    private static String extract_article_body(String page) {
        String body_open = "<font id=\"zoom\" class=f14>";
        int open_pos = page.indexOf(body_open);
        if (open_pos == -1) {
            return null;
        }
        int body_start = open_pos + body_open.length();
        int body_end = page.indexOf("<br clear=all>\r\n\t</td></tr>", body_start);
        if (body_end == -1) {
            return null;
        }
        String body = page.substring(body_start, body_end);

        String table_close = "</table>";
        int cut_from = body.indexOf("<!--NEWSZW_HZH_BEGIN-->");
        int last_table = body.lastIndexOf(table_close);
        // Strip only when both ends exist in the right order; cutting with a
        // missing or misplaced end would drop or duplicate article text.
        if (cut_from != -1 && last_table > cut_from) {
            body = body.substring(0, cut_from) + body.substring(last_table + table_close.length());
        }
        return body;
    }

    /**
     * Shared helper that turns a byte stream into a string.
     * The stream is always closed, whether or not reading succeeds.
     *
     * @param input stream to read; closed by this method
     * @return all bytes of the stream decoded with the platform default charset
     * @throws Exception if reading from the stream fails
     */
    static String get_content(InputStream input) throws Exception {
        try {
            byte[] b = new byte[1024];
            ByteArrayOutputStream bout = new ByteArrayOutputStream();
            int i;
            while ((i = input.read(b)) != -1) {
                bout.write(b, 0, i);
            }
            return bout.toString();
        } finally {
            input.close();
        }
    }
}
// Answerer's note: if anything is still unclear, ask and we can go through it.