package URLReader;import java.net.*;
import java.io.*;public class URLReader { //声明抛出所有例外 /**
* @param args
*/
/**
* 读取一个网页全部内容
*/
public void caption(String urlstr, String encodingstr)
{
URL url;
try {
url = new URL(urlstr);
/*********************************************/
BufferedReader reader = new BufferedReader(new InputStreamReader(
url.openStream(),encodingstr));
StringBuffer content = new StringBuffer();
String readerLine;
while ((readerLine = reader.readLine()) != null) {
content.append(readerLine + "\n");
}
/**********************获取head节点中的内容********************/
int headStartIndex = content.indexOf("<head>");
int headEndIndex = content.indexOf("</head>");
if (headStartIndex != 1 && headEndIndex != -1) {
System.out.println(content.substring(headStartIndex,
headEndIndex + 7));
} else {
System.out.println("没有head节点");
} /*********************获取body节点中的内容*********************/
int bodyStartIndex = content.indexOf("<body>");
int bodyEndIndex = content.indexOf("</body>");
if (bodyStartIndex != 1 && bodyEndIndex != -1) {
System.out.println(content.substring(bodyStartIndex,
bodyEndIndex + 7));
} else {
System.out.println("没有body节点");
}
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String args[])
{
URLReader ur= new URLReader();
String encodingstr = null;
ur.caption("http://www.sina.com","gb2312");
// ur.caption("hdingstr);
}
}
java.lang.StringIndexOutOfBoundsException: String index out of range: -1
at java.lang.AbstractStringBuilder.substring(AbstractStringBuilder.java:880)
at java.lang.StringBuffer.substring(StringBuffer.java:412)
at URLReader.URLReader.caption(URLReader.java:60)
at URLReader.URLReader.main(URLReader.java:75)
报错了请问怎样解决?
import java.io.*;public class URLReader { //声明抛出所有例外 /**
* @param args
*/
/**
* 读取一个网页全部内容
*/
public void caption(String urlstr, String encodingstr)
{
URL url;
try {
url = new URL(urlstr);
/*********************************************/
BufferedReader reader = new BufferedReader(new InputStreamReader(
url.openStream(),encodingstr));
StringBuffer content = new StringBuffer();
String readerLine;
while ((readerLine = reader.readLine()) != null) {
content.append(readerLine + "\n");
}
/**********************获取head节点中的内容********************/
int headStartIndex = content.indexOf("<head>");
int headEndIndex = content.indexOf("</head>");
if (headStartIndex != 1 && headEndIndex != -1) {
System.out.println(content.substring(headStartIndex,
headEndIndex + 7));
} else {
System.out.println("没有head节点");
} /*********************获取body节点中的内容*********************/
int bodyStartIndex = content.indexOf("<body>");
int bodyEndIndex = content.indexOf("</body>");
if (bodyStartIndex != 1 && bodyEndIndex != -1) {
System.out.println(content.substring(bodyStartIndex,
bodyEndIndex + 7));
} else {
System.out.println("没有body节点");
}
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String args[])
{
URLReader ur= new URLReader();
String encodingstr = null;
ur.caption("http://www.sina.com","gb2312");
// ur.caption("hdingstr);
}
}
java.lang.StringIndexOutOfBoundsException: String index out of range: -1
at java.lang.AbstractStringBuilder.substring(AbstractStringBuilder.java:880)
at java.lang.StringBuffer.substring(StringBuffer.java:412)
at URLReader.URLReader.caption(URLReader.java:60)
at URLReader.URLReader.main(URLReader.java:75)
报错了请问怎样解决?
headEndIndex + 7));
System.out.println(content.substring(bodyStartIndex,
bodyEndIndex + 7));
这二个值headStartIndex,bodyStartIndex
这也太麻烦了吧
HttpURLConnection不就可以吗?
这句话,你把<head>的反向尖括号去掉,改为:content.indexOf("<head");
因为很多网页<head id="...">在head标签加属性了,这样你<head>就获取不到了