try{ s1 = "http://search.shopping.yahoo.com/search/all/__yltc=s:14489115,d:14489115,w:search?p=dvd&did="; url=new URL(s1); URLConnection conn = url.openConnection(); Reader in = new InputStreamReader(conn.getInputStream()); String content = getText(in); System.println(content); }catch (Exception e) {}} public static String getText(Reader rd) {
final StringBuffer buf = new StringBuffer(1000);
try { // Create an HTML document that appends all text to buf HTMLDocument doc = new HTMLDocument() { public HTMLEditorKit.ParserCallback getReader(int pos) { return new HTMLEditorKit.ParserCallback() {
public void handleStartTag(HTML.Tag tag,MutableAttributeSet attributes, int position) {
if (tag == HTML.Tag.A||tag==HTML.Tag.B){ }
else return;
inTag = true; }
public void handleEndTag(HTML.Tag tag, int position) { if (tag == HTML.Tag.A||tag==HTML.Tag.B){ inTag = false; }
import java.io.*;
import java.net.URL;
import java.net.URLConnection;

/**
 * Minimal example that downloads a web page and prints its body to standard output.
 */
public class Connect {
    public Connect() {
    }

    /**
     * Fetches http://www.yahoo.com.cn and prints the response body, decoded as GBK,
     * to {@code System.out}.
     *
     * @throws Exception if the connection cannot be opened or the body cannot be read
     */
    public void getDocument() throws Exception {
        URL url = new URL("http://www.yahoo.com.cn");
        URLConnection conn = url.openConnection();
        InputStream in = conn.getInputStream();
        try {
            // Decode through a Reader so that multi-byte GBK characters that straddle a
            // chunk boundary are decoded correctly, and only the characters actually read
            // in each iteration are printed (never stale buffer contents).
            Reader reader = new InputStreamReader(in, "GBK");
            char[] chunk = new char[1024];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                System.out.print(new String(chunk, 0, count));
            }
            System.out.println();
        } finally {
            in.close();
        }
    }

    /**
     * Entry point: downloads and prints the page.
     *
     * @param args ignored
     * @throws Exception if the download fails
     */
    public static void main(String[] args) throws Exception {
        Connect connect1 = new Connect();
        connect1.getDocument();
    }
}
用我的吧，
应该没问题。
import java.text.*;
import java.net.*;
import java.sql.*;
import java.util.*;
import java.lang.String;
import javax.swing.text.*;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import javax.swing.text.html.*;

/**
 * Downloads a Yahoo Shopping search page and prints the text found inside
 * {@code <a>} and {@code <b>} elements.
 */
class Test {
    /**
     * Fetches the search page, extracts the link/bold text and prints it.
     * I/O failures are reported on {@code System.err} instead of being silently dropped.
     *
     * @param args ignored (the original read {@code args[0]} into an unused variable,
     *             which crashed when the program was started without arguments)
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String args[]) throws Exception {
        String s1 = "http://search.shopping.yahoo.com/search/all/__yltc=s:14489115,d:14489115,w:search?p=dvd&did=";
        try {
            URL url = new URL(s1);
            URLConnection conn = url.openConnection();
            Reader in = new InputStreamReader(conn.getInputStream());
            try {
                String content = getText(in);
                System.out.println(content);
            } finally {
                in.close();
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + s1 + ": " + e);
        }
    }

    /**
     * Parses HTML from {@code rd} and returns the text that appears inside
     * {@code <a>} or {@code <b>} elements, one text run per line.
     *
     * @param rd source of the HTML markup
     * @return the collected text, each run followed by {@code '\n'}; if parsing fails
     *         part-way, whatever was collected up to that point
     */
    public static String getText(Reader rd) {
        final StringBuffer buf = new StringBuffer(1000);
        try {
            // Create an HTML document whose parser callback appends the wanted text to buf.
            HTMLDocument doc = new HTMLDocument() {
                public HTMLEditorKit.ParserCallback getReader(int pos) {
                    return new HTMLEditorKit.ParserCallback() {
                        // Per-parse state, so one call cannot leak "inside a tag" into the next.
                        private boolean inTag = false;

                        public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = true;
                            }
                        }

                        public void handleEndTag(HTML.Tag tag, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = false;
                            }
                        }

                        public void handleText(char[] data, int pos) {
                            if (inTag) {
                                buf.append(data);
                                buf.append('\n');
                            }
                        }
                    };
                }
            };
            // Without this property the parser throws ChangedCharSetException (an IOException)
            // as soon as it meets a <meta http-equiv="Content-Type" ... charset=...> tag,
            // which made this method return almost nothing for most real pages.
            doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            EditorKit kit = new HTMLEditorKit();
            kit.read(rd, doc, 0);
        } catch (BadLocationException e) {
            System.err.println("HTML parsing failed: " + e);
        } catch (IOException e) {
            System.err.println("HTML parsing failed: " + e);
        }
        return buf.toString();
    }
}
import java.text.*;
import java.net.*;
import java.sql.*;
import java.util.*;
import java.lang.String;
import javax.swing.text.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import javax.swing.text.html.*;

/**
 * Downloads a Yahoo Shopping search page and prints the raw document; also offers
 * {@link #getText(Reader)} for extracting the text inside {@code <a>} and {@code <b>} elements.
 */
class Test {
    /**
     * Fetches the search page and prints its body, decoded as GBK, to {@code System.out}.
     * I/O failures are reported on {@code System.err} instead of being silently dropped.
     *
     * @param args ignored
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String args[]) throws Exception {
        String s1 = "http://search.shopping.yahoo.com/search/all/__yltc=s:14489115,d:14489115,w:search?p=dvd&did=";
        try {
            URL url = new URL(s1);
            URLConnection conn = url.openConnection();
            InputStream in = conn.getInputStream();
            try {
                // Decode through a Reader: only the characters actually read are printed and
                // multi-byte characters split across chunk boundaries are decoded correctly.
                Reader reader = new InputStreamReader(in, "GBK");
                char[] chunk = new char[1024];
                int count;
                while ((count = reader.read(chunk)) != -1) {
                    System.out.print(new String(chunk, 0, count));
                }
                System.out.println();
            } finally {
                in.close();
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + s1 + ": " + e);
        }
    }

    /**
     * Parses HTML from {@code rd} and returns the text that appears inside
     * {@code <a>} or {@code <b>} elements, one text run per line.
     *
     * @param rd source of the HTML markup
     * @return the collected text, each run followed by {@code '\n'}; if parsing fails
     *         part-way, whatever was collected up to that point
     */
    public static String getText(Reader rd) {
        final StringBuffer buf = new StringBuffer(1000);
        try {
            // Create an HTML document whose parser callback appends the wanted text to buf.
            HTMLDocument doc = new HTMLDocument() {
                public HTMLEditorKit.ParserCallback getReader(int pos) {
                    return new HTMLEditorKit.ParserCallback() {
                        // Per-parse state, so one call cannot leak "inside a tag" into the next.
                        private boolean inTag = false;

                        public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = true;
                            }
                        }

                        public void handleEndTag(HTML.Tag tag, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = false;
                            }
                        }

                        public void handleText(char[] data, int pos) {
                            if (inTag) {
                                buf.append(data);
                                buf.append('\n');
                            }
                        }
                    };
                }
            };
            // Without this property the parser throws ChangedCharSetException (an IOException)
            // as soon as it meets a <meta http-equiv="Content-Type" ... charset=...> tag,
            // which made this method return almost nothing for most real pages.
            doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            EditorKit kit = new HTMLEditorKit();
            kit.read(rd, doc, 0);
        } catch (BadLocationException e) {
            System.err.println("HTML parsing failed: " + e);
        } catch (IOException e) {
            System.err.println("HTML parsing failed: " + e);
        }
        return buf.toString();
    }
} // 得到文档可以 (forum reply: "fetching the document works")
自己解析吧
import java.text.*;
import java.net.*;
import java.sql.*;
import java.util.*;
import java.lang.String;
import javax.swing.text.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import javax.swing.text.html.*;

/**
 * Downloads a Yahoo Shopping search page, decodes it as GBK and prints the text found
 * inside {@code <a>} and {@code <b>} elements.
 */
class Test {
    /**
     * Fetches the search page, buffers it completely, extracts the link/bold text with
     * {@link #getText(Reader)} and prints the result (the original discarded it).
     * I/O failures are reported on {@code System.err} instead of being silently dropped.
     *
     * @param args ignored
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String args[]) throws Exception {
        String s1 = "http://search.shopping.yahoo.com/search/all/__yltc=s:14489115,d:14489115,w:search?p=dvd&did=";
        try {
            URL url = new URL(s1);
            URLConnection conn = url.openConnection();
            InputStream in = conn.getInputStream();
            String page;
            try {
                page = readFully(in, "GBK");
            } finally {
                in.close();
            }
            System.out.println(getText(new StringReader(page)));
        } catch (IOException e) {
            System.err.println("Failed to fetch " + s1 + ": " + e);
        }
    }

    /** Reads {@code in} to the end and returns its content decoded with {@code charsetName}. */
    private static String readFully(InputStream in, String charsetName) throws IOException {
        // Decoding through a Reader appends only the characters actually read and keeps
        // multi-byte characters intact when they straddle a chunk boundary.
        Reader reader = new InputStreamReader(in, charsetName);
        StringBuffer page = new StringBuffer();
        char[] chunk = new char[1024];
        int count;
        while ((count = reader.read(chunk)) != -1) {
            page.append(chunk, 0, count);
        }
        return page.toString();
    }

    /**
     * Parses HTML from {@code rd} and returns the text that appears inside
     * {@code <a>} or {@code <b>} elements, one text run per line.
     *
     * @param rd source of the HTML markup
     * @return the collected text, each run followed by {@code '\n'}; if parsing fails
     *         part-way, whatever was collected up to that point
     */
    public static String getText(Reader rd) {
        final StringBuffer buf = new StringBuffer(1000);
        try {
            // Create an HTML document whose parser callback appends the wanted text to buf.
            HTMLDocument doc = new HTMLDocument() {
                public HTMLEditorKit.ParserCallback getReader(int pos) {
                    return new HTMLEditorKit.ParserCallback() {
                        // Per-parse state, so one call cannot leak "inside a tag" into the next.
                        private boolean inTag = false;

                        public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = true;
                            }
                        }

                        public void handleEndTag(HTML.Tag tag, int position) {
                            if (tag == HTML.Tag.A || tag == HTML.Tag.B) {
                                inTag = false;
                            }
                        }

                        public void handleText(char[] data, int pos) {
                            if (inTag) {
                                buf.append(data);
                                buf.append('\n');
                            }
                        }
                    };
                }
            };
            // Without this property the parser throws ChangedCharSetException (an IOException)
            // as soon as it meets a <meta http-equiv="Content-Type" ... charset=...> tag,
            // which made this method return almost nothing for most real pages.
            doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            EditorKit kit = new HTMLEditorKit();
            kit.read(rd, doc, 0);
        } catch (BadLocationException e) {
            System.err.println("HTML parsing failed: " + e);
        } catch (IOException e) {
            System.err.println("HTML parsing failed: " + e);
        }
        return buf.toString();
    }
} // 给你改改了 (forum reply: "I revised it for you")
getText(Reader rd) 可以调用，
不过这个方法里面好像有问题，
自己调调吧。