OK,我是这样玩的,大家看下还有没其他方法 public String getLocalHtmlStream(String path) throws Exception { File f = new File(path); if (!f.exists() && !f.getAbsolutePath().endsWith("html")) { System.out.println("error"); return ""; } // InputStream in = new FileInputStream(f); StringBuffer b = new StringBuffer(); int c; while ((c = in.read()) != -1) { b.append((char) c); } return new String(b.toString().getBytes("ISO-8859-1"),"gbk"); }
有第三方 jar 包可以抓取 HTML 标签内容。
URL url = new URL(s); DataInputStream datainputstream = new DataInputStream(url.openConnection().getInputStream()); ByteArrayOutputStream bytearrayoutputstream = new ByteArrayOutputStream(); try{ byte abyte0[] = new byte[1024]; boolean flag = false; do{ int i = datainputstream.read(abyte0); if(i == -1) break; bytearrayoutputstream.write(abyte0, 0, i); } while(true); bytearrayoutputstream.flush(); byte abyte1[] = bytearrayoutputstream.toByteArray(); s1 = new String(abyte1, 0, abyte1.length, "GB2312");
File f = new File(path);
if (!f.exists() && !f.getAbsolutePath().endsWith("html")) {
System.out.println("error");
return "";
} //
InputStream in = new FileInputStream(f);
StringBuffer b = new StringBuffer();
int c;
while ((c = in.read()) != -1) {
b.append((char) c);
}
return new String(b.toString().getBytes("ISO-8859-1"),"gbk");
}
DataInputStream datainputstream = new DataInputStream(url.openConnection().getInputStream());
ByteArrayOutputStream bytearrayoutputstream = new ByteArrayOutputStream();
try{
byte abyte0[] = new byte[1024];
boolean flag = false;
do{
int i = datainputstream.read(abyte0);
if(i == -1)
break;
bytearrayoutputstream.write(abyte0, 0, i);
} while(true);
bytearrayoutputstream.flush();
byte abyte1[] = bytearrayoutputstream.toByteArray();
s1 = new String(abyte1, 0, abyte1.length, "GB2312");