java截取html代码 写一个java类,实现对一个网站的html代码进行截取,java+jsp 因为我对这方面不是很懂,还望各位路过的达人有这方面的经验的能给于解决,最好是有一个完整的类,我实在是汗颜啊... 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 HTTPSocket可以实现,流形式读取目标页HTML代码,这个没啥技术含量,网上一搜一大堆 URL,HttpClient都可以实现,也不难,网上很多~ 这是我写的..大家帮我一下import java.io.*; import java.net.*; public class ResultHtml { public static Object getHtml(String turl) throws IOException{ URL url = new URL(turl); InputStream in = url.openConnection().getInputStream(); BufferedReader reader=new BufferedReader(new InputStreamReader(in)); String line=reader.readLine(); StringBuffer a=new StringBuffer(); while(line!=null){ a.append(line); System.out.println(line); line = reader.readLine(); a.append("\n"); } return a.toString(); }}下面是jsp调用的:String a=null;a=(String)ResultHtml.getHtml("www.163.com");out.print(a);报错:java.net.MalformedURLException: no protocol: www.163.com java.net.URL.<init>(URL.java:567) java.net.URL.<init>(URL.java:464) java.net.URL.<init>(URL.java:413) com.foren.family.db.ResultHtml.getHtml(ResultHtml.java:8) org.apache.jsp.foren.master.testhtml_jsp._jspService(testhtml_jsp.java:67) org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:94) javax.servlet.http.HttpServlet.service(HttpServlet.java:802) org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:324) org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:292) org.apache.jasper.servlet.JspServlet.service(JspServlet.java:236) javax.servlet.http.HttpServlet.service(HttpServlet.java:802)看不懂..我太菜了! 
public static String readHtmlFile(String urlPath){ String htmlFile=""; try { URL url = new URL(urlPath); URLConnection urlConnection = url.openConnection(); urlConnection.setAllowUserInteraction(false); // InputStream urlStream = url.openStream(); InputStream urlStream = urlConnection.getInputStream();//.openStream(); InputStreamReader sr = new InputStreamReader(urlStream, "GBK"); int byteRead = 0; char[] buffer = new char[8192]; while ((byteRead = sr.read(buffer, 0, 8192)) != -1) { System.out.println(new String(buffer,0, byteRead)); htmlFile+=new String(buffer,0, byteRead); } } catch (IOException e) { System.out.println("error :" + e.getMessage()); } return htmlFile; } public static void main(String[] args){ String urlGk="http://gaokao.h-edu.com/yx/yxjj.asp?schoolid=85"; String htmlContent=readHtmlFile(urlGk); String r="<span class=\"fb14\">([^\"]+)</span>"; Pattern s=Pattern.compile(r); Matcher m=s.matcher(htmlContent); System.out.println("名称为"+m.group(1)); }我的,你自己改下 就可以了 用StringBuffer 用html parser 就可以实现.import org.htmlparser.Parser;import org.htmlparser.util.NodeList;import org.htmlparser.util.ParserException; public static String getCode(String urlStr) throws ParserException { Parser p = new Parser(urlStr); NodeList list = p.parse(null); String codeStr = list.toHtml(); System.out.println(codeStr); return codeStr; } 谢谢大家..已经弄出来了..用的URL... 没有用到zhaochunhui的代码..但还是谢谢你的... jsp .net php哪个好? 求助!使用spring ioc注入管理,spring aop做日志记录。。内详! 小问题求教 javascript 和 php 同时获取同名表单问题,求助高手,急急急急......... 请问各位,昨日贴数,今天贴数,和最高日贴数,是怎么实现的 高分求bbs原码(200分,可以开新帖子加) 有关动态连接的写法问题,包含多个参数!!! fileInputStream 重复执行的问题 一段代码不是很理解,可否帮我解释! 急!谁能给我一个实体EJB连接数据库的例子,感谢!在线等!!!!!! 这个错误是什么意思? 如何将有格式的一些汉字在textarea中显示出来
import java.io.*;
import java.net.*;
/**
 * Utility for downloading the text content behind a URL.
 */
public class ResultHtml
{
    /**
     * Fetches the resource at {@code turl} and returns its text, with a
     * {@code "\n"} appended after every line that was read.
     *
     * <p>{@code turl} must be a complete URL including the protocol, for
     * example {@code "http://www.163.com"}. A bare host name such as
     * {@code "www.163.com"} makes {@link URL} throw
     * {@code MalformedURLException: no protocol}.
     *
     * <p>Each line is also echoed to {@code System.out}. The bytes are decoded
     * with the platform default charset because no charset is passed to the
     * {@link InputStreamReader}.
     *
     * @param turl absolute URL of the resource to read
     * @return the content as a {@link String}; the declared type stays
     *         {@code Object} so existing callers that cast keep working
     * @throws IOException if the URL is malformed, the connection cannot be
     *                     opened, or reading fails
     */
    public static Object getHtml(String turl) throws IOException{
        URL url = new URL(turl);
        InputStream in = url.openConnection().getInputStream();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));
            StringBuffer a = new StringBuffer();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                a.append(line);
                System.out.println(line);
                a.append("\n");
            }
            return a.toString();
        } finally {
            // Always release the connection's stream, including when reading fails.
            in.close();
        }
    }
}
// Below is the JSP code that calls it:
// JSP scriptlet: fetch the page and write it to the response.
// The address must carry a protocol; a bare "www.163.com" makes java.net.URL
// throw "MalformedURLException: no protocol".
String a = (String) ResultHtml.getHtml("http://www.163.com");
out.print(a);
报错:
java.net.MalformedURLException: no protocol: www.163.com
java.net.URL.<init>(URL.java:567)
java.net.URL.<init>(URL.java:464)
java.net.URL.<init>(URL.java:413)
com.foren.family.db.ResultHtml.getHtml(ResultHtml.java:8)
org.apache.jsp.foren.master.testhtml_jsp._jspService(testhtml_jsp.java:67)
org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:94)
javax.servlet.http.HttpServlet.service(HttpServlet.java:802)
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:324)
org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:292)
org.apache.jasper.servlet.JspServlet.service(JspServlet.java:236)
javax.servlet.http.HttpServlet.service(HttpServlet.java:802)
看不懂..我太菜了!
String htmlFile="";
try {
URL url = new URL(urlPath);
URLConnection urlConnection = url.openConnection();
urlConnection.setAllowUserInteraction(false);
// InputStream urlStream = url.openStream();
InputStream urlStream = urlConnection.getInputStream();//.openStream();
InputStreamReader sr = new InputStreamReader(urlStream, "GBK");
int byteRead = 0;
char[] buffer = new char[8192];
while ((byteRead = sr.read(buffer, 0, 8192)) != -1) {
System.out.println(new String(buffer,0, byteRead));
htmlFile+=new String(buffer,0, byteRead);
}
} catch (IOException e) {
System.out.println("error :" + e.getMessage());
}
return htmlFile;
}
/**
 * Demo entry point: downloads a fixed page with {@code readHtmlFile} and
 * prints the text of the first {@code <span class="fb14">} element found.
 *
 * @param args unused
 */
public static void main(String[] args){
    String urlGk="http://gaokao.h-edu.com/yx/yxjj.asp?schoolid=85";
    String htmlContent=readHtmlFile(urlGk);
    String r="<span class=\"fb14\">([^\"]+)</span>";
    Pattern s=Pattern.compile(r);
    Matcher m=s.matcher(htmlContent);
    // group() is only valid after a successful match attempt; calling it on a
    // fresh Matcher throws IllegalStateException.
    if (m.find()) {
        System.out.println("名称为"+m.group(1));
    } else {
        System.out.println("error :no match for " + r);
    }
}
这是我的代码,你自己改一下就可以了。 用 StringBuffer。
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Parses the resource identified by {@code urlStr} with the HTML Parser
 * library and returns the parsed node list rendered back to HTML.
 *
 * <p>The rendered HTML is also printed to {@code System.out}.
 *
 * @param urlStr resource passed to the {@code Parser} constructor
 *               (presumably a URL; confirm against the library docs)
 * @return the HTML text produced by {@code NodeList.toHtml()}
 * @throws ParserException declared by the original method; presumably raised by the parser
 */
public static String getCode(String urlStr) throws ParserException
{
    // A null argument to parse() presumably applies no node filter (confirm against the library docs).
    final NodeList nodes = new Parser(urlStr).parse(null);
    final String html = nodes.toHtml();
    System.out.println(html);
    return html;
}