catch (MalformedURLException ex) { System.err.println(args[0] + " is not a URL Java understands."); } finally { if (in != null) in.close( ); if (out != null){ out.close( ); }
}
}
}
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;

/**
 * @author http://blog.csdn.net/xxd851116
 */
public class URLUtils {

    /**
     * Downloads the content behind a URL and returns it as one string.
     *
     * <p>The content is read line by line and the lines are concatenated
     * without their line terminators. Bytes are decoded with the platform
     * default charset.
     *
     * @param urlString the URL to read
     * @return the concatenated lines; an empty string when the resource has no content
     * @throws IOException if the URL is malformed or the resource cannot be read
     */
    public static String getSourceCode(String urlString) throws IOException {
        // Accumulate in a builder: starting from a null String and using += would
        // prefix the result with the literal text "null".
        StringBuilder source = new StringBuilder();
        URL url = new URL(urlString);
        URLConnection uc = url.openConnection();
        BufferedReader in = new BufferedReader(new InputStreamReader(uc.getInputStream()));
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                source.append(inputLine);
            }
        } finally {
            // Release the stream even when reading fails part-way.
            in.close();
        }
        return source.toString();
    }

    /** Prints the source of a sample page to standard output. */
    public static void main(String[] args) throws IOException {
        System.out.println(URLUtils.getSourceCode("http://blog.csdn.net/xxd851116"));
    }
}
html包括javascript吗? 楼上的方法估计可以的
我有代码可以帮帮LZ,LZ用javascript function onSave(){ var val=document.body.innerHTML;//取body里Html代码,提交到servlet,用下的方法解析 } 有不明白的可以联系我public void startParse(String sHtml,HttpServletRequest request) { Document doc=null; Element root = null; try { Reader in = new StringReader("<?xml version=\"1.0\" encoding=\"utf-8\"?><root></root>"); doc = new SAXBuilder().build(in); root=doc.getRootElement(); ParserDelegator parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = new Callback(root); parser.parse(new StringReader(sHtml), callback, true); XMLOutputter outp = new XMLOutputter();//用于输出jdom 文档 Format format=Format.getPrettyFormat(); //格式化文档 format.setEncoding("GBK"); //由于默认的编码是utf-8,中文将显示为乱码,所以设为gbk outp.setFormat(format); outp.output(doc,System.out); } } public class Callback extends HTMLEditorKit.ParserCallback { private Document doc; private Element root; public Callback(Document _doc){ this.doc=_doc; } public Callback(Element _root){ this.root=_root; } public Document getDoc(){ return doc; } public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) { if (t.equals(HTML.Tag.INPUT)) { for(int r=0;r<root.getContentSize();r++){ String type = (String) a.getAttribute(HTML.Attribute.TYPE); Element table=(Element)root.getChildren().get(r); String id = (String) a.getAttribute(HTML.Attribute.ID); for(int i=0;i<table.getContentSize();i++){ Element tr=(Element)table.getChildren().get(i); for(int j=0;j<tr.getContentSize();j++){ Element td=(Element)tr.getChildren().get(j); Element input=new Element("input"); if(type==null && id!=null){ if(id.equals(td.getAttributeValue("id"))){ td.addContent(input); input.setAttribute("id",id); input.setAttribute("type","text"); input.setAttribute("size","10"); if(a.getAttribute(HTML.Attribute.WIDTH)!=null){ input.setAttribute("width",a.getAttribute(HTML.Attribute.WIDTH).toString()); }else{ input.setAttribute("width",""); } if(a.getAttribute(HTML.Attribute.STYLE)!=null){ 
input.setAttribute("style",a.getAttribute(HTML.Attribute.STYLE).toString()); }else{ input.setAttribute("style","text-align:left"); } if(a.getAttribute(HTML.Attribute.VALUE)!=null){ input.setAttribute("value",a.getAttribute(HTML.Attribute.VALUE).toString()); }else{ input.setAttribute("value",""); } if(a.getAttributeCount()>5){ input.setAttribute("readOnly","true"); } } } } } } } } public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) { if (t.equals(HTML.Tag.TABLE)) { Element table=new Element("table"); root.addContent(table); table.setAttribute("id",a.getAttribute(HTML.Attribute.ID).toString()); } if(t.equals(HTML.Tag.TR)){ for(int i=0;i<root.getContentSize();i++){ Element table=(Element)root.getChildren().get(i);//从root中获取每个表对象 Element tr=new Element("tr");// String id=(String)a.getAttribute(HTML.Attribute.ID); String trid=id.substring(3,4);//取出行id String tabid=table.getAttributeValue("id").substring(3,4); if(trid.equals(tabid)){//如果行id和列id相等,满足条件 table.addContent(tr);//添加行 tr.setAttribute("id",id); } } } if (t.equals(HTML.Tag.TD)) { for(int r=0;r<root.getContentSize();r++){ Element table=(Element)root.getChildren().get(r);
我写了两个类分别完成相应的功能
第一个类下载网页,我把它放在C盘
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Downloads the resource behind an HTTP URL and stores it in a local file.
 */
public class Lianxi1 {
    /** Size, in bytes, of the buffer used while copying. */
    final int BUFFER_SIZE = 1024;

    /**
     * Copies the body of the HTTP response for {@code downUrl} into {@code filePath}.
     *
     * <p>Failures are reported with {@code printStackTrace()} and not rethrown.
     * The target file is only opened after the connection's input stream has
     * been obtained.
     *
     * @param downUrl  URL of the resource to download
     * @param filePath path of the file to write
     */
    public void saveToFile(String downUrl, String filePath) {
        HttpURLConnection connect = null;
        BufferedInputStream in = null;
        FileOutputStream file = null;
        byte[] buf = new byte[BUFFER_SIZE];
        int size = 0;
        try {
            URL url = new URL(downUrl);
            connect = (HttpURLConnection) url.openConnection();
            connect.connect();
            in = new BufferedInputStream(connect.getInputStream());
            file = new FileOutputStream(filePath);
            while ((size = in.read(buf)) != -1) {
                file.write(buf, 0, size);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Any of these may still be null when the failure happened early
            // (for example a malformed URL), and each is closed independently
            // so one failing close() cannot prevent the remaining cleanup.
            if (file != null) {
                try {
                    file.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (connect != null) {
                connect.disconnect();
            }
        }
    }

    /** Downloads a sample page to {@code c:\abc.html}. */
    public static void main(String[] args) {
        Lianxi1 d = new Lianxi1();
        d.saveToFile("http://news.cn.yahoo.com/08-07-/1028/2j9qn.html", "c:\\abc.html");
    }
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
/**
 * Appends the contents of {@code C:/abc.html} to {@code C:/12.txt}.
 */
public class XieRu {

    /**
     * Copies the source file to the end of the target file in 1024-byte chunks.
     *
     * @param args ignored
     * @throws IOException if either file cannot be opened, read, written or closed
     */
    public static void main(String[] args) throws IOException {
        // Input stream
        InputStream in = new FileInputStream("C:/abc.html");
        try {
            // Output stream, opened in append mode. It is created inside the
            // try so the input stream is still closed if this open fails.
            OutputStream out = new FileOutputStream("C:/12.txt", true);
            try {
                byte[] buffer = new byte[1024];
                int byteRead;
                while ((byteRead = in.read(buffer)) != -1) {
                    out.write(buffer, 0, byteRead);
                }
            } finally {
                out.close();
            }
        } finally {
            // Runs even when closing the output stream throws.
            in.close();
        }
    }
}
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.io.BufferedReader;

/**
 * @author http://blog.csdn.net/xxd851116
 */
public class URLUtils {

    /**
     * Downloads the content behind a URL and returns it as one string.
     *
     * <p>The content is read line by line and the lines are concatenated
     * without their line terminators. Bytes are decoded with the platform
     * default charset.
     *
     * @param urlString the URL to read
     * @return the concatenated lines; an empty string when the resource has no content
     * @throws IOException if the URL is malformed or the resource cannot be read
     */
    public static String getSourceCode(String urlString) throws IOException {
        // Accumulate in a builder: starting from a null String and using += would
        // prefix the result with the literal text "null".
        StringBuilder source = new StringBuilder();
        URL url = new URL(urlString);
        URLConnection uc = url.openConnection();
        BufferedReader in = new BufferedReader(new InputStreamReader(uc.getInputStream()));
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                source.append(inputLine);
            }
        } finally {
            // Release the stream even when reading fails part-way.
            in.close();
        }
        return source.toString();
    }

    /** Prints the source of a sample page to standard output. */
    public static void main(String[] args) throws IOException {
        System.out.println(URLUtils.getSourceCode("http://blog.csdn.net/xxd851116"));
    }
}
楼上的方法估计可以的
/**
 * Reads the HTML markup of the page body into a local variable.
 * Nothing is returned or submitted by this function itself.
 */
function onSave() {
    // Take the HTML inside <body>; it is meant to be submitted to the servlet
    // and parsed with the method shown below.
    var bodyHtml = document.body.innerHTML;
}
有不明白的可以联系我
/**
 * Parses an HTML string and prints the XML document built from it to
 * standard output.
 *
 * <p>An empty {@code <root>} document is created, a {@link Callback} bound to
 * that root is driven by the Swing HTML parser over {@code sHtml}, and the
 * resulting document is pretty-printed with GBK encoding.
 *
 * <p>Failures while building, parsing or printing are reported with
 * {@code printStackTrace()} and not rethrown.
 *
 * @param sHtml   the HTML markup to parse
 * @param request not used by this method
 */
public void startParse(String sHtml, HttpServletRequest request) {
    Document doc = null;
    Element root = null;
    try {
        Reader in = new StringReader("<?xml version=\"1.0\" encoding=\"utf-8\"?><root></root>");
        doc = new SAXBuilder().build(in);
        root = doc.getRootElement();
        ParserDelegator parser = new ParserDelegator();
        HTMLEditorKit.ParserCallback callback = new Callback(root);
        parser.parse(new StringReader(sHtml), callback, true);
        XMLOutputter outp = new XMLOutputter(); // writes the JDOM document
        Format format = Format.getPrettyFormat(); // pretty-print the document
        // Per the original author: the default encoding is UTF-8, which showed
        // Chinese text garbled in their setup, hence GBK.
        format.setEncoding("GBK");
        outp.setFormat(format);
        outp.output(doc, System.out);
    } catch (Exception e) {
        // The try block previously had no handler at all, which does not
        // compile. Exception is caught broadly because the specific JDOM
        // exception type is not imported in the visible part of the file.
        e.printStackTrace();
    }
}
/**
 * HTML parser callback that copies the table / tr / td / input structure of
 * the parsed markup into a JDOM tree below {@code root}.
 *
 * <p>Elements are matched through their {@code id} attributes. According to
 * the original author's notes the ids follow a positional scheme, for example
 * with two tables: {@code tab0} (table 0), {@code row01} (table 0, row 1) and
 * {@code col011} (table 0, row 1, column 1). The character at index 3 names
 * the table and the characters at indexes 3-4 name table plus row.
 *
 * <p>Tags whose id is missing or too short for this scheme are ignored.
 */
public class Callback extends HTMLEditorKit.ParserCallback {
    private Document doc;
    private Element root;

    /**
     * Creates a callback that writes below the root element of {@code _doc}.
     *
     * @param _doc the target document; when non-null its root element becomes the target
     */
    public Callback(Document _doc) {
        this.doc = _doc;
        // Without a root every tag handler would fail on its first use.
        if (_doc != null) {
            this.root = _doc.getRootElement();
        }
    }

    /**
     * Creates a callback that writes below {@code _root}.
     *
     * @param _root the element that receives the generated table elements
     */
    public Callback(Element _root) {
        this.root = _root;
    }

    /** @return the document passed to the constructor, or {@code null} if none was given */
    public Document getDoc() {
        return doc;
    }

    /**
     * Handles {@code <input>} tags that carry an id but no type attribute: an
     * {@code input} element is added to every already-recorded td whose id
     * equals the input's id.
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (!t.equals(HTML.Tag.INPUT)) {
            return;
        }
        String type = (String) a.getAttribute(HTML.Attribute.TYPE);
        String id = (String) a.getAttribute(HTML.Attribute.ID);
        if (type != null || id == null) {
            return;
        }
        // Loops are bounded by the child-element list they index into.
        for (int r = 0; r < root.getChildren().size(); r++) {
            Element table = (Element) root.getChildren().get(r);
            for (int i = 0; i < table.getChildren().size(); i++) {
                Element tr = (Element) table.getChildren().get(i);
                for (int j = 0; j < tr.getChildren().size(); j++) {
                    Element td = (Element) tr.getChildren().get(j);
                    if (id.equals(td.getAttributeValue("id"))) {
                        td.addContent(buildInput(a, id));
                    }
                }
            }
        }
    }

    /**
     * Handles {@code <table>}, {@code <tr>} and {@code <td>} start tags by
     * adding the corresponding element at the position selected by its id.
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t.equals(HTML.Tag.TABLE)) {
            addTable(a);
        }
        if (t.equals(HTML.Tag.TR)) {
            addRow(a);
        }
        if (t.equals(HTML.Tag.TD)) {
            addCell(a);
        }
    }

    /** Adds a table element carrying the tag's id; tables without an id are skipped. */
    private void addTable(MutableAttributeSet a) {
        Object tableId = a.getAttribute(HTML.Attribute.ID);
        if (tableId == null) {
            // A table without an id can never be matched by a row or cell.
            return;
        }
        Element table = new Element("table");
        root.addContent(table);
        table.setAttribute("id", tableId.toString());
    }

    /** Adds a tr element to each recorded table whose table index matches the row id. */
    private void addRow(MutableAttributeSet a) {
        String id = (String) a.getAttribute(HTML.Attribute.ID);
        String rowTableKey = idPart(id, 3, 4); // table index taken from the row id
        if (rowTableKey == null) {
            return;
        }
        for (int i = 0; i < root.getChildren().size(); i++) {
            // Fetch each table from root
            Element table = (Element) root.getChildren().get(i);
            String tableKey = idPart(table.getAttributeValue("id"), 3, 4);
            if (rowTableKey.equals(tableKey)) {
                // The row belongs to this table: add it
                Element tr = new Element("tr");
                table.addContent(tr);
                tr.setAttribute("id", id);
            }
        }
    }

    /** Adds a td element to each recorded row whose table and row index match the cell id. */
    private void addCell(MutableAttributeSet a) {
        String id = (String) a.getAttribute(HTML.Attribute.ID);
        String cellTableKey = idPart(id, 3, 4); // which table the cell belongs to
        String cellRowKey = idPart(id, 3, 5);   // table index plus row index
        if (cellTableKey == null || cellRowKey == null) {
            return;
        }
        for (int r = 0; r < root.getChildren().size(); r++) {
            Element table = (Element) root.getChildren().get(r);
            String tableKey = idPart(table.getAttributeValue("id"), 3, 4);
            if (!cellTableKey.equals(tableKey)) {
                continue;
            }
            for (int i = 0; i < table.getChildren().size(); i++) {
                Element tr = (Element) table.getChildren().get(i);
                String rowKey = idPart(tr.getAttributeValue("id"), 3, 5);
                if (cellRowKey.equals(rowKey)) {
                    Element td = new Element("td");
                    tr.addContent(td);
                    td.setAttribute("id", id);
                }
            }
        }
    }

    /** Builds the input element for a matched cell, copying width, style and value from the tag. */
    private static Element buildInput(MutableAttributeSet a, String id) {
        Element input = new Element("input");
        input.setAttribute("id", id);
        input.setAttribute("type", "text");
        input.setAttribute("size", "10");
        input.setAttribute("width", attributeOr(a, HTML.Attribute.WIDTH, ""));
        input.setAttribute("style", attributeOr(a, HTML.Attribute.STYLE, "text-align:left"));
        input.setAttribute("value", attributeOr(a, HTML.Attribute.VALUE, ""));
        // NOTE(review): more than five attributes is treated as "read only";
        // the reason for this threshold is not visible here - confirm with the author.
        if (a.getAttributeCount() > 5) {
            input.setAttribute("readOnly", "true");
        }
        return input;
    }

    /** Returns the attribute's string form, or {@code fallback} when the attribute is absent. */
    private static String attributeOr(MutableAttributeSet a, HTML.Attribute key, String fallback) {
        Object value = a.getAttribute(key);
        return value != null ? value.toString() : fallback;
    }

    /** Returns {@code id.substring(begin, end)}, or {@code null} when id is null or too short. */
    private static String idPart(String id, int begin, int end) {
        if (id == null || id.length() < end) {
            return null;
        }
        return id.substring(begin, end);
    }
}
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import org.apache.commons.lang.StringUtils;
import java.io.BufferedReader;

public class HtmlUtil {

    /**
     * Fetches the page source of the given URL.
     *
     * <p>The response is read line by line; empty lines are skipped and the
     * remaining lines are concatenated without line terminators. Bytes are
     * decoded with the platform default charset. On any failure the exception
     * is printed to standard error and an empty string is returned.
     *
     * @param strUrl the page URL
     * @return the page source, or an empty string if it could not be read
     */
    public static String getHtmlContent(String strUrl) {
        StringBuilder content = new StringBuilder();
        BufferedReader in = null;
        HttpURLConnection huc = null;
        try {
            URL url = new URL(strUrl);
            huc = (HttpURLConnection) url.openConnection();
            InputStream is = huc.getInputStream();
            in = new BufferedReader(new InputStreamReader(is));
            String line;
            while ((line = in.readLine()) != null) {
                if (line.length() == 0) {
                    continue;
                }
                content.append(line);
            }
        } catch (Exception e) {
            System.err.println(e);
            // Discard partial content so callers never see a truncated page.
            content.setLength(0);
        } finally {
            // Each resource is released on its own: a missing stream must not
            // prevent the connection from being disconnected.
            if (in != null) {
                try {
                    in.close(); // also closes the wrapped reader and stream
                } catch (Exception e) {
                    System.err.println(e);
                }
            }
            if (huc != null) {
                huc.disconnect();
            }
        }
        return content.toString();
    }

    /** Prints the source of a sample page to standard output. */
    public static void main(String args[]) {
        System.out.println(getHtmlContent("http://www.baidu.com"));
    }
}