import java.net.URL;
import java.io.*;
import org.w3c.tidy.Tidy;

/**
 * Fetches an HTML document from a URL and converts it to XML with JTidy.
 *
 * <p>The converted document is written to {@code outFileName}; the messages
 * Tidy emits while parsing are written to {@code errOutFileName}.
 */
public class TestHTML2XML {
    private final String url;
    private final String outFileName;
    private final String errOutFileName;

    /**
     * @param url            location of the HTML document to convert
     * @param outFileName    name of the file that receives the XML output
     * @param errOutFileName name of the file that receives Tidy's messages
     */
    public TestHTML2XML(String url, String outFileName, String errOutFileName) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
    }

    /**
     * Runs the HTML-to-XML conversion.
     *
     * <p>An {@link IOException} is not propagated; it is reported on standard
     * output. Every stream opened here is closed in the {@code finally} block,
     * so a failure part-way through no longer leaks the URL stream, the output
     * file or the error-log file.
     */
    public void convert() {
        Tidy tidy = new Tidy();
        // Tell Tidy to convert HTML to XML
        tidy.setXmlOut(true);

        PrintWriter errWriter = null;
        BufferedInputStream in = null;
        FileOutputStream out = null;
        try {
            // Set file for error messages
            errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
            tidy.setErrout(errWriter);

            // Create input and output streams
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            out = new FileOutputStream(outFileName);

            // Convert files
            tidy.parse(in, out);
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        } finally {
            // Clean up, in the same order the original closed the streams;
            // the error writer goes last so it stays usable until the end.
            closeAndReport(in);
            closeAndReport(out);
            closeAndReport(errWriter);
        }
    }

    /**
     * Closes {@code resource} if it was opened, reporting a close failure the
     * same way {@link #convert()} reports any other I/O failure.
     */
    private void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        }
    }
}
老大叫我看这段代码,但严重忽略了我的 Java 水平!!
还请各位大侠帮我讲解一下这段代码呀!!!
import java.io.*;
import org.w3c.tidy.Tidy;

/**
 * Fetches an HTML document from a URL and converts it to XML with JTidy.
 *
 * <p>The converted document is written to {@code outFileName}; the messages
 * Tidy emits while parsing are written to {@code errOutFileName}.
 */
public class TestHTML2XML {
    private final String url;
    private final String outFileName;
    private final String errOutFileName;

    /**
     * @param url            location of the HTML document to convert
     * @param outFileName    name of the file that receives the XML output
     * @param errOutFileName name of the file that receives Tidy's messages
     */
    public TestHTML2XML(String url, String outFileName, String errOutFileName) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
    }

    /**
     * Runs the HTML-to-XML conversion.
     *
     * <p>An {@link IOException} is not propagated; it is reported on standard
     * output. Every stream opened here is closed in the {@code finally} block,
     * so a failure part-way through no longer leaks the URL stream, the output
     * file or the error-log file.
     */
    public void convert() {
        Tidy tidy = new Tidy();
        // Tell Tidy to convert HTML to XML
        tidy.setXmlOut(true);

        PrintWriter errWriter = null;
        BufferedInputStream in = null;
        FileOutputStream out = null;
        try {
            // Set file for error messages
            errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
            tidy.setErrout(errWriter);

            // Create input and output streams
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            out = new FileOutputStream(outFileName);

            // Convert files
            tidy.parse(in, out);
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        } finally {
            // Clean up, in the same order the original closed the streams;
            // the error writer goes last so it stays usable until the end.
            closeAndReport(in);
            closeAndReport(out);
            closeAndReport(errWriter);
        }
    }

    /**
     * Closes {@code resource} if it was opened, reporting a close failure the
     * same way {@link #convert()} reports any other I/O failure.
     */
    private void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        }
    }
}
老大叫我看这段代码,但严重忽略了我的 Java 水平!!
还请各位大侠帮我讲解一下这段代码呀!!!
import java.io.*;
import org.w3c.tidy.Tidy;

/**
 * Fetches an HTML document from a URL and converts it to XML with JTidy.
 *
 * <p>The converted document is written to {@code outFileName}; the messages
 * Tidy emits while parsing are written to {@code errOutFileName}.
 */
public class TestHTML2XML {
    /** Location of the HTML document to convert. */
    private final String url;
    /** Name of the file the XML output is written to. */
    private final String outFileName;
    /** Name of the file Tidy's messages are written to. */
    private final String errOutFileName;

    /**
     * @param url            location of the HTML document to convert
     * @param outFileName    name of the file that receives the XML output
     * @param errOutFileName name of the file that receives Tidy's messages
     */
    public TestHTML2XML(String url, String outFileName, String errOutFileName) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
    }

    /**
     * Runs the HTML-to-XML conversion.
     *
     * <p>An {@link IOException} is not propagated; it is reported on standard
     * output. Every stream opened here is closed in the {@code finally} block,
     * so a failure part-way through no longer leaks the URL stream, the output
     * file or the error-log file.
     */
    public void convert() {
        Tidy tidy = new Tidy();
        // Tell Tidy to convert HTML to XML
        tidy.setXmlOut(true);

        PrintWriter errWriter = null;
        BufferedInputStream in = null;
        FileOutputStream out = null;
        try {
            // Set file for error messages
            errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
            tidy.setErrout(errWriter);

            // Create input and output streams
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            out = new FileOutputStream(outFileName);

            // Convert files
            tidy.parse(in, out);
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        } finally {
            // Clean up, in the same order the original closed the streams;
            // the error writer goes last so it stays usable until the end.
            closeAndReport(in);
            closeAndReport(out);
            closeAndReport(errWriter);
        }
    }

    /**
     * Closes {@code resource} if it was opened, reporting a close failure the
     * same way {@link #convert()} reports any other I/O failure.
     */
    private void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        }
    }
}
parse(java.io.InputStream in, java.io.OutputStream out) :Reads from the given input and returns the root Node.
该解析类定义了一个构造器,用来初始化三个属性:HTML 的 URL、解析成功后输出的 XML 文件名、
以及记录解析错误信息的文件名。convert() 就是完成解析工作的主要方法,如下:
// Annotated walk-through of the statements inside convert(). This is a
// fragment: the enclosing method and the catch block are not shown here.
URL u; // URL object that locates the HTML source
BufferedInputStream in; // buffered byte input stream
FileOutputStream out; // byte output stream to a file. NOTE(review): the paste fused the next statement into this comment: Tidy tidy = new Tidy(); (the Tidy parser) -- Tell Tidy to convert HTML to XML
tidy.setXmlOut(true); // configure Tidy to produce XML output. NOTE(review): the opening "try {" was fused into this comment by the paste
//Set file for error messages
tidy.setErrout(new PrintWriter(new FileWriter(errOutFileName), true)); // send Tidy's error messages to the file named by errOutFileName
u = new URL(url); // build the URL object for the HTML source -- Create input and output streams
in = new BufferedInputStream(u.openStream()); // open a buffered byte input stream on that URL
out = new FileOutputStream(outFileName); // open the byte output stream for the XML file -- Convert files
tidy.parse(in, out); // the main parse call, which performs the conversion -- Clean up: close the streams to release their resources
in.close();
out.close();