比较经典:怎么解析HTML文件?请高手赐教!!万分感谢! 怎么解析HTML文件?如传入一个字符串“Name”,能得到HTML中相应的Value值? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 用NekoHTML解析一下就如同XML一样了 然后遍历一下 相关资料自己google一下 如何在java程序中将html转化为xml输出。 最好能提供一个简单的源程序,虽然我也在网上找到一些解析Html代码,可是我刚接触dom,有些类我还不知道应该引进那个包。 现在能够解析Html文件,但要想通过传一个字段名就能够获取他的Value值,我想应该将Html转化为Xml后再处理就方便了。 不知道那位大哥能提供这方面的源代码,小弟感激不尽,最好是能直接运行的。谢谢!! 这是我做的,大家可以看看:import org.w3c.dom.Node;import org.w3c.dom.DocumentFragment;import org.w3c.dom.html.HTMLDocument;import org.xml.sax.InputSource;import org.apache.html.dom.HTMLDocumentImpl;import org.cyberneko.html.parsers.DOMFragmentParser;import javax.xml.transform.stream.StreamResult;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerFactory;import java.util.*;import java.io.*;import java.io.InputStreamReader;import java.io.InputStream;import java.io.FileReader;import java.net.*;/*@author pengjimingdate: 2006-11-12function:html transform xml*/public class html2xml { public static void main(String args[]) { if (args != null && args.length >= 2) { try { String path = args[0]; System.out.println("!!!!!PATH!!!!===" + path); String fromfile = args[1]; String outputfile = getFileName(); if (args.length > 2) { outputfile = args[2]; System.out.println("####outputfile###===" + outputfile); } boolean b = Boolean.valueOf(fromfile).booleanValue(); System.out.println("@@@@@b@@@@@===" + b); html2xml h2x = new html2xml(); DocumentFragment df = h2x.getSourceNode(path, b); File file = new File(outputfile); if (file.exists()) file.delete(); h2x.genXmlFile(df, file); System.out.println("generate " + file.getCanonicalPath() + " successfully!"); } catch (Exception e) { e.printStackTrace(); } } else { System.out.println("Message:is't parameter!"); } } public void genXmlFile(Node output, File file) throws Exception, Error { TransformerFactory tf = TransformerFactory.newInstance(); Transformer transformer = tf.newTransformer(); DOMSource source = new DOMSource(output); 
java.io.FileOutputStream fos = new java.io.FileOutputStream(file); StreamResult result = new StreamResult(fos); Properties props = new Properties(); props.setProperty("encoding", "GB2312"); props.setProperty("method", "xml"); props.setProperty("omit-xml-declaration", "yes"); transformer.setOutputProperties(props); transformer.transform(source, result); fos.close(); } public DocumentFragment getSourceNode(String path, boolean fromfile) throws Exception, Error { DOMFragmentParser parser = new DOMFragmentParser(); HTMLDocument document = new HTMLDocumentImpl(); DocumentFragment fragment = document.createDocumentFragment(); if (path != null && !path.trim().equals("")) { String tmp = path; if (fromfile) { try { File input = new File(path); FileReader fr = new FileReader(input); InputSource is = new InputSource(fr); parser.parse(is, fragment); fr.close(); } catch (Exception e) { e.printStackTrace(); System.out.println("Error is:" + e.getMessage()); } } else { URL url = new URL(tmp); HttpURLConnection con = (HttpURLConnection) url .openConnection(); InputStream inputs = con.getInputStream(); InputStreamReader isr = new InputStreamReader(inputs, "GBK"); InputSource source = new InputSource(isr); parser.parse(source, fragment); } return fragment; } else { return null; } } public static String getFileName() throws Exception { Calendar c = Calendar.getInstance(); String name = "tmp" + c.get(Calendar.YEAR) + (c.get(Calendar.MONTH) < 9 ? "0" : "") + (c.get(Calendar.MONTH) + 1) + (c.get(Calendar.DAY_OF_MONTH) < 10 ? "0" : "") + c.get(Calendar.DAY_OF_MONTH) + (c.get(Calendar.HOUR_OF_DAY) < 10 ? "0" : "") + c.get(Calendar.HOUR_OF_DAY) + (c.get(Calendar.MINUTE) < 10 ? "0" : "") + c.get(Calendar.MINUTE) + (c.get(Calendar.SECOND) < 10 ? "0" : "") + c.get(Calendar.SECOND) + (c.get(Calendar.MILLISECOND) < 10 ? "0" : "") + (c.get(Calendar.MILLISECOND) < 100 ? "0" : "") + c.get(Calendar.MILLISECOND); return name; }}其中,实现了HTML转换成XML的功能. 
我试过,是可以把所有的值都显示出来,但很难取到指定的值。例如传入一个字符串“Name”,怎样才能得到HTML中显示的相应Value值? 一篇好文,写得比较幽默,看者有分! 看看这个程序! 关于静态域的修改! Hibernate与Spring IOC有点问题,高分请教 类的方法调用 反射相关 关于int 与Integer的不同.请详细说明并举例! 初学JAVA,请大家介绍几本好书! 一个高难度问题:动态生成的文件如何做才能被WEB调用显示 请问怎样为tree的不同叶子设计不同的图标?? 树结构中如何得到某一盘中的一系列文件夹 哪里有下载VisualAge最新版的国内站点 谁有JDK1.4中文的API文档,谢谢!! 请高手看看我的jacob的错误问题!!!!
最好能提供一个简单的源程序,虽然我也在网上找到一些解析Html的代码,可是我刚接触DOM,有些类我还不知道应该引入哪个包。
现在能够解析Html文件,但要想通过传入一个字段名就能够获取它的Value值,我想将Html转化为Xml后再处理会更方便。
不知道哪位大哥能提供这方面的源代码,小弟感激不尽,最好是能直接运行的。谢谢!!
import java.io.*;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.html.dom.HTMLDocumentImpl;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;
import java.net.*;/*@author pengjiming
date: 2006-11-12
function:html transform xml
*/
public class html2xml {
public static void main(String args[]) {
if (args != null && args.length >= 2) {
try {
String path = args[0];
System.out.println("!!!!!PATH!!!!===" + path);
String fromfile = args[1];
String outputfile = getFileName();
if (args.length > 2) {
outputfile = args[2];
System.out.println("####outputfile###===" + outputfile);
}
boolean b = Boolean.valueOf(fromfile).booleanValue();
System.out.println("@@@@@b@@@@@===" + b);
html2xml h2x = new html2xml();
DocumentFragment df = h2x.getSourceNode(path, b);
File file = new File(outputfile);
if (file.exists())
file.delete();
h2x.genXmlFile(df, file);
System.out.println("generate " + file.getCanonicalPath()
+ " successfully!");
} catch (Exception e) {
e.printStackTrace();
}
} else {
System.out.println("Message:is't parameter!");
}
} public void genXmlFile(Node output, File file) throws Exception, Error {
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
DOMSource source = new DOMSource(output);
java.io.FileOutputStream fos = new java.io.FileOutputStream(file);
StreamResult result = new StreamResult(fos);
Properties props = new Properties();
props.setProperty("encoding", "GB2312");
props.setProperty("method", "xml");
props.setProperty("omit-xml-declaration", "yes");
transformer.setOutputProperties(props);
transformer.transform(source, result);
fos.close();
} public DocumentFragment getSourceNode(String path, boolean fromfile)
throws Exception, Error {
DOMFragmentParser parser = new DOMFragmentParser();
HTMLDocument document = new HTMLDocumentImpl();
DocumentFragment fragment = document.createDocumentFragment();
if (path != null && !path.trim().equals("")) {
String tmp = path;
if (fromfile) {
try {
File input = new File(path);
FileReader fr = new FileReader(input);
InputSource is = new InputSource(fr);
parser.parse(is, fragment);
fr.close();
} catch (Exception e) {
e.printStackTrace();
System.out.println("Error is:" + e.getMessage());
}
} else {
URL url = new URL(tmp);
HttpURLConnection con = (HttpURLConnection) url
.openConnection();
InputStream inputs = con.getInputStream();
InputStreamReader isr = new InputStreamReader(inputs, "GBK");
InputSource source = new InputSource(isr);
parser.parse(source, fragment);
}
return fragment;
} else {
return null;
}
} public static String getFileName() throws Exception {
Calendar c = Calendar.getInstance();
String name = "tmp" + c.get(Calendar.YEAR)
+ (c.get(Calendar.MONTH) < 9 ? "0" : "")
+ (c.get(Calendar.MONTH) + 1)
+ (c.get(Calendar.DAY_OF_MONTH) < 10 ? "0" : "")
+ c.get(Calendar.DAY_OF_MONTH)
+ (c.get(Calendar.HOUR_OF_DAY) < 10 ? "0" : "")
+ c.get(Calendar.HOUR_OF_DAY)
+ (c.get(Calendar.MINUTE) < 10 ? "0" : "")
+ c.get(Calendar.MINUTE)
+ (c.get(Calendar.SECOND) < 10 ? "0" : "")
+ c.get(Calendar.SECOND)
+ (c.get(Calendar.MILLISECOND) < 10 ? "0" : "")
+ (c.get(Calendar.MILLISECOND) < 100 ? "0" : "")
+ c.get(Calendar.MILLISECOND);
return name;
}
}其中,实现了HTML转换成XML的功能.