package com.sourceware.sms.awardinvoice.util;import org.w3c.tidy.*;
import java.io.*;public class TidyUtil { private String targetFileName; //转化目标的文件
private String errFileName; //错误输出文件
private String configFileName; //配置文件 public TidyUtil(String targetFileName,
String errFileName, String configFileName) {
this.targetFileName = targetFileName;
this.errFileName = errFileName;
this.configFileName = configFileName;
} public void changeXML(InputStream in) { //参数为输入流
try {
OutputStream out = new FileOutputStream(targetFileName);
Tidy tidy = new Tidy();
//设置配置文件
tidy.setConfigurationFromFile(configFileName);
//设置错误文件
tidy.setErrout(new PrintWriter(new FileWriter(errFileName)));
//解析
tidy.parse(in, out);
}
catch (Exception e) {
e.printStackTrace();
System.out.println(e.getMessage());
}
}
}
import java.io.*;public class TidyUtil { private String targetFileName; //转化目标的文件
private String errFileName; //错误输出文件
private String configFileName; //配置文件 public TidyUtil(String targetFileName,
String errFileName, String configFileName) {
this.targetFileName = targetFileName;
this.errFileName = errFileName;
this.configFileName = configFileName;
} public void changeXML(InputStream in) { //参数为输入流
try {
OutputStream out = new FileOutputStream(targetFileName);
Tidy tidy = new Tidy();
//设置配置文件
tidy.setConfigurationFromFile(configFileName);
//设置错误文件
tidy.setErrout(new PrintWriter(new FileWriter(errFileName)));
//解析
tidy.parse(in, out);
}
catch (Exception e) {
e.printStackTrace();
System.out.println(e.getMessage());
}
}
}
解决方案 »
- s2sh平台网站的自动登录问题(cookie)
- 初学者关于异常的一个问题
- 自加问题
- struts里的form是不是就相当于mvc结构中的entity里的bean啊?
- 同一个oracle数据库中不同用户下的表在hibernate如何访问?
- 一个试了很多次未解决的STRUTS 标签问题?
- 为什么无法将证书加入cacerts,明明加入了,可是最后有显示找不到什么文件?
- 为什么tomcat运行一个晚上后必须重启?
- Who can recommend a good Chinese EJB site or a downloadable book?
- 初学者求指导
- 如何取这个xml中的值,初学.想取<driver><url><user><password>中的值
- [求助]向大家请教一个关于购物车的问题!!!!
indent: auto
indent-spaces: 2
wrap: 72
markup: yes
output-xml: yes
input-xml: no
show-warnings: yes
numeric-entities: yes
quote-marks: yes
quote-nbsp: yes
quote-ampersand: no
break-before-br: no
uppercase-tags: no
uppercase-attributes: no
char-encoding: latin1 //这个参数处理编码
new-inline-tags: cfif, cfelse, math, mroot,
mrow, mi, mn, mo, msqrt, mfrac, msubsup, munderover,
munder, mover, mmultiscripts, msup, msub, mtext,
mprescripts, mtable, mtr, mtd, mth
new-blocklevel-tags: cfoutput, cfquery
new-empty-tags: cfelse
你做过吗?这篇文章我看过,配置文件怎么写?格式是什么?怎么用到它呢?
这句在程序里就指定了配置文件,你用 xxx.properties 后缀就可以了。然后 char-encoding: latin1 这个参数处理编码,查查这个,它有几个可选值,能显示中文。我做过,因为我是内部处理 XML,不用给客户看的,所以没有这个参数,乱码也没问题,读出来时进行一下转化就行了。不过我做过测试,加了这个参数后能显示中文,但我忘了具体参数值是什么,你查查,再多试试。
看看我的代码:
/**
 * Runnable task that converts one HTML file to XML with JTidy.
 * The error file and the configuration file are optional.
 */
public class TidyXHTML implements Runnable {
    /** Path of the HTML file to read. */
    private String srcFileName;
    /** Path of the file that receives the converted output. */
    private String outFileName;
    /** Path of the file for Tidy's error output; null when none was supplied. */
    private String errOutFileName;
    /** Path of the Tidy configuration file; null when none was supplied. */
    private String configFileName;

    /**
     * @param srcFileName    path of the HTML file to convert
     * @param outFileName    path of the file to write the result to
     * @param errOutFileName path of the file to write Tidy's error output to
     * @param confName       path of the Tidy configuration file
     */
    public TidyXHTML(String srcFileName, String outFileName,
                     String errOutFileName, String confName) {
        this.srcFileName = srcFileName;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
        this.configFileName = confName;
    }

    /**
     * Creates a task with no error file and no configuration file.
     *
     * @param srcFileName path of the HTML file to convert
     * @param outFileName path of the file to write the result to
     */
    public TidyXHTML(String srcFileName, String outFileName) {
        this.srcFileName = srcFileName;
        this.outFileName = outFileName;
    }

    /**
     * Converts the source file and writes the result to the output file.
     * I/O failures are reported on the console; every stream opened here is
     * closed before the method returns.
     */
    public void run() {
        BufferedInputStream in = null;
        FileOutputStream out = null;
        PrintWriter errOut = null;
        Tidy tidy = new Tidy();
        tidy.setXmlOut(true);
        // The two-argument constructor leaves the configuration path unset.
        if (configFileName != null) {
            tidy.setConfigurationFromFile(configFileName);
        }
        try {
            // A null path would make FileWriter throw NullPointerException,
            // which the IOException handler below would not catch.
            if (errOutFileName != null) {
                errOut = new PrintWriter(new FileWriter(errOutFileName), true);
                tidy.setErrout(errOut);
            }
            in = new BufferedInputStream(new FileInputStream(srcFileName));
            out = new FileOutputStream(outFileName);
            tidy.parse(in, out);
        } catch (IOException e) {
            System.out.println(this.toString() + e.toString());
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    System.out.println(this.toString() + e.toString());
                }
            }
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    System.out.println(this.toString() + e.toString());
                }
            }
            if (errOut != null) {
                errOut.close();
            }
        }
    }

    /**
     * Starts one conversion on a separate thread using fixed sample file names.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TidyXHTML t1 = new TidyXHTML("forge.htm", "TestError2.xml", "err.txt", "config.properties");
        Thread th1 = new Thread(t1);
        th1.start();
        // Printed right after the thread is started; it does not wait for the
        // conversion to finish.
        System.out.println("Successful!!!");
    }
}
config.properties:
indent: auto
indent-spaces: 2
wrap: 72
markup: yes
output-xml: yes
input-xml: no
show-warnings: yes
numeric-entities: yes
quote-marks: yes
quote-nbsp: yes
quote-ampersand: no
break-before-br: no
uppercase-tags: no
uppercase-attributes: no
char-encoding: raw
new-inline-tags: cfif, cfelse, math, mroot,
mrow, mi, mn, mo, msqrt, mfrac, msubsup, munderover,
munder, mover, mmultiscripts, msup, msub, mtext,
mprescripts, mtable, mtr, mtd, mth
new-blocklevel-tags: cfoutput, cfquery
new-empty-tags: cfelse
自我感觉代码是没有问题的,不知道什么原因,xml文件是乱码(英文网页)。
yubo1209(稻草人),你做过这个是吗?能联系一下吗?我的qq:18966186 msn:[email protected]
package com.yuch.html;/**
* Convert a html file into a well-formed XHtml file.
* Parse the XHtml to a DOM tree.
*
* JTidy是HTML Tidy(一个HTML语法检查器和优雅的打印编排工具)的Java移植,
* 除了本身具有的清除HTML文件难看或错误内容的功能外,还提供了一个DOM接口,
* 程序员可以将JTidy当作一个处理HTML文件的DOM解析器来使用。
*
* @version $version 1.0
* @author yuch
*/import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.File;import org.w3c.dom.Document;
import org.w3c.dom.Attr;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.NamedNodeMap;import org.w3c.tidy.Tidy;
public class HtmlDOMTree
{
protected InputStream in = null; //source inputstream
protected OutputStream out = null; //target outputstream
protected String err = null; //error logs
protected String configFile = null; //configure of displaying layout protected Document document = null; //DOM interface protected Tidy tidy = new Tidy(); /**
* Constructor with empty parameter
*
*/
public HtmlDOMTree() {
} /**
* Constructor with a parameter
*
* @param srcStr a String Object
*/
public HtmlDOMTree( InputStream in ) {
this.in = in;
} /**
* Constructor with two parameter.
*
* @param in a InputStream Object.
* @param out a OutputStream Object.
*/
public HtmlDOMTree( InputStream in,OutputStream out ) {
this.in = in;
this.out = out;
} /**
* Constructor with four parameter.
*
* @param in a InputStream Object.
* @param out a OutputStream Object.
* @param err a FileWriter Object.
* @param configFile a String Object to specify a configuration file.
*/
public HtmlDOMTree( InputStream in,OutputStream out,
String err,String configFile ) {
this.in = in;
this.out = out;
this.err = err;
this.configFile = configFile;
} /**
*
*
*/
public Document parse() {
document = parse( in,out,err,configFile ); return document;
} /**
* Parse html files to DOM Tree.
*
* @param inFile.
*
* @return Document.
*/
public Document parse( File inFile ) {
document = parse( inFile,null,null,null ); return document;
} /**
* Parse html files to DOM Tree.
*
* @param inFile.
* @param outFile.
*
* @return Document.
*/
public Document parse( File inFile,File outFile ) {
document = parse( inFile,outFile,null,null ); return document;
} /**
* Parse html files to DOM Tree.
*
* @param inFile.
* @param outFile.
* @param errFile.
* @param configFile.
*
* @return Document.
*/
public Document parse( File inFile, File outFile,
String errFile,String configFile ) {
try{
in = new BufferedInputStream( new FileInputStream( inFile ) );
out = new BufferedOutputStream( new FileOutputStream( outFile ) );
}
catch( Exception e ) {
} document = parse( in,out,errFile,configFile );
return document;
} /**
* Parse html files to DOM Tree.
*
* @param in.
* @param out.
* @param errFile.
* @param configFile.
*
* @return Document.
*/
public Document parse( InputStream in,OutputStream out,
String errFile,String configFile ) {
if( configFile != null ) {
tidy.setConfigurationFromFile( configFile );
}
try
{
if ( errFile != null )
{
tidy.setErrout( new PrintWriter( new FileWriter(errFile),true ) );
}
document = tidy.parseDOM( in,out );
}
catch ( IOException e )
{
} return document;
} /**
* Traverse DOM Tree
*
*/
public void traverse( Node cNode ) {
String eleName = null;
switch( cNode.getNodeType() ) {
case Node.DOCUMENT_NODE:
System.out.println( "Element " + cNode.getNodeName() ); processChildren( cNode.getChildNodes() );
break;
case Node.ELEMENT_NODE:
eleName = cNode.getNodeName();
System.out.println("Element " + eleName);
NamedNodeMap attributeMap = cNode.getAttributes();
int numAttrs = attributeMap.getLength();
for(int i = 0; i < attributeMap.getLength(); i++ ) {
Attr attribute = (Attr)attributeMap.item(i);
String attrName = attribute.getNodeName();
String attrValue = attribute.getNodeValue();
System.out.println( attrName + " = " + attrValue );
} processChildren(cNode.getChildNodes());
break;
case Node.CDATA_SECTION_NODE:
case Node.TEXT_NODE:
System.out.println( "Text " + cNode.getNodeValue() );
if( !cNode.getNodeValue().trim().equals("") ) {
System.out.println( "eleName " + eleName );
System.out.println( "Text " + cNode.getNodeValue() );
}
break;
}
}
private void processChildren( NodeList nList ){
if( nList.getLength()!=0 ){
for( int i = 0; i < nList.getLength(); i++ ) {
traverse( nList.item(i) );
}
}
} /**
* Dispose resource
*
*/
public void finalize() {
try
{
if( in != null ) {
in.close();
in = null;
}
if( out != null ) {
out.close();
out = null;
}
}
catch ( IOException e )
{
}
} /**
* Solve the chinese displaying problem by pre-processing using the method below.
*
*/
public static String convertToGB( String str ) {
try {
byte bytes[] = str.getBytes( "ISO-8859-1" );
return new String( bytes, "gb2312" );
}
catch( Exception exception ) {
return str;
}
} /**
* Main method to test.
*
* Run the command line: java com.yuch.html.HtmlDOMTree <src_file> <new_file>
*/
public static void main( String[] args ) {
HtmlDOMTree dom = new HtmlDOMTree();
File inFile = new File( "notice.html" );
File outFile = new File( "out.html" );
System.out.println( inFile.getAbsoluteFile() );
System.out.println( "Start to convert..." );
// Document document =
// dom.parse( inFile,outFile,"resource\\error\\error.txt","resource\\config\\config.txt" );
Document document = dom.parse( inFile,outFile );
System.out.println( "\nEnd to convert." );
//Element root = document.getDocumentElement();
//dom.traverse( document ); try
{
BufferedReader in = new BufferedReader( new FileReader(outFile) );
String line;
while( (line = in.readLine()) != null ) {
System.out.println( line );
}
in.close();
}
catch ( IOException e )
{
System.out.println( "读取Html失败" );
}
}
}
[email protected]
哈哈,谁还要,我给谁发