我写了两个类:基类 Extractor 和子类 ExtractPconlineMobile。基类的 main 方法创建子类的实例,用来分析太平洋电脑网的特定网页。
运行时引发的错误和所用到的类如下所示。请各位大侠帮忙,告诉小弟这到底是怎么回事,是不是 htmlparser.jar 本身有问题?
Exception in thread "main" java.lang.IncompatibleClassChangeError: Expecting non-static method org.htmlparser.lexer.Page.getCharset(Ljava/lang/String;)Ljava/lang/String;
at org.htmlparser.tags.MetaTag.doSemanticAction(MetaTag.java:149)
at org.htmlparser.scanners.TagScanner.scan(TagScanner.java:69)
at org.htmlparser.scanners.CompositeTagScanner.scan(CompositeTagScanner.java:160)
at org.htmlparser.util.IteratorImpl.nextNode(IteratorImpl.java:92)
at org.htmlparser.Parser.parse(Parser.java:701)
at com.luceneheritrixbook.extractor.pconline.mobile.ExtractPconlineMobile.extract(ExtractPconlineMobile.java:30)
at com.luceneheritrixbook.extractor.Extractor.traverse(Extractor.java:131)
at com.luceneheritrixbook.extractor.Extractor.main(Extractor.java:143)
基类:Extractor
package com.luceneheritrixbook.extractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.htmlparser.Parser;import com.luceneheritrixbook.extractor.pconline.mobile.ExtractPconlineMobile;
public abstract class Extractor {
protected static final String NEWLINE="\r\n";
private String outputPath="";
private String inputFilePath="";
private String mirrorDir="";
private String imageDir="";
private Parser parser;
protected static final String HASH_ALGORITHM="md5";
public static final String SEPARATOR="=============";
public void loadFile(String path)
{
try
{
parser=new Parser(path);
inputFilePath=path;
parser.setEncoding("gb2312");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public String getOutputPath()
{
return outputPath;
}
public void setOutputPath(String outputPath)
{
this.outputPath=outputPath;
}
public Parser getParser()
{
return parser;
}
protected String getProp(String pattern,String match,int index)
{
Pattern sp=Pattern.compile(pattern);
Matcher matcher=sp.matcher(match);
while(matcher.find())
{
return matcher.group(index);
}
return null;
}
public abstract void extract();
public String getInputFilePath()
{
return inputFilePath;
}
protected boolean copyImage(String image_url,String new_image_file)
{
//String dirs=image_url.substring(7);
try
{
File file_in=new File(image_url);
if(file_in==null||!file_in.exists())
{
file_in=new File("E:\\defaultlogo.jpg");
}
File file_out=new File(new File(imageDir),new_image_file);
FileInputStream in1=new FileInputStream(file_in);
FileOutputStream out1=new FileOutputStream(file_out);
byte[] bytes=new byte[1024];
int c;
while((c=in1.read(bytes))!=-1)
{
out1.write(bytes,0,c);

}
in1.close();
out1.close();
return true;

}
catch(Exception e)
{
e.printStackTrace();
return false;
}
}
public String getImageDir()
{
return imageDir;
}
public void setImageDir(String imageDir)
{
this.imageDir=imageDir;
}
public String getMirrorDir()
{
return mirrorDir;
}
public void setMirrorDir(String mirrorDir)
{
this.mirrorDir=mirrorDir;
}
public void setInputFilePath(String inputFilePath)
{
this.inputFilePath=inputFilePath;
}
static int count=0;
public static void traverse(Extractor extractor,File path)
throws Exception
{
if(path==null)
{
return;
}
if(path.isDirectory())
{
String[] files=path.list();
for(int i=0;i<files.length;i++)
traverse(extractor,new File(path,files[i]));
}
else
{
if(path.getAbsolutePath().endsWith(".html")&&path.getAbsolutePath().indexOf("_detail")!=-1)
{
System.out.println(path);
count++;
extractor.loadFile(path.getAbsolutePath());
extractor.extract();
}
}
}
/**
 * Manual test driver: configures an {@code ExtractPconlineMobile} with
 * hard-coded output, image and mirror directories and runs {@code traverse}
 * on one hard-coded detail page. Exceptions are printed, not rethrown.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
// All paths below are hard-coded local Windows paths.
                 Extractor extractor=new ExtractPconlineMobile();
                 extractor.setOutputPath("D:\\test");
                 extractor.setImageDir("D:\\test\\image");
                 extractor.setMirrorDir("E:\\workspace\\heritrix\\jobs\\mobile-20110510024416656\\mirror");
                try
                {
                 // A single file is passed here; traverse also accepts a directory.
                 traverse(extractor,new File("E:\\workspace\\heritrix\\jobs\\mobile-20110510024416656\\mirror\\product.pconline.com.cn\\mobile\\aigo\\292784_detail.html"));
                }
                catch(Exception e)
                {
                 e.printStackTrace();
                }
}}子类:ExtractPconlineMobile
package com.luceneheritrixbook.extractor.pconline.mobile;
import java.io.BufferedWriter;
import java.io.StringReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Date;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NotFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.util.NodeList;
import org.htmlparser.Node;
import com.luceneheritrixbook.extractor.Extractor;public class ExtractPconlineMobile extends Extractor{
public void extract()
{
BufferedWriter bw=null;
NodeFilter attributes_filter= new AndFilter(new TagNameFilter("td"),
new OrFilter(new HasAttributeFilter("class","tdL"),new HasAttributeFilter("width","84%")));
NodeFilter title_filter=new TagNameFilter("h1");

try
{
NodeList title_nodes=this.getParser().parse(title_filter);
Node node_title=title_nodes.elementAt(0);
String name=node_title.toPlainTextString();
StringBuffer sb=new StringBuffer();
sb.append(name).append("-").append((new Date()).getTime());
String path=this.getOutputPath();
bw=new BufferedWriter(new FileWriter(new File(path+sb+".txt")));
int startPos=getInputFilePath().indexOf("mirror")+6;
String url_seg=getInputFilePath().substring(startPos);
url_seg=url_seg.replaceAll("\\\\","/");
String url="http:/"+url_seg;
System.out.println(url);
bw.write(url+NEWLINE);
bw.write(name+NEWLINE);





}
catch(Exception e)
{
e.printStackTrace();
}
this.getParser().reset();
try
{
NodeList attributes_nodes=this.getParser().parse(attributes_filter);
for (int i=0;i<attributes_nodes.size();i++)
{
TableColumn node=(TableColumn)attributes_nodes.elementAt(i);
String attri1=node.getAttribute("class");
String attri2=node.getAttribute("width");

String result=node.toPlainTextString();
if(attri1.equals(new String("tdL")))
{
bw.write(result.trim()+":");

}else if(attri2.equals(new String("84%")))
{
bw.write(result.trim());
bw.newLine();
}
}

}
catch(Exception e)
{
e.printStackTrace();
}
try
{
String image_dirseg=getInputFilePath().substring(0,4);
String fullFileDir=getMirrorDir()+"\\image.pconline.com.cn\\images\\product\\"+image_dirseg+"\\"+getInputFilePath().substring(0,6);
File imageDir=new File(fullFileDir);
String[] files=imageDir.list();
for(int i=0;i<files.length;i++)
{
String image_url=fullFileDir+files[i];
copyImage(image_url,getInputFilePath().substring(0,6)+".jpg");
bw.write(image_url+NEWLINE);
bw.write(SEPARATOR+NEWLINE);
bw.write(getInputFilePath().substring(0,6)+".jpg");
System.out.println(image_url);
}

}
catch(Exception e)
{
e.printStackTrace();
}
try
{
if(bw!=null)
bw.close();
}catch(IOException e)

{
e.printStackTrace();
}



} /**
 * @param args
 */
}