2008-04-25 18:05:45,202 <WARN> [main] (ConnectionManager.java:53) : Can not establish connection
2008-04-25 18:05:45,202 <WARN> [main] (ConnectionManager.java:54) : com.mysql.jdbc.CommunicationsException: Communications link failure due to underlying exception: ** BEGIN NESTED EXCEPTION ** java.net.SocketException
MESSAGE: java.net.BindException: Address already in use: connectSTACKTRACE:java.net.SocketException: java.net.BindException: Address already in use: connect
at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:156)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:276)
at com.mysql.jdbc.Connection.createNewIO(Connection.java:2666)
at com.mysql.jdbc.Connection.<init>(Connection.java:1531)
at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:266)
at java.sql.DriverManager.getConnection(Unknown Source)
at java.sql.DriverManager.getConnection(Unknown Source)
at com.mnv.dao.ConnectionManager.getConnection(ConnectionManager.java:41)
at com.mnv.dao.MovieDAO.<init>(MovieDAO.java:25)
at com.mnv.core.crawlers.MultiThreadCrawler.startCrawler(MultiThreadCrawler.java:156)
at com.mnv.core.crawlers.CrawlerMainEntry.main(CrawlerMainEntry.java:48)
** END NESTED EXCEPTION **

说连接已经被占用了，这是什么情况造成的呢？这个问题也不是总出现，出现的情况比较随机。
2008-04-25 18:05:45,202 <WARN> [main] (ConnectionManager.java:54) : com.mysql.jdbc.CommunicationsException: Communications link failure due to underlying exception: ** BEGIN NESTED EXCEPTION ** java.net.SocketException
MESSAGE: java.net.BindException: Address already in use: connectSTACKTRACE:java.net.SocketException: java.net.BindException: Address already in use: connect
at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:156)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:276)
at com.mysql.jdbc.Connection.createNewIO(Connection.java:2666)
at com.mysql.jdbc.Connection.<init>(Connection.java:1531)
at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:266)
at java.sql.DriverManager.getConnection(Unknown Source)
at java.sql.DriverManager.getConnection(Unknown Source)
at com.mnv.dao.ConnectionManager.getConnection(ConnectionManager.java:41)
at com.mnv.dao.MovieDAO.<init>(MovieDAO.java:25)
at com.mnv.core.crawlers.MultiThreadCrawler.startCrawler(MultiThreadCrawler.java:156)
at com.mnv.core.crawlers.CrawlerMainEntry.main(CrawlerMainEntry.java:48)
** END NESTED EXCEPTION **

说连接已经被占用了，这是什么情况造成的呢？这个问题也不是总出现，出现的情况比较随机。
DAO1 dao1 = new DAO1();
DAO2 dao2 = new DAO2();
.................
dao1.xxxx();
dao2.xxxx();
.................
dao2.close();
dao1.close(); // close的时候释放连接
// Class-wide log4j logger shared by all instances.
private static Logger logger=Logger.getLogger(MultiThreadCrawler.class);
// Settings loaded from configs/setting.properties in the constructor;
// startCrawler reads the "ResourceType" key from it.
private PropertiesLoader pl_setting;
// Catalog identifier; startCrawler appends the page number to it when
// recording a failed catalog page.
private String catalog;
// First catalog page to crawl, inclusive (default 1).
private int startPageNumber;
// Last catalog page to crawl, inclusive (default 1).
private int endPageNumber;
// Number of page-crawler threads to start (default 4).
private int crwThreadNum;
// Number of content-parser threads to start (default 4).
private int cpThreadNum;
// Crawl rule set name handed to UrlCrawler and ThreadContentParser.
private String crawlRuleSet;
/**
 * Creates a crawler with default settings: crawl page 1 only, with
 * 4 crawler threads and 4 parser threads.
 */
public MultiThreadCrawler(){
    // Load the crawler settings, then seed conservative defaults:
    // a single catalog page and four threads for each worker pool.
    this.pl_setting = new PropertiesLoader("configs/setting.properties");
    this.startPageNumber = 1;
    this.endPageNumber = 1;
    this.crwThreadNum = 4;
    this.cpThreadNum = 4;
}
/** Assigns the crawl rule set that drives URL and content extraction. */
public void setCrawlRuleSet(String crawlRuleSet) {
    this.crawlRuleSet = crawlRuleSet;
}
/** Assigns the catalog that this crawl rule set operates on. */
public void setCatalog(String catalog) {
    this.catalog = catalog;
}
/** Assigns the first catalog page (inclusive) that the crawler visits. */
public void setStartPageNum(int startPageNumber) {
    this.startPageNumber = startPageNumber;
}
/** Assigns the last catalog page (inclusive) after which crawling stops. */
public void setEndPageNum(int endPageNumber) {
    this.endPageNumber = endPageNumber;
}
/** Assigns how many page-crawler threads startCrawler launches. */
public void setCrawlerThreadNum(int crwThreadNum) {
    this.crwThreadNum = crwThreadNum;
}
/** Assigns how many content-parser threads startCrawler launches. */
public void setContentParserThreadNum(int cpThreadNum) {
    this.cpThreadNum = cpThreadNum;
}
/**
 * Start the crawler.
 *
 * Pipeline: collect catalog URLs for pages startPageNumber..endPageNumber,
 * drop URLs already present in the database, run the page-crawler and
 * content-parser thread pools to completion, then persist the parsed
 * movies (with their resources) and any failed URLs.
 *
 * Fixes over the previous revision: non-short-circuit '|' in the two
 * null-or-empty checks (would NPE on a null list), the MovieDAO
 * connection leak on the empty-result path, and the 1-second busy-wait
 * polling loops that swallowed InterruptedException.
 *
 * @return true if the crawler ends normally, else false
 */
public boolean startCrawler(){
    boolean isOk = true; // kept for the documented contract; nothing sets it false today
    //-------------------------------------INIT-----------------------------------------------//
    SynUrlList synUrlList = new SynUrlList();
    SynMovieList synMovieList = new SynMovieList();
    SynPageList synPageList = new SynPageList();
    SynFailedUrlList synFailedUrlList = new SynFailedUrlList();
    MovieDAO movieDao = new MovieDAO();
    //--------------------------------Get URLs to crawl---------------------------------------//
    List<String> urlList = collectCatalogUrls(synFailedUrlList);
    // collectCatalogUrls never returns null, so the old "return false" branch was dead.
    synUrlList.initialize(filterKnownUrls(urlList, movieDao));
    //----------------------------------start crawler thread---------------------------------//
    List<Thread> thrdCRWList = new ArrayList<Thread>();
    for (int i = 1; i <= crwThreadNum; i++) {
        Thread tc = new ThreadPageCrawler(synUrlList, synPageList, synFailedUrlList);
        tc.setName("CrawlThread " + i);
        tc.start();
        thrdCRWList.add(tc);
    }
    //----------------------------------start parser thread---------------------------------//
    List<Thread> thrdCPList = new ArrayList<Thread>();
    SynCrawlerEndFlag synCrawlerEndFlag = new SynCrawlerEndFlag();
    for (int i = 1; i <= cpThreadNum; i++) {
        Thread tcp = new ThreadContentParser(synCrawlerEndFlag, synPageList, synMovieList, crawlRuleSet);
        tcp.setName("ParserThread " + i);
        tcp.start();
        thrdCPList.add(tcp);
    }
    //-------------------------------wait for threads end----------------------------------//
    waitForThreads(thrdCRWList);
    synCrawlerEndFlag.setSynFlag(true); // tell the parsers no more pages are coming
    logger.info("all pages crawled");
    waitForThreads(thrdCPList);
    logger.info("all pages parsed");
    //-----------------------------------put movie into date base----------------------------------//
    saveMovies(synMovieList.getList(), movieDao, synFailedUrlList);
    //---------------------------------put failed URLs into data base--------------------------------//
    saveFailedUrls(synFailedUrlList.getList());
    return isOk;
}

/**
 * Crawls every catalog page in [startPageNumber, endPageNumber] and returns
 * the collected URLs. Pages that yield nothing, or that throw, are recorded
 * in synFailedUrlList and crawling continues with the next page.
 *
 * @return the collected URL list; never null (may be empty)
 */
private List<String> collectCatalogUrls(SynFailedUrlList synFailedUrlList) {
    UrlCrawler urlCrawler = new UrlCrawler(crawlRuleSet);
    List<String> urlList = new ArrayList<String>();
    for (int i = startPageNumber; i <= endPageNumber; i++) {
        try {
            List<String> currList = urlCrawler.startUrlCrawler(catalog, i);
            if (currList != null) {
                urlList.addAll(currList);
            } else {
                // Empty catalog page: probably past the end; record it and keep going.
                logger.info("Catalog is empty....maybe is the end");
                FailedUrl failedUrl = new FailedUrl();
                failedUrl.setFailedUrl(catalog + i, 1, "can not get url from this catalog page");
                synFailedUrlList.add(failedUrl);
            }
        } catch (CanNotGetCatalogUrlListException e) {
            FailedUrl failedUrl = new FailedUrl();
            failedUrl.setFailedUrl(e.getExceptionUrl(), 1,
                    "get catalog url failed for following reason\n" + e.getReason());
            synFailedUrlList.add(failedUrl);
            logger.warn(e);
        }
    }
    return urlList;
}

/**
 * Returns the subset of urlList whose page URL is not yet in the database.
 */
private List<String> filterKnownUrls(List<String> urlList, MovieDAO movieDao) {
    List<String> okUrlList = new ArrayList<String>(urlList.size());
    for (String url : urlList) {
        if (!movieDao.isPageUrlExist(url)) {
            okUrlList.add(url);
        }
    }
    return okUrlList;
}

/**
 * Blocks until every thread in the list has terminated. Like the old
 * polling loop, interruption does not abort the wait; the interrupt flag
 * is restored afterwards so callers can still observe it.
 */
private void waitForThreads(List<Thread> threads) {
    boolean interrupted = false;
    for (Thread t : threads) {
        while (t.isAlive()) {
            try {
                t.join();
            } catch (InterruptedException e) {
                interrupted = true; // keep waiting, remember the interrupt
            }
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

/**
 * Persists every parsed movie and its resources, one transaction pair
 * (movie + resources) per movie; a SQLException rolls back only that
 * movie and records its page URL as failed. Always closes movieDao.
 */
private void saveMovies(List<Movie> movieList, MovieDAO movieDao, SynFailedUrlList synFailedUrlList) {
    if (movieList == null || movieList.isEmpty()) { // '||': the old '|' NPE'd on a null list
        logger.warn("The movie list is empty or not exist");
        movieDao.close(); // the old code leaked the connection on this path
        return;
    }
    ResourceDAO resourceDao = new ResourceDAO();
    int counterOfSuccess = 0;
    for (Movie movie : movieList) {
        try {
            movieDao.beginTransaction();
            resourceDao.beginTransaction();
            movieDao.saveOrUpdateMovie(movie);
            // NOTE(review): reading max id after the insert is racy if any
            // other writer shares the table — confirm; generated-key
            // retrieval would be safer.
            int movieId = movieDao.getMaxId();
            String[] typeArray = pl_setting.getValue("ResourceType").split(",");
            for (String type : typeArray) { // save all types of resource into database
                List<Resource> resourceList = movie.getResourceListByType(type);
                if (resourceList == null) {
                    continue;
                }
                for (Resource resource : resourceList) {
                    int duplicateId = resourceDao.getIdByUrl(resource.getUrl());
                    if (duplicateId != 0) { // resource URL already stored:
                        resource.setReDirectionToExistId(duplicateId); // point at the existing row
                        resource.setUrl(null); // and drop the duplicate URL
                    }
                    resource.setMovieId(movieId);
                    resource.setType(type);
                    resourceDao.saveOrUpdateResource(resource);
                }
            }
            resourceDao.commit();
            movieDao.commit();
            counterOfSuccess++;
        } catch (SQLException e) {
            movieDao.rollback();
            resourceDao.rollback();
            FailedUrl failedUrl = new FailedUrl();
            failedUrl.setFailedUrl(movie.getPageUrl(), 3, e.toString());
            synFailedUrlList.add(failedUrl);
            logger.warn(e);
        }
    }
    logger.info(counterOfSuccess + " movies saved");
    resourceDao.close();
    movieDao.close();
}

/**
 * Persists the failed-URL list in a single transaction; individual save
 * failures are logged and skipped so the rest of the list still commits.
 */
private void saveFailedUrls(List<FailedUrl> failedUrlList) {
    if (failedUrlList == null || failedUrlList.isEmpty()) { // '||': the old '|' NPE'd on a null list
        logger.info("Congratulations no failed urls");
        return;
    }
    FailedUrlDAO dao = new FailedUrlDAO();
    dao.beginTransaction();
    for (FailedUrl failedUrl : failedUrlList) {
        try {
            dao.saveFailedUrl(failedUrl);
        } catch (SQLException e) {
            logger.warn("Wrong plus wrong?check everything");
            logger.warn(e);
        }
    }
    dao.commit();
    dao.close();
}