package ustc.infosec.spider;import java.util.*;
import java.net.URL;
import java.io.Serializable;
import ustc.infosec.util.Config;
/*
* Create a hashset for urls to download
* Define the download priority level of urls
*/
/**
 * Queue of URLs waiting to be downloaded, split into three priority buckets
 * (interesting / average / boring) as classified by {@link Config}. A
 * companion hash set of already-queued {@code URL}s gives O(1) duplicate
 * rejection.
 *
 * NOTE(review): not thread-safe — callers sharing a queue across threads
 * must synchronize externally.
 */
public class DownloadQueue implements Serializable
{
    /** Explicit version id so serialized queues survive future edits. */
    private static final long serialVersionUID = 1L;

    public DownloadQueue(Config config)
    {
        this.config = config;
        // ArrayDeque gives O(1) insertion at BOTH ends; the previous
        // ArrayList.add(0, url) was O(n) per insert in depth-first mode.
        interestingURLsToDownload = new ArrayDeque<>();
        averageURLsToDownload = new ArrayDeque<>();
        boringURLsToDownload = new ArrayDeque<>();
        urlsInQueue = new HashSet<>();
    }

    /**
     * Queues a URL unless it is already pending. Depth-first search pushes
     * to the head of its priority bucket; breadth-first appends to the tail.
     *
     * @param url wrapper carrying the URL and its crawl metadata
     */
    public void queueURL(URLToDownload url)
    {
        URL u = url.getURL();
        if (urlsInQueue.contains(u)) {
            return; // already queued — skip duplicates
        }

        // Select the priority bucket once instead of duplicating the
        // DFS/BFS insertion logic in every branch.
        final Deque<URLToDownload> bucket;
        if (config.isInteresting(u)) {
            bucket = interestingURLsToDownload;
        } else if (config.isBoring(u)) {
            bucket = boringURLsToDownload;
        } else {
            bucket = averageURLsToDownload;
        }

        if (config.isDepthFirstSearch()) {
            bucket.addFirst(url); // DFS: most recently discovered first
        } else {
            bucket.addLast(url);  // BFS: first discovered, first served
        }

        urlsInQueue.add(u);
    }

    /**
     * Queues every element of the given collection, applying the same
     * duplicate filtering and prioritization as {@link #queueURL}.
     */
    public void queueURLs(Collection<? extends URLToDownload> urls)
    {
        for (URLToDownload u2d : urls) {
            queueURL(u2d);
        }
    }

    /**
     * Removes and returns the highest-priority pending URL
     * (interesting, then average, then boring).
     *
     * @return the next URL to download, or {@code null} if the queue is empty
     */
    public URLToDownload getNextInQueue()
    {
        if (!interestingURLsToDownload.isEmpty()) {
            return returnURLFrom(interestingURLsToDownload);
        }
        if (!averageURLsToDownload.isEmpty()) {
            return returnURLFrom(averageURLsToDownload);
        }
        if (!boringURLsToDownload.isEmpty()) {
            return returnURLFrom(boringURLsToDownload);
        }
        return null;
    }

    /** Pops the head of the given bucket and unregisters its URL from the dedup set. */
    private URLToDownload returnURLFrom(Deque<URLToDownload> bucket)
    {
        URLToDownload u2d = bucket.removeFirst();
        urlsInQueue.remove(u2d.getURL());
        return u2d;
    }

    /** @return the number of URLs still waiting to be downloaded */
    public int size()
    {
        return interestingURLsToDownload.size()
                + averageURLsToDownload.size()
                + boringURLsToDownload.size();
    }

    @Override
    public String toString()
    {
        return size() + " URLs";
    }

    private final Config config;
    /** Pending downloads, highest priority first. ArrayDeque is Serializable. */
    private final Deque<URLToDownload> interestingURLsToDownload;
    private final Deque<URLToDownload> averageURLsToDownload;
    private final Deque<URLToDownload> boringURLsToDownload;
    /**
     * URLs currently queued, for O(1) duplicate detection.
     * NOTE(review): java.net.URL's equals/hashCode can trigger blocking DNS
     * resolution; consider keying on URL.toString() instead — verify whether
     * callers rely on host-equivalence semantics before changing.
     */
    private final Set<URL> urlsInQueue;
} // End class DownloadQueue
Note: the Config object referenced here also implements Serializable.
import java.net.URL;
import java.io.Serializable;
import ustc.infosec.util.Config;
/*
* Create a hashset for urls to download
* Define the download priority level of urls
*/
/*
 * NOTE(review): this entire class is an accidental DUPLICATE of the
 * DownloadQueue defined earlier in this file. Two top-level classes with the
 * same name cannot compile in one compilation unit — this second copy (and
 * the repeated import block above it) should be deleted. Documented here
 * only so the duplication is explicit until it is removed.
 */
public class DownloadQueue implements Serializable
{
/** Builds an empty queue; classification decisions are delegated to config. */
public DownloadQueue(Config config)
{
this.config = config;
/* Three raw ArrayLists, one per priority bucket (head-insert in DFS mode is O(n)). */
interestingURLsToDownload = new ArrayList();
averageURLsToDownload = new ArrayList();
boringURLsToDownload = new ArrayList();
/* Raw HashSet of java.net.URL used for O(1) duplicate rejection. */
urlsInQueue = new HashSet();
}
/** Add a certain url to hashset */
public void queueURL(URLToDownload url)
{
URL u = url.getURL();
/**
 * CASE IF: URL ALREADY IN THE DOWNLOAD QUEUE
 */
if(urlsInQueue.contains(u))
return;
/**
 * CASE IF: URL NOT IN THE DOWNLOAD QUEUE
 * PAGE OF URL IS THE ONE WANTED
 * ADD TO QUEUE
 */
if(config.isInteresting(u))
/** Adopt depth first search or not */
if(config.isDepthFirstSearch())
/** Insert the url in the head of the rolling list */
interestingURLsToDownload.add(0, url);
else
/** Insert it in the tail of the rolling list */
interestingURLsToDownload.add(url);
else if(config.isBoring(u))
if(config.isDepthFirstSearch())
boringURLsToDownload.add(0, url);
else
boringURLsToDownload.add(url);
else{
if(config.isDepthFirstSearch())
averageURLsToDownload.add(0, url);
else
averageURLsToDownload.add(url);
}
/** Insert it to the hashset */
urlsInQueue.add(u);
}
/** Add all urls of given collection into hashset */
public void queueURLs(Collection urls)
{
for(Iterator i = urls.iterator(); i.hasNext(); )
{
/* Unchecked cast: collection is raw, caller must supply URLToDownload elements. */
URLToDownload u2d = (URLToDownload) i.next();
queueURL(u2d);
}
}
/** Get url refering to its priority-level; returns null when all buckets are empty. */
public URLToDownload getNextInQueue()
{
if(interestingURLsToDownload.size() > 0) {
return returnURLFrom(interestingURLsToDownload);
}
else if(averageURLsToDownload.size() > 0) {
return returnURLFrom(averageURLsToDownload);
}
else if(boringURLsToDownload.size() > 0){
return returnURLFrom(boringURLsToDownload);
}
else{
return null;
}
}
/** Delete returned url in the hashset */
private URLToDownload returnURLFrom(List urlList)
{
/* Pop the head of the bucket and unregister its URL so it can be re-queued later. */
URLToDownload u2d = (URLToDownload) urlList.get(0);
urlList.remove(0);
urlsInQueue.remove(u2d.getURL());
return u2d;
}
/** Get the number of urls remaining downloaded */
public int size()
{
return interestingURLsToDownload.size() + averageURLsToDownload.size() + boringURLsToDownload.size();
} public String toString()
{
return size() + " URLs";
}
private final Config config;
/** DOWNLOAD URLS REFER TO ITS PRIORITY */
private List interestingURLsToDownload;
private List averageURLsToDownload;
private List boringURLsToDownload;
private Set urlsInQueue; } // End class DownloadQueue
Note: the Config object referenced here also implements Serializable.
This looks fine.