import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;public class BookChecker { public static void main(final java.lang.String[] args) throws Exception {
BlockingQueue<URL> urls = new LinkedBlockingQueue<URL>();
String [] d=readFileByLines("C:/Documents and Settings/Owner/桌面/xx.txt");
for (int i=1;i<d.length-2;i++){
if(d[i]!=null){
urls.put(new URL(d[i]));
}else{
break;
}
}
for(int i=0;i<100;i++)
new Thread( new CheckURLWorker(Thread.currentThread().getName(),urls)).start();
}
public static String[] readFileByLines(String fileName){
File file = new File(fileName);
String [] kk=new String[50000];
BufferedReader reader = null;
try {
// System.out.println("以行为单位读取文件内容,一次读一整行:");
reader = new BufferedReader(new FileReader(file));
String tempString = null;
int line = 1;
//一次读入一行,直到读入null为文件结束
while ((tempString = reader.readLine()) != null){
//显示行号
// System.out.println("line " + line + ": " + tempString);
kk[line]=tempString;
line++;
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null){
try {
reader.close();
} catch (IOException e1) {
}
}
}
return kk;} private static class CheckURLWorker implements Runnable {
private BlockingQueue<URL> queue;
private String name;
public CheckURLWorker(String name,BlockingQueue<URL> q){
this.name = name;
this.queue = q;
} @Override public void run() {
for (URL url = queue.poll(); url != null; url = queue.poll()) {
try{
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setConnectTimeout(5000);
connection.setRequestMethod("GET");
connection.connect();
int code = connection.getResponseCode();
connection.disconnect();
System.out.printf("[%s]%s:%d%n",name,url.toString(),code);
} catch (SocketTimeoutException e) {
System.out.printf("[%s]%s:%d%n",name,url.toString(),-1);
} catch (IOException e) {
System.err.println(e);
}
}
}
}
}
读5000多条记录时 有很多次程序都不会结束
for (int i=1;i<d.length-2;i++){
if(d[i]!=null){
urls.put(new URL(d[i]));
和这里 int line = 1;
//一次读入一行,直到读入null为文件结束
数组的下标都是从0开始的还有
for (URL url = queue.poll(); url != null; url = queue.poll()) {
这段是什么意思 简直就是垃圾 而且 对于一个url的赋值还搞两次。
你的这里相当于开启100个线程来处理
for(int i=0;i<100;i++)
new Thread( new CheckURLWorker(Thread.currentThread().getName(),urls)).start();难道没考虑过会不会产生问题么对lz非常无语
2楼的我对你无语,知道Queue.poll()是什么方法吗?
我笑了。
大概修改了一下,跑了一遍,没有发生卡死
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.io.PrintStream;
import java.io.FileOutputStream;public class BookChecker {
public static void main(final java.lang.String[] args) throws Exception {
BlockingQueue<URL> urls = new LinkedBlockingQueue<URL>();
String inFile = "C:/Documents and Settings/Owner/桌面/xx.txt";
/*
//测试用数据
String inFile = "test.txt";
PrintStream ps = new PrintStream(new FileOutputStream(inFile));
for (int i=0; i<5000; i++) {
ps.println("http://www.csdn.net");
}
ps.close();
*/
String [] d=readFileByLines(inFile);
for (String s : d){
urls.put(new URL(s));
}
for(int i=0;i<100;i++)
new Thread( new CheckURLWorker("thread-" + i,urls)).start();
}
public static String[] readFileByLines(String fileName){
File file = new File(fileName);
List<String> list = new ArrayList<String>();
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String tempString = null;
while ((tempString = reader.readLine()) != null){
list.add(tempString);
}
reader.close();
reader = null;
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null){
try {
reader.close();
} catch (IOException e1) {
}
}
}
return list.toArray(new String[0]);
} private static class CheckURLWorker implements Runnable {
private BlockingQueue<URL> queue;
private String name;
public CheckURLWorker(String name,BlockingQueue<URL> q){
this.name = name;
this.queue = q;
} @Override public void run() {
while (queue.size() > 0) {
URL url = queue.poll();
if (url == null) break;
try{
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setConnectTimeout(5000);
connection.setRequestMethod("GET");
connection.connect();
int code = connection.getResponseCode();
connection.disconnect();
System.out.printf("[%s]%s:%d%n",name,url.toString(),code);
} catch (SocketTimeoutException e) {
System.out.printf("[%s]%s:%d%n",name,url.toString(),-1);
} catch (IOException e) {
System.err.println(e);
}
}
}
}
}
1.要从服务器的角度思考下你的代码 你的.txt文件保存的是url吧,无论这个文件里的url是否有多个url指定为同一服务器,我看到你的代码里new了多个线程去实现连接并读取,那么在极短的时间里至少向同一服务器发起了至少100次连接吧。如果同一IP频繁连请求同一服务器,会被服务器拒绝。可能你要说,我并无恶意,只是想试试看我的代码。2.获取的影响码可能并无意义 假如服务器发现某一IP频繁连接,通过重定向给你发送一个页面,这个页面并不是你期望的页面,例如这个页面要求你输入验证码。那么你new的n个线程发起的N个连接可能得到的是同一个重定向后的页面。我说得还是比较温和的拒绝方式,另一种比较温和的拒绝方式可能是这样的,服务器并不立即响应你的连接请求,而是把你的请求放入一个队列延时处理。一般来说,服务器不太喜欢恶意搜索。原因很多。
2.参考qybao的了。