package wadihu.crawl;import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;/** 爬行类,专门负责网页的下载, 以非阻塞方式连接 */
public class CrawlOrder1 {
    // Threading model: the thread that constructs this object reads host names from
    // standard input and queues them in targetLists; the RW thread owns the selector,
    // registers queued targets and performs every socket read and write.

    /** Charset used to decode response bytes before they are printed. */
    private static final Charset RESPONSE_CHARSET = Charset.forName("gb2312");

    /** Charset used to encode the outgoing request line and headers. */
    private static final Charset REQUEST_CHARSET = Charset.forName("US-ASCII");

    /** Capacity in bytes of the buffer allocated for each read. */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Stop flag for the RW thread. It is written by the input thread and polled by the
     * RW thread, so it must be volatile for the write to become visible.
     */
    private volatile boolean shutdown = false;

    /** Selector on which every target channel is registered. */
    private final Selector selector;

    /** Targets waiting to be registered with the selector; guarded by its own monitor. */
    private final Queue<Target> targetLists = new LinkedList<Target>();

    /**
     * Opens the selector, starts the RW thread and then blocks in
     * {@link #receiveTarget()} reading targets from standard input.
     *
     * @throws IOException if the selector cannot be opened or standard input fails
     */
    public CrawlOrder1() throws IOException {
        selector = Selector.open();
        RW rw = new RW();
        rw.start();
        System.out.println("读写线程已启动...");
        receiveTarget();
    }

    /**
     * Reads one host name per line from standard input and queues a {@link Target} for
     * each. The line {@code bye} requests shutdown and ends the loop. Blank lines are
     * skipped, and a host that cannot be opened is reported without ending the loop.
     *
     * <p>NOTE(review): when input ends without {@code bye} the loop simply returns and
     * the RW thread keeps running; confirm whether end-of-input should also shut down.
     *
     * @throws IOException if reading from standard input fails
     */
    public void receiveTarget() throws IOException {
        BufferedReader buf = new BufferedReader(new InputStreamReader(System.in));
        String msg;
        while ((msg = buf.readLine()) != null) {
            String host = msg.trim();
            if (host.isEmpty()) {
                continue;
            }
            if (host.equals("bye")) {
                shutdown = true;
                selector.wakeup(); // interrupt a pending select() so the flag is seen promptly
                System.out.println("系统已经停止");
                break;
            }
            try {
                addTarget(new Target(host));
            } catch (IOException e) {
                // An unresolvable or unreachable host must not end the whole input loop.
                System.err.println("Cannot open target '" + host + "': " + e);
            }
        }
    }

    /**
     * Appends a target to the registration queue and wakes the selector so the RW
     * thread registers it without waiting for its select timeout.
     *
     * @param target the task to queue
     * @throws IOException declared for callers; not thrown by the current body
     */
    public void addTarget(Target target) throws IOException {
        synchronized (targetLists) {
            targetLists.add(target);
        }
        selector.wakeup();
    }

    /**
     * Drains the registration queue, registering each target's channel with the
     * selector for write and read readiness, with the target as the key attachment.
     */
    public void registerRW() {
        synchronized (targetLists) {
            Target target;
            while ((target = targetLists.poll()) != null) {
                try {
                    target.socketChannel.register(
                            selector, SelectionKey.OP_WRITE | SelectionKey.OP_READ, target);
                } catch (ClosedChannelException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Handles every key in the selector's selected set: a writable key sends the
     * request, otherwise a readable key reads and prints one chunk of the response.
     * An I/O failure on one channel closes that channel only, so the remaining keys in
     * the same batch are still served.
     *
     * @throws IOException declared for callers; per-channel failures are handled here
     */
    public void processSelectdRWKeys() throws IOException {
        for (Iterator<SelectionKey> it = selector.selectedKeys().iterator(); it.hasNext();) {
            SelectionKey selectionKey = it.next();
            it.remove();
            if (!selectionKey.isValid()) {
                continue; // the readiness queries below throw on a cancelled key
            }
            SocketChannel socketChannel = (SocketChannel) selectionKey.channel();
            try {
                if (selectionKey.isWritable()) {
                    sendRequest(selectionKey, socketChannel);
                } else if (selectionKey.isReadable()) {
                    readResponse(selectionKey, socketChannel);
                }
            } catch (IOException e) {
                e.printStackTrace();
                // Drop the broken connection so it is not reported ready again and again.
                closeChannel(selectionKey, socketChannel);
            }
        }
    }

    /**
     * Writes the GET request. A non-blocking write may accept only part of the buffer;
     * the unsent remainder is kept as the key attachment and retried on the next
     * writable event. Once everything is sent the key is switched to read-only interest.
     */
    private void sendRequest(SelectionKey selectionKey, SocketChannel socketChannel)
            throws IOException {
        Object attachment = selectionKey.attachment();
        ByteBuffer pending;
        if (attachment instanceof ByteBuffer) {
            pending = (ByteBuffer) attachment; // remainder of an earlier partial write
        } else {
            String head = "GET / HTTP/1.1\r\nHOST:" + socketChannel.socket().getInetAddress().getHostName() + "\r\n" + "Accept:*/*\r\n" + "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1;)\r\n\r\n";
            pending = ByteBuffer.wrap(head.getBytes(REQUEST_CHARSET));
        }
        socketChannel.write(pending);
        if (pending.hasRemaining()) {
            selectionKey.attach(pending);
        } else {
            selectionKey.attach(null);
            selectionKey.interestOps(SelectionKey.OP_READ);
        }
    }

    /**
     * Reads one chunk from the channel and prints it decoded with the response charset.
     * End of stream closes the channel and nothing is printed.
     *
     * <p>NOTE(review): each chunk is decoded on its own, so a multi-byte character that
     * straddles two reads may be printed incorrectly.
     */
    private void readResponse(SelectionKey selectionKey, SocketChannel socketChannel)
            throws IOException {
        ByteBuffer buffer = ByteBuffer.allocate(READ_BUFFER_SIZE);
        int ret = socketChannel.read(buffer);
        if (ret < 0) {
            closeChannel(selectionKey, socketChannel);
            return;
        }
        if (ret == 0) {
            return;
        }
        buffer.flip();
        System.out.println(RESPONSE_CHARSET.decode(buffer));
    }

    /** Cancels the key and closes its channel, reporting but not propagating a close failure. */
    private static void closeChannel(SelectionKey selectionKey, SocketChannel socketChannel) {
        selectionKey.cancel();
        try {
            socketChannel.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes targets that were never registered, every registered channel, then the selector. */
    private void closeAll() {
        synchronized (targetLists) {
            Target target;
            while ((target = targetLists.poll()) != null) {
                try {
                    target.socketChannel.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Closing the selector alone leaves the registered sockets open.
        for (SelectionKey key : selector.keys()) {
            try {
                key.channel().close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            selector.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Thread that registers queued targets and services ready channels until shutdown. */
    private class RW extends Thread {
        @Override
        public void run() {
            while (!shutdown) {
                try {
                    registerRW();
                    // The timeout bounds how long a shutdown request can go unnoticed.
                    if (selector.select(500) > 0) {
                        processSelectdRWKeys();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            closeAll();
        }
    }

    /**
     * Program entry point; constructing the crawler runs it until input ends.
     *
     * @param args ignored
     * @throws IOException if the crawler cannot start or standard input fails
     */
    public static void main(String[] args) throws IOException {
        new CrawlOrder1();
    }
}
/** A single crawl task; declared as a separate top-level class. */
class Target {
    /** Remote endpoint: the resolved host on port 80. */
    SocketAddress address;

    /** Channel connected to {@link #address} and switched to non-blocking mode. */
    SocketChannel socketChannel;

    /**
     * Resolves the host, connects to it on port 80 and then puts the channel into
     * non-blocking mode. The connect itself happens inside
     * {@link SocketChannel#open(SocketAddress)}, before the mode switch.
     *
     * @param host host name or literal IP address to connect to
     * @throws IOException if the host cannot be resolved, the connection fails, or the
     *         channel cannot be made non-blocking
     */
    public Target(String host) throws IOException {
        address = new InetSocketAddress(InetAddress.getByName(host), 80);
        SocketChannel channel = SocketChannel.open(address);
        try {
            channel.configureBlocking(false);
        } catch (IOException e) {
            // Do not leak the connected socket when construction fails.
            try {
                channel.close();
            } catch (IOException ignored) {
                // the configuration failure is the error worth reporting
            }
            throw e;
        }
        this.socketChannel = channel;
    }
}
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;/** 爬行类,专门负责网页的下载, 以非阻塞方式连接 */
public class CrawlOrder1 {
    // Threading model: the thread that constructs this object reads host names from
    // standard input and queues them in targetLists; the RW thread owns the selector,
    // registers queued targets and performs every socket read and write.

    /** Charset used to decode response bytes before they are printed. */
    private static final Charset RESPONSE_CHARSET = Charset.forName("gb2312");

    /** Charset used to encode the outgoing request line and headers. */
    private static final Charset REQUEST_CHARSET = Charset.forName("US-ASCII");

    /** Capacity in bytes of the buffer allocated for each read. */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Stop flag for the RW thread. It is written by the input thread and polled by the
     * RW thread, so it must be volatile for the write to become visible.
     */
    private volatile boolean shutdown = false;

    /** Selector on which every target channel is registered. */
    private final Selector selector;

    /** Targets waiting to be registered with the selector; guarded by its own monitor. */
    private final Queue<Target> targetLists = new LinkedList<Target>();

    /**
     * Opens the selector, starts the RW thread and then blocks in
     * {@link #receiveTarget()} reading targets from standard input.
     *
     * @throws IOException if the selector cannot be opened or standard input fails
     */
    public CrawlOrder1() throws IOException {
        selector = Selector.open();
        RW rw = new RW();
        rw.start();
        System.out.println("读写线程已启动...");
        receiveTarget();
    }

    /**
     * Reads one host name per line from standard input and queues a {@link Target} for
     * each. The line {@code bye} requests shutdown and ends the loop. Blank lines are
     * skipped, and a host that cannot be opened is reported without ending the loop.
     *
     * <p>NOTE(review): when input ends without {@code bye} the loop simply returns and
     * the RW thread keeps running; confirm whether end-of-input should also shut down.
     *
     * @throws IOException if reading from standard input fails
     */
    public void receiveTarget() throws IOException {
        BufferedReader buf = new BufferedReader(new InputStreamReader(System.in));
        String msg;
        while ((msg = buf.readLine()) != null) {
            String host = msg.trim();
            if (host.isEmpty()) {
                continue;
            }
            if (host.equals("bye")) {
                shutdown = true;
                selector.wakeup(); // interrupt a pending select() so the flag is seen promptly
                System.out.println("系统已经停止");
                break;
            }
            try {
                addTarget(new Target(host));
            } catch (IOException e) {
                // An unresolvable or unreachable host must not end the whole input loop.
                System.err.println("Cannot open target '" + host + "': " + e);
            }
        }
    }

    /**
     * Appends a target to the registration queue and wakes the selector so the RW
     * thread registers it without waiting for its select timeout.
     *
     * @param target the task to queue
     * @throws IOException declared for callers; not thrown by the current body
     */
    public void addTarget(Target target) throws IOException {
        synchronized (targetLists) {
            targetLists.add(target);
        }
        selector.wakeup();
    }

    /**
     * Drains the registration queue, registering each target's channel with the
     * selector for write and read readiness, with the target as the key attachment.
     */
    public void registerRW() {
        synchronized (targetLists) {
            Target target;
            while ((target = targetLists.poll()) != null) {
                try {
                    target.socketChannel.register(
                            selector, SelectionKey.OP_WRITE | SelectionKey.OP_READ, target);
                } catch (ClosedChannelException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Handles every key in the selector's selected set: a writable key sends the
     * request, otherwise a readable key reads and prints one chunk of the response.
     * An I/O failure on one channel closes that channel only, so the remaining keys in
     * the same batch are still served.
     *
     * @throws IOException declared for callers; per-channel failures are handled here
     */
    public void processSelectdRWKeys() throws IOException {
        for (Iterator<SelectionKey> it = selector.selectedKeys().iterator(); it.hasNext();) {
            SelectionKey selectionKey = it.next();
            it.remove();
            if (!selectionKey.isValid()) {
                continue; // the readiness queries below throw on a cancelled key
            }
            SocketChannel socketChannel = (SocketChannel) selectionKey.channel();
            try {
                if (selectionKey.isWritable()) {
                    sendRequest(selectionKey, socketChannel);
                } else if (selectionKey.isReadable()) {
                    readResponse(selectionKey, socketChannel);
                }
            } catch (IOException e) {
                e.printStackTrace();
                // Drop the broken connection so it is not reported ready again and again.
                closeChannel(selectionKey, socketChannel);
            }
        }
    }

    /**
     * Writes the GET request. A non-blocking write may accept only part of the buffer;
     * the unsent remainder is kept as the key attachment and retried on the next
     * writable event. Once everything is sent the key is switched to read-only interest.
     */
    private void sendRequest(SelectionKey selectionKey, SocketChannel socketChannel)
            throws IOException {
        Object attachment = selectionKey.attachment();
        ByteBuffer pending;
        if (attachment instanceof ByteBuffer) {
            pending = (ByteBuffer) attachment; // remainder of an earlier partial write
        } else {
            String head = "GET / HTTP/1.1\r\nHOST:" + socketChannel.socket().getInetAddress().getHostName() + "\r\n" + "Accept:*/*\r\n" + "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1;)\r\n\r\n";
            pending = ByteBuffer.wrap(head.getBytes(REQUEST_CHARSET));
        }
        socketChannel.write(pending);
        if (pending.hasRemaining()) {
            selectionKey.attach(pending);
        } else {
            selectionKey.attach(null);
            selectionKey.interestOps(SelectionKey.OP_READ);
        }
    }

    /**
     * Reads one chunk from the channel and prints it decoded with the response charset.
     * End of stream closes the channel and nothing is printed.
     *
     * <p>NOTE(review): each chunk is decoded on its own, so a multi-byte character that
     * straddles two reads may be printed incorrectly.
     */
    private void readResponse(SelectionKey selectionKey, SocketChannel socketChannel)
            throws IOException {
        ByteBuffer buffer = ByteBuffer.allocate(READ_BUFFER_SIZE);
        int ret = socketChannel.read(buffer);
        if (ret < 0) {
            closeChannel(selectionKey, socketChannel);
            return;
        }
        if (ret == 0) {
            return;
        }
        buffer.flip();
        System.out.println(RESPONSE_CHARSET.decode(buffer));
    }

    /** Cancels the key and closes its channel, reporting but not propagating a close failure. */
    private static void closeChannel(SelectionKey selectionKey, SocketChannel socketChannel) {
        selectionKey.cancel();
        try {
            socketChannel.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes targets that were never registered, every registered channel, then the selector. */
    private void closeAll() {
        synchronized (targetLists) {
            Target target;
            while ((target = targetLists.poll()) != null) {
                try {
                    target.socketChannel.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Closing the selector alone leaves the registered sockets open.
        for (SelectionKey key : selector.keys()) {
            try {
                key.channel().close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            selector.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Thread that registers queued targets and services ready channels until shutdown. */
    private class RW extends Thread {
        @Override
        public void run() {
            while (!shutdown) {
                try {
                    registerRW();
                    // The timeout bounds how long a shutdown request can go unnoticed.
                    if (selector.select(500) > 0) {
                        processSelectdRWKeys();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            closeAll();
        }
    }

    /**
     * Program entry point; constructing the crawler runs it until input ends.
     *
     * @param args ignored
     * @throws IOException if the crawler cannot start or standard input fails
     */
    public static void main(String[] args) throws IOException {
        new CrawlOrder1();
    }
}
/** A single crawl task; declared as a separate top-level class. */
class Target {
    /** Remote endpoint: the resolved host on port 80. */
    SocketAddress address;

    /** Channel connected to {@link #address} and switched to non-blocking mode. */
    SocketChannel socketChannel;

    /**
     * Resolves the host, connects to it on port 80 and then puts the channel into
     * non-blocking mode. The connect itself happens inside
     * {@link SocketChannel#open(SocketAddress)}, before the mode switch.
     *
     * @param host host name or literal IP address to connect to
     * @throws IOException if the host cannot be resolved, the connection fails, or the
     *         channel cannot be made non-blocking
     */
    public Target(String host) throws IOException {
        address = new InetSocketAddress(InetAddress.getByName(host), 80);
        SocketChannel channel = SocketChannel.open(address);
        try {
            channel.configureBlocking(false);
        } catch (IOException e) {
            // Do not leak the connected socket when construction fails.
            try {
                channel.close();
            } catch (IOException ignored) {
                // the configuration failure is the error worth reporting
            }
            throw e;
        }
        this.socketChannel = channel;
    }
}
解决方案 »
- jsp高手的来帮个忙啊
- 大家说说都知道的j2se中那些类实现了单例模式?
- 关注引用包的问题?
- frame为什么声明为 static 的?
- 会的近来!!
- 新手用 italic.addItemListener(this);出错
- 在JB7下能编译的文件,用JDK1.4.1为什么编译会出错了呢?
- 为什么我使用java bean的页面不能正常显示?大家帮忙呀!
- 技术探讨:代码表维护解决方案!
- 我是新手,大神来帮忙看看这段代码有什么问题吧!运行后无显示。
- JDBC问题,为什么我的TYPE_SCROLL_SENSITIVE和TYPE_SCROLL_INSENSITIVE不起效果?
- 我刚把jdk给装上了,但是里面的exe文件打开有问题
HTTP/1.1 301 Moved Permanently
Server: nginx/0.5.23
Date: Sat, 05 Dec 2009 04:30:10 GMT
Content-Type: text/html
Content-Length: 185
Connection: keep-alive
Location: http://hbdns4.cncmax.cn:8080?HOST=http&R=/&

<html>
<head><title>301 Moved Permanently</title></head>
<body bgcolor="white">
<center><h1>301 Moved Permanently</h1></center>
<hr><center>nginx/0.5.23</center>
</body>
</html>

请问我该怎么处理呢?已经一天了还没搞好,很多网页都是这个问题。谢谢大家,我没有太多分了。
这个就不行了,不知道是不是哪里写错了,望熟悉 socket 的朋友帮忙分析一下,感激不尽。