readLine的问题

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;

/**
 * A {@link FilterReader} that removes HTML-style tags from a character stream.
 *
 * <p>Every character from a {@code '<'} up to and including the next {@code '>'}
 * is dropped; all other characters are passed through unchanged. The
 * "inside a tag" state is kept in a field, so a tag that is split across two
 * underlying reads is still removed completely.
 */
public class RemoveHTMLReader extends FilterReader {
    /** True while the stream position is between a '<' and its closing '>'. */
    boolean intag = false;

    /**
     * Creates a tag-stripping filter on top of the given reader.
     *
     * @param in the reader that supplies the raw (tagged) characters
     */
    public RemoveHTMLReader(Reader in) {
        super(in);
    }

    /**
     * Reads up to {@code len} filtered characters into {@code buf} starting at {@code off}.
     *
     * <p>The raw characters are read into the same region of {@code buf} and then
     * compacted in place, so only the first {@code n} characters from {@code off}
     * (where {@code n} is the return value) are meaningful; anything after them is
     * leftover raw data that the caller must ignore.
     *
     * @param buf destination buffer
     * @param off index in {@code buf} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of filtered characters stored, {@code 0} if {@code len} is 0,
     *         or {@code -1} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read(char[] buf, int off, int len) throws IOException {
        // A zero-length request can never yield a character; without this guard the
        // loop below would spin forever because the underlying read keeps returning 0.
        if (len == 0) {
            return 0;
        }

        int numchars = 0;

        // Keep reading while a chunk consisted only of tag characters, so that a
        // return value of 0 is never mistaken by callers for "nothing available".
        while (numchars == 0) {
            numchars = in.read(buf, off, len);
            if (numchars == -1) {
                return -1;
            }

            // Compact the non-tag characters towards the front of the region just read.
            int last = off;
            for (int i = off; i < off + numchars; i++) {
                if (!intag) {
                    if (buf[i] == '<') {
                        intag = true;
                    } else {
                        buf[last++] = buf[i];
                    }
                } else if (buf[i] == '>') {
                    intag = false;
                }
            }
            numchars = last - off;
        }
        return numchars;
    }

    /**
     * Reads a single filtered character.
     *
     * <p>Delegates to {@link #read(char[], int, int)} of this class (not of the
     * wrapped reader) so that tags are stripped here as well.
     *
     * @return the next non-tag character, or {@code -1} at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read() throws IOException {
        char[] buf = new char[1];
        int result = read(buf, 0, 1);
        if (result == -1) {
            return -1;
        }
        return (int) buf[0];
    }

    /** Command-line driver: prints the file named by the single argument with tags removed. */
    public static class Test {
        /**
         * Reads the file line by line through a {@link RemoveHTMLReader} and prints each line.
         * Any failure (including a wrong argument count) is reported on standard error.
         *
         * @param args exactly one element: the path of the file to print
         */
        public static void main(String args[]) {
            try {
                if (args.length != 1) {
                    throw new IllegalArgumentException("Wrong number of args");
                }
                BufferedReader in = new BufferedReader(new RemoveHTMLReader(new FileReader(args[0])));
                try {
                    String line;
                    while ((line = in.readLine()) != null) {
                        System.out.println(line);
                    }
                } finally {
                    // Close the file even when reading fails part-way through.
                    in.close();
                }
            } catch (Exception e) {
                System.err.println(e);
            }
        }
    }
}
这个程序要实现的功能是：打印出去掉html标记后的文件内容。这里的RemoveHTMLReader覆盖了抽象类FilterReader的2个read方法。但我有一点不明白：这2个read方法是在什么时候被调用的？

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

我还有个问题，在程序的read方法中的for循环for(int i = off; i < off + numchars; i++){
                if(!intag){
                    if(buf[i] == '<')
                        intag = true;
                    else
                        buf[last++] = buf[i];
                }
                else{
                    if(buf[i] == '>')
                        intag = false;
                }
            }这段就是判断如果不是Html标记，就赋值，是Html标记，就跳过，但我觉得这样赋值完后，可能还会残留html标记啊，比如说，文件中的内容为：
<html>aa</html>
那么执行完后，buf里的内容就成为了atml</html>（假设read参数off为0，而len为文件中内容的长度），不知我理解的是否有问题
for(int i = off; i < off + numchars; i++){
                if(!intag){
                    if(buf[i] == '<')
                        intag = true;
                    else
                        buf[last++] = buf[i];
                }
                else{
                    if(buf[i] == '>')
                        intag = false;
                }
关于“什么时候调用了这2个read方法”：看一下BufferedReader中的readLine方法，跟踪一下就知道了。
// Public entry point quoted from BufferedReader: reads one line by delegating to
// readLine(boolean) with ignoreLF = false.
public String readLine() throws IOException {
        return readLine(false);
    }
上面是BufferedReader中的readline方法
跳转到以下方法String readLine(boolean ignoreLF) throws IOException {
// Reads one line out of the internal character buffer cb, calling fill() whenever
// the buffered characters are used up. Returns the line without its terminator,
// or null when the end of input is reached with nothing accumulated.
// ignoreLF: when true (or when skipLF is already set), a '\n' sitting at the
// current position is skipped before the line is scanned.
StringBuffer s = null;
int startChar;        synchronized (lock) {
            ensureOpen();
    boolean omitLF = ignoreLF || skipLF; bufferLoop:
    // Refill the buffer once every buffered character has been consumed.
    for (;;) { if (nextChar >= nChars)
    fill();
// Still nothing after fill(): end of input. Return what was accumulated, if anything.
if (nextChar >= nChars) { /* EOF */
    if (s != null && s.length() > 0)
return s.toString();
    else
return null;
}
boolean eol = false;
char c = 0;
int i;                /* Skip a leftover '\n', if necessary */
if (omitLF && (cb[nextChar] == '\n'))
                    nextChar++;
skipLF = false;
omitLF = false;     charLoop:
// Scan the buffered characters for a line terminator ('\n' or '\r').
for (i = nextChar; i < nChars; i++) {
    c = cb[i];
    if ((c == '\n') || (c == '\r')) {
eol = true;
break charLoop;
    }
} startChar = nextChar;
// Terminator found: build the result from cb[startChar, i) plus anything accumulated earlier.
nextChar = i; if (eol) {
    String str;
    if (s == null) {
str = new String(cb, startChar, i - startChar);
    } else {
s.append(cb, startChar, i - startChar);
str = s.toString();
    }
    // Step past the terminator itself.
    nextChar++;
    // After '\r', set skipLF so that an immediately following '\n' is skipped by the next read.
    if (c == '\r') {
skipLF = true;
    }
    return str;
}

// No terminator in this buffer: stash the scanned characters and loop to refill.
if (s == null)
    s = new StringBuffer(defaultExpectedLineLength);
s.append(cb, startChar, i - startChar);
    }
        }
    }fill方法如下 private void fill() throws IOException {
// Refills cb from the underlying reader `in`, first choosing the index dst at which
// the newly read characters will be stored.
// NOTE(review): `edChar` (and the truncated comments "No" / "Invalidate" below) look like
// the JDK's `markedChar` / "No mark" / "Invalidate mark" with the substring "mark"
// stripped by the forum's text filter — confirm against the JDK source.
int dst;
if (edChar <= UNMARKED) {
    /* No  */
    dst = 0;
} else {
    /* Marked */
    int delta = nextChar - edChar;
    if (delta >= readAheadLimit) {
/* Gone past read-ahead limit: Invalidate  */
edChar = INVALIDATED;
readAheadLimit = 0;
dst = 0;
    } else {
if (readAheadLimit <= cb.length) {
    /* Shuffle in the current buffer */
    System.arraycopy(cb, edChar, cb, 0, delta);
    edChar = 0;
    dst = delta;
} else {
    /* Reallocate buffer to accommodate read-ahead limit */
    char ncb[] = new char[readAheadLimit];
    System.arraycopy(cb, edChar, ncb, 0, delta);
    cb = ncb;
    edChar = 0;
    dst = delta;
}
                nextChar = nChars = delta;
    }
} int n;
// Retry for as long as the underlying read reports 0 characters.
do {
    n = in.read(cb, dst, cb.length - dst);
} while (n == 0);
// Only a positive count publishes new data; otherwise nextChar/nChars stay as they are,
// which the caller's "nextChar >= nChars" check then treats as EOF.
if (n > 0) {
    nChars = dst + n;
    nextChar = dst;
}
    }上面in.read(cb,dst,cb.length-dst)就是你复写的那方法了
至于另外一个方法（无参数的read()），在这个程序里没有被调用到。
这段就是判断如果不是Html标记，就赋值，是Html标记，就跳过，但我觉得这样赋值完后，可能还会残留html标记啊，比如说，文件中的内容为：
<html>aa</html>
那么执行完后，buf里的内容就成为了atml</html>（假设read参数off为0，而len为文件中内容的长度），不知我理解的是否有问题？——程序应该没什么问题，建议楼主debug一下：read方法返回的是过滤后的有效字符数（last - off），调用者只会使用buf中从off开始的这么多个字符，后面残留的内容不会被当作数据读取。
我想问我理解的对不对，是不是清除不干净html标记，因为自始至终都在同一个buf里赋值？