/** * Reads from the * stream <code>in</code> a representation * of a Unicode character string encoded in * <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format; * this string of characters is then returned as a <code>String</code>. * The details of the modified UTF-8 representation * are exactly the same as for the <code>readUTF</code> * method of <code>DataInput</code>. * * @param in a data input stream. * @return a Unicode string. * @exception EOFException if the input stream reaches the end * before all the bytes. * @exception IOException the stream has been closed and the contained * input stream does not support reading after close, or * another I/O error occurs. * @exception UTFDataFormatException if the bytes do not represent a * valid modified UTF-8 encoding of a Unicode string. * @see java.io.DataInputStream#readUnsignedShort() */ public final static String readUTF(DataInput in) throws IOException { int utflen = in.readUnsignedShort(); byte[] bytearr = null; char[] chararr = null; if (in instanceof DataInputStream) { DataInputStream dis = (DataInputStream)in; if (dis.bytearr.length < utflen){ dis.bytearr = new byte[utflen*2]; dis.chararr = new char[utflen*2]; } chararr = dis.chararr; bytearr = dis.bytearr; } else { bytearr = new byte[utflen]; chararr = new char[utflen]; } int c, char2, char3; int count = 0; int chararr_count=0; in.readFully(bytearr, 0, utflen); while (count < utflen) { c = (int) bytearr[count] & 0xff; if (c > 127) break; count++; chararr[chararr_count++]=(char)c; } while (count < utflen) { c = (int) bytearr[count] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: /* 0xxxxxxx*/ count++; chararr[chararr_count++]=(char)c; break; case 12: case 13: /* 110x xxxx 10xx xxxx*/ count += 2; if (count > utflen) throw new UTFDataFormatException( "malformed input: partial character at end"); char2 = (int) bytearr[count-1]; if ((char2 & 0xC0) != 0x80) throw new UTFDataFormatException( "malformed input around byte " + count); chararr[chararr_count++]=(char)(((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14: /* 1110 xxxx 10xx xxxx 10xx xxxx */ count += 3; if (count > utflen) throw new UTFDataFormatException( "malformed input: partial character at end"); char2 = (int) bytearr[count-2]; char3 = (int) bytearr[count-1]; if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) throw new UTFDataFormatException( "malformed input around byte " + (count-1)); chararr[chararr_count++]=(char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)); break; default: /* 10xx xxxx, 1111 xxxx */ throw new UTFDataFormatException( "malformed input around byte " + count); } } // The number of chars produced may be less than utflen return new String(chararr, 0, chararr_count); }这是源码中readUTF最终掉的方法,LZ自己看看就明白了
/**
* Reads from the
* stream <code>in</code> a representation
* of a Unicode character string encoded in
* <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format;
* this string of characters is then returned as a <code>String</code>.
* The details of the modified UTF-8 representation
* are exactly the same as for the <code>readUTF</code>
* method of <code>DataInput</code>.
*
* @param in a data input stream.
* @return a Unicode string.
* @exception EOFException if the input stream reaches the end
* before all the bytes.
* @exception IOException the stream has been closed and the contained
* input stream does not support reading after close, or
* another I/O error occurs.
* @exception UTFDataFormatException if the bytes do not represent a
* valid modified UTF-8 encoding of a Unicode string.
* @see java.io.DataInputStream#readUnsignedShort()
*/
public final static String readUTF(DataInput in) throws IOException {
int utflen = in.readUnsignedShort();
byte[] bytearr = null;
char[] chararr = null;
if (in instanceof DataInputStream) {
DataInputStream dis = (DataInputStream)in;
if (dis.bytearr.length < utflen){
dis.bytearr = new byte[utflen*2];
dis.chararr = new char[utflen*2];
}
chararr = dis.chararr;
bytearr = dis.bytearr;
} else {
bytearr = new byte[utflen];
chararr = new char[utflen];
} int c, char2, char3;
int count = 0;
int chararr_count=0; in.readFully(bytearr, 0, utflen); while (count < utflen) {
c = (int) bytearr[count] & 0xff;
if (c > 127) break;
count++;
chararr[chararr_count++]=(char)c;
} while (count < utflen) {
c = (int) bytearr[count] & 0xff;
switch (c >> 4) {
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
/* 0xxxxxxx*/
count++;
chararr[chararr_count++]=(char)c;
break;
case 12: case 13:
/* 110x xxxx 10xx xxxx*/
count += 2;
if (count > utflen)
throw new UTFDataFormatException(
"malformed input: partial character at end");
char2 = (int) bytearr[count-1];
if ((char2 & 0xC0) != 0x80)
throw new UTFDataFormatException(
"malformed input around byte " + count);
chararr[chararr_count++]=(char)(((c & 0x1F) << 6) |
(char2 & 0x3F));
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if (count > utflen)
throw new UTFDataFormatException(
"malformed input: partial character at end");
char2 = (int) bytearr[count-2];
char3 = (int) bytearr[count-1];
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
throw new UTFDataFormatException(
"malformed input around byte " + (count-1));
chararr[chararr_count++]=(char)(((c & 0x0F) << 12) |
((char2 & 0x3F) << 6) |
((char3 & 0x3F) << 0));
break;
default:
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException(
"malformed input around byte " + count);
}
}
// The number of chars produced may be less than utflen
return new String(chararr, 0, chararr_count);
}这是源码中readUTF最终掉的方法,LZ自己看看就明白了
public final static String readUTF(DataInput in) throws IOException {
int utflen = in.readUnsignedShort();
byte[] bytearr = null;
char[] chararr = null;
if (in instanceof DataInputStream) {
DataInputStream dis = (DataInputStream)in;
if (dis.bytearr.length < utflen){
dis.bytearr = new byte[utflen*2];
dis.chararr = new char[utflen*2];
}
chararr = dis.chararr;
bytearr = dis.bytearr;
} else {
bytearr = new byte[utflen];
chararr = new char[utflen];
} int c, char2, char3;
int count = 0;
int chararr_count=0; in.readFully(bytearr, 0, utflen); while (count < utflen) {
c = (int) bytearr[count] & 0xff;
if (c > 127) break;
count++;
chararr[chararr_count++]=(char)c;
} while (count < utflen) {
c = (int) bytearr[count] & 0xff;
switch (c >> 4) {
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
/* 0xxxxxxx*/
count++;
chararr[chararr_count++]=(char)c;
break;
case 12: case 13:
/* 110x xxxx 10xx xxxx*/
count += 2;
if (count > utflen)
throw new UTFDataFormatException(
"malformed input: partial character at end");
char2 = (int) bytearr[count-1];
if ((char2 & 0xC0) != 0x80)
throw new UTFDataFormatException(
"malformed input around byte " + count);
chararr[chararr_count++]=(char)(((c & 0x1F) << 6) |
(char2 & 0x3F));
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if (count > utflen)
throw new UTFDataFormatException(
"malformed input: partial character at end");
char2 = (int) bytearr[count-2];
char3 = (int) bytearr[count-1];
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
throw new UTFDataFormatException(
"malformed input around byte " + (count-1));
chararr[chararr_count++]=(char)(((c & 0x0F) << 12) |
((char2 & 0x3F) << 6) |
((char3 & 0x3F) << 0));
break;
default:
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException(
"malformed input around byte " + count);
}
}
// The number of chars produced may be less than utflen
return new String(chararr, 0, chararr_count);
}
首先读取两个字节,并使用它们构造一个无符号 16 位整数,构造方式与 readUnsignedShort 方法的方式完全相同。该整数值被称为 UTF 长度,它指定要读取的额外字节数。然后成组地将这些字节转换为字符。每组的长度根据该组第一个字节的值计算。紧跟在某个组后面的字节(如果有)是下一组的第一个字节。
诚心希望各位Coder百尺竿头更进一步!!!!!!
tb = data_In.readUTF();
System.out.println(tb);
carId.setString(tb);
System.out.println(carId.getString());
为什么tb什么也没有?输出语句木有反应