现在的release版1.5.1对sheetName没有encoding的支持。dev版已经修正了这个bugUnicode support in sheetname patch--------------------------------------------------------------------------------From: patrickl Subject: Unicode support in sheetname patch Date: Sun, 07 Jul 2002 19:59:30 -0700 --------------------------------------------------------------------------------Hi allI have made some modifications on BoundSheetRecord.java (an ugly one though) to support unicode(Chinese in my case) in sheetname. Could somebody review it, please. I am willing to modify and/or refactor it.For read in unicode support, this patch extends protected void fillFields(byte [] data, short size, int offset) so that it will interpret BIFF8 structure as needed. It REUSE the SSTDeserializer.manufactureStrings () as it correctly interpret the BIFF8 structure.'setSheetname' also modified to set the field4_compressed_unicode_flag depending on whether sheetname is 16bit encoding string.To write out unicode string, public int serialize(int offset, byte [] data) is extended.Attached below is the code.Thanks Patrick Lee /** * lifted from SSTDeserializer */ private void arraycopy( byte[] src, int src_position, byte[] dst, int dst_position, int length ) { System.arraycopy( src, src_position, dst, dst_position, length ); } protected void fillFields(byte [] data, short size, int offset) { field_1_position_of_BOF = LittleEndian.getInt(data, 0 + offset); field_2_option_flags = LittleEndian.getShort(data, 4 + offset); field_3_sheetname_length = data[ 6 + offset ]; field_4_compressed_unicode_flag = data[ 7 + offset ]; //field_5_sheetname = new String(data, 8 + offset, // LittleEndian.ubyteToInt( field_3_sheetname_length)); BinaryTree tempBT = new BinaryTree(); SSTDeserializer deserializer; deserializer = new SSTDeserializer( tempBT); int length = LittleEndian.ubyteToInt( field_3_sheetname_length); // deserializer.dump(data,0,size); if ((field_4_compressed_unicode_flag & 0x01)==1) { byte [] newData = new byte[length*2 +3]; 
arraycopy(data,7+offset,newData,2,length*2+1); LittleEndian.putShort(newData,0,(short)data[6+offset]); // deserializer.dump(newData,0,length*2 +3); // System.out.println("calling manufactureStrings!"); deserializer.manufactureStrings(newData,0, (short)(length *2+3)); // System.out.println("returned from manufactureStrings!"); field_5_sheetname = ((UnicodeString)tempBT.get(new Integer(0))).getString(); tempBT=null; } else { field_5_sheetname = new String(data, 8 + offset, LittleEndian.ubyteToInt( field_3_sheetname_length)); } // System.out.println("f_5_sn is "+field_5_sheetname); } /** * Check if String use 16-bit encoding character * Lifted from SSTRecord.addString */ public boolean is16bitString(String string) { // scan for characters greater than 255 ... if any are // present, we have to use 16-bit encoding. Otherwise, we // can use 8-bit encoding boolean useUTF16 = false; int strlen = string.length(); for ( int j = 0; j < strlen; j++ ) { if ( string.charAt( j ) > 255 ) { useUTF16 = true; break; } } return useUTF16 ; } /** * Set the sheetname for this sheet. 
(this appears in the tabs at the bottom) * @param sheetname the name of the sheet */ public void setSheetname(String sheetname) { boolean is16bit = is16bitString(sheetname); setSheetnameLength((byte) sheetname.length() ); setCompressedUnicodeFlag((byte ) (is16bit?1:0)); field_5_sheetname = sheetname; } public int serialize(int offset, byte [] data) { LittleEndian.putShort(data, 0 + offset, sid); LittleEndian.putShort(data, 2 + offset, ( short ) (0x08 + getSheetnameLength()* (getCompressedUnicodeFlag()==0?1:2))); LittleEndian.putInt(data, 4 + offset, getPositionOfBof()); LittleEndian.putShort(data, 8 + offset, getOptionFlags()); data[ 10 + offset ] = getSheetnameLength(); data[ 11 + offset ] = getCompressedUnicodeFlag(); if (getCompressedUnicodeFlag()==0){ // we assume compressed unicode (bein the dern americans we are ; -p) StringUtil.putCompressedUnicode(getSheetname(), data, 12 + offset); } else { try { StringUtil.putUncompressedUnicode(getSheetname(), data, 12 + offset); // String unicodeString = new String(getSheetname().getBytes ("Unicode"),"Unicode"); // StringUtil.putUncompressedUnicode(unicodeString, data, 12 + offset); } catch (Exception e){ System.out.println("encoding exception in BoundSheetRecord.serialize!"); } } return getRecordSize(); }
Subject: Unicode support in sheetname patch
Date: Sun, 07 Jul 2002 19:59:30 -0700
--------------------------------------------------------------------------------
Hi all,

I have made some modifications to BoundSheetRecord.java (ugly ones, though) to support Unicode (Chinese in my case) in sheet names. Could somebody review them, please? I am willing to modify and/or refactor the patch.

For reading Unicode, this patch extends protected void fillFields(byte [] data, short size, int offset) so that it interprets the BIFF8 structure as needed. It reuses SSTDeserializer.manufactureStrings(), as that method correctly interprets the BIFF8 structure.

setSheetname is also modified to set field_4_compressed_unicode_flag depending on whether the sheet name is a 16-bit encoded string.

To write out a Unicode string, public int serialize(int offset, byte [] data) is extended.

The code is attached below.

Thanks
Patrick Lee /**
* lifted from SSTDeserializer
*/ private void arraycopy( byte[] src, int src_position,
byte[] dst, int dst_position,
int length )
{
System.arraycopy( src, src_position, dst, dst_position, length );
}
/**
 * Populates this record's fields from raw record bytes.
 *
 * Read relative to {@code offset}: a 4-byte BOF position, 2-byte option
 * flags, a 1-byte sheet name length (in characters), a 1-byte flag and then
 * the sheet name itself. When bit 0 of the flag is set the name is read as
 * 16-bit characters through SSTDeserializer; otherwise it is read as one
 * byte per character.
 *
 * @param data   array holding the record body
 * @param size   size of the record body (not used by this method)
 * @param offset index in {@code data} at which the record body starts
 */
protected void fillFields(byte [] data, short size, int offset)
{
    field_1_position_of_BOF         = LittleEndian.getInt(data, 0 + offset);
    field_2_option_flags            = LittleEndian.getShort(data, 4 + offset);
    field_3_sheetname_length        = data[ 6 + offset ];
    field_4_compressed_unicode_flag = data[ 7 + offset ];

    // The length byte is unsigned: convert once and use this value everywhere.
    int length = LittleEndian.ubyteToInt(field_3_sheetname_length);

    if ((field_4_compressed_unicode_flag & 0x01) == 1)
    {
        // Build the buffer handed to manufactureStrings: a 2-byte character
        // count, then the flag byte and the 2*length name bytes copied
        // verbatim from the record.
        byte [] newData = new byte[ length * 2 + 3 ];

        // Use the unsigned length; casting the raw byte would sign-extend
        // and corrupt the count for names longer than 127 characters.
        LittleEndian.putShort(newData, 0, (short) length);
        arraycopy(data, 7 + offset, newData, 2, length * 2 + 1);

        BinaryTree      tempBT       = new BinaryTree();
        SSTDeserializer deserializer = new SSTDeserializer(tempBT);

        deserializer.manufactureStrings(newData, 0, (short) (length * 2 + 3));
        // The single string produced is looked up under key 0.
        field_5_sheetname =
            ((UnicodeString) tempBT.get(new Integer(0))).getString();
    }
    else
    {
        // 8-bit names hold one character (value 0..255) per byte. Decode
        // them explicitly instead of via new String(byte[], int, int), which
        // uses the platform default charset and would garble bytes >= 0x80
        // on platforms whose default is not Latin-1.
        char [] chars = new char[ length ];

        for (int k = 0; k < length; k++)
        {
            chars[ k ] = (char) (data[ 8 + offset + k ] & 0xFF);
        }
        field_5_sheetname = new String(chars);
    }
}
/**
* Check if String use 16-bit encoding character
* Lifted from SSTRecord.addString
*/ public boolean is16bitString(String string)
{
// scan for characters greater than 255 ... if any are
// present, we have to use 16-bit encoding. Otherwise, we
// can use 8-bit encoding
boolean useUTF16 = false;
int strlen = string.length(); for ( int j = 0; j < strlen; j++ )
{
if ( string.charAt( j ) > 255 )
{
useUTF16 = true;
break;
}
}
return useUTF16 ;
} /**
* Set the sheetname for this sheet. (this appears in the tabs at the
bottom)
* @param sheetname the name of the sheet
*/ public void setSheetname(String sheetname)
{
boolean is16bit = is16bitString(sheetname);
setSheetnameLength((byte) sheetname.length() );
setCompressedUnicodeFlag((byte ) (is16bit?1:0));
field_5_sheetname = sheetname; } public int serialize(int offset, byte [] data)
{
LittleEndian.putShort(data, 0 + offset, sid);
LittleEndian.putShort(data, 2 + offset,
( short ) (0x08 + getSheetnameLength()*
(getCompressedUnicodeFlag()==0?1:2)));
LittleEndian.putInt(data, 4 + offset, getPositionOfBof());
LittleEndian.putShort(data, 8 + offset, getOptionFlags());
data[ 10 + offset ] = getSheetnameLength();
data[ 11 + offset ] = getCompressedUnicodeFlag(); if (getCompressedUnicodeFlag()==0){
// we assume compressed unicode (bein the dern americans we are ;
-p)
StringUtil.putCompressedUnicode(getSheetname(), data, 12 +
offset);
}
else {
try {
StringUtil.putUncompressedUnicode(getSheetname(), data, 12 +
offset);
// String unicodeString = new String(getSheetname().getBytes
("Unicode"),"Unicode");
// StringUtil.putUncompressedUnicode(unicodeString, data, 12 +
offset);
}
catch (Exception e){
System.out.println("encoding exception in
BoundSheetRecord.serialize!");
}
}
return getRecordSize();
}