Option Explicit
Dim comped$, comping As String * 1, comping_p%, bit_p&
Dim ebitmask(1 To 8) As Integer ' 存储掩码的数组
Sub Command1_Click ()
Dim ibuf$, obuf1$, obuf2$
MousePointer = 11
' 设置掩码
ebitmask(1) = 128: ebitmask(2) = 64: ebitmask(3) = 32: ebitmask(4) = 16
ebitmask(5) = 8: ebitmask(6) = 4: ebitmask(7) = 2: ebitmask(8) = 1
Open "d:\temp\compress\theory.txt" For Input As #1 ' 压缩前的源文件
Open "d:\temp\compress\theory.6bt" For Output As #2 ' 压缩后的文件
Open "d:\temp\compress\theory_2.txt" For Output As #3 ' 解压后的文件
Do While Not EOF(1)
Line Input #1, ibuf
obuf1 = DoCompress(ibuf)
Print #2, obuf1
obuf2 = UnDoCompress(obuf1)
Print #3, obuf2
Loop
Close #1, #2, #3
MousePointer = 0
End Sub
Function DoCompress (in_buf$) As String ' 对输入的字符串进行压缩
Dim i&, buf_len&, c As String * 1
comped = "": comping = Chr(0): comping_p = 0
buf_len = Len(in_buf)
If buf_len > 0 Then
For i = 1 To buf_len
c = Mid(in_buf, i, 1)
Select Case c
Case " ", "A" To "Z", "a" To "z"
putbits 0, c ' 第一组中的字符
Case "!" To "/", ":" To "@", "[" To "`", "{" To "~", Chr(1) To Chr(31)
putbits 1, c ' 第二组中的字符
Case Else
putbits 2, c ' 其它字符
End Select
Next i
putbits 3, Chr(0)
End If
DoCompress = comped
End Function
Sub putbits (flag%, cc$) ' 压缩冗余的比特位(bits)
Dim i%, c As String * 1
c = cc
Select Case flag
Case 0 '对第一组中的字符内码进行重新定位
Select Case c
Case " "
c = Chr(1)
Case "0" To "9"
c = Chr(Asc(c) - 46)
Case "A" To "Z"
c = Chr(Asc(c) - 53)
Case "a" To "z"
c = Chr(Asc(c) - 59)
End Select
Case 1 '对第二组中的字符内码进行重新定位
Select Case c
Case "!" To "/"
c = Chr(Asc(c) - 32)
Case ":" To "@"
c = Chr(Asc(c) - 42)
Case "[" To "`"
c = Chr(Asc(c) - 68)
Case "{" To "~"
c = Chr(Asc(c) - 94)
Case Chr(1) To Chr(31)
c = Chr(Asc(c) + 32)
End Select
For i = 1 To 6
putbit 0
Next i
Case 2
For i = 1 To 12
putbit 0
Next i
For i = 1 To 8
If (Asc(c) And ebitmask(i)) <> 0 Then
putbit 1
Else
putbit 0
End If
Next i
Case 3
For i = comping_p + 1 To 9
putbit 0
Next i
End Select
If flag < 2 Then
For i = 1 To 6
If (Asc(c) And ebitmask(i + 2)) <> 0 Then
putbit 1
Else
putbit 0
End If
Next i
End If
End Sub
Sub putbit (bit%) ' 设置比特位(bit)
comping_p = comping_p + 1
If comping_p > 8 Then
comped = comped + comping
comping = Chr(0)
comping_p = 1
End If
If bit = 1 Then
comping = Chr(Asc(comping) Or ebitmask(comping_p))
End If
End Sub
Function UnDoCompress (in_buf$) As String ' 对输入的字符串进行解压缩
Dim bits_buf%, out_buf$, c As String * 1, comped_len&
comped = in_buf: bit_p = 1: comped_len = Len(comped) * 8
Do While bit_p <= comped_len
If comped_len - bit_p < 5 Then
Exit Do
End If
bits_buf = getbits(6)
If bits_buf <> 0 Then ' 根据控制字符判断字符的组别
Select Case bits_buf
Case 1
c = " "
Case 2 To 11 ' "0" To "9"
c = Chr(bits_buf + 46)
Case 12 To 37 ' "A" To "Z"
c = Chr(bits_buf + 53)
Case 38 To 63 ' "a" To "z"
c = Chr(bits_buf + 59)
End Select
out_buf = out_buf + c
Else
If bit_p > comped_len Then
Exit Do
End If
bits_buf = getbits(6)
If bits_buf <> 0 Then
Select Case bits_buf
Case 1 To 15 ' "!" To "/"
c = Chr(bits_buf + 32)
Case 16 To 22 ' ":" To "@"
c = Chr(bits_buf + 42)
Case 23 To 28 ' "[" To "`"
c = Chr(bits_buf + 68)
Case 29 To 32 ' "{" To "~"
c = Chr(bits_buf + 94)
Case 33 To 63 ' Chr(1) To Chr(31)
c = Chr(bits_buf - 32)
End Select
out_buf = out_buf + c
Else
bits_buf = getbits(8)
out_buf = out_buf + Chr(bits_buf)
End If
End If
Loop
UnDoCompress = out_buf
End Function
Function getbits (numb As Integer) As Integer ' 获取比特位(bit)
Dim byte_p&, bit_o%, byte1 As String * 1, i%, j%, k%, m%
byte_p = (bit_p \ 8) + 1: bit_o = bit_p Mod 8
byte1 = Mid(comped, byte_p, 1): j = 0: k = 0
For i = bit_o To 8
k = k + 1
If k > numb Then
Exit For
End If
If (Asc(byte1) And ebitmask(i)) <> 0 Then
j = j Or ebitmask(IIf(numb = 6, k + 2, k))
End If
Next i
If k < numb Then
byte1 = Mid(comped, byte_p + 1, 1): m = numb - k
For i = 1 To m
k = k + 1
If (Asc(byte1) And ebitmask(i)) <> 0 Then
j = j Or ebitmask(IIf(numb = 6, k + 2, k))
End If
Next i
End If
bit_p = bit_p + numb
getbits = j
End Function
Sub Command2_Click ()
End
End Sub
这是一个关于压缩的问题,大侠给我说一下putbit()和 putbits()这两个函数具体是怎么执行,说一下他的运行过程啊
Dim comped$, comping As String * 1, comping_p%, bit_p&
Dim ebitmask(1 To 8) As Integer ' 存储掩码的数组
Sub Command1_Click ()
Dim ibuf$, obuf1$, obuf2$
MousePointer = 11
' 设置掩码
ebitmask(1) = 128: ebitmask(2) = 64: ebitmask(3) = 32: ebitmask(4) = 16
ebitmask(5) = 8: ebitmask(6) = 4: ebitmask(7) = 2: ebitmask(8) = 1
Open "d:\temp\compress\theory.txt" For Input As #1 ' 压缩前的源文件
Open "d:\temp\compress\theory.6bt" For Output As #2 ' 压缩后的文件
Open "d:\temp\compress\theory_2.txt" For Output As #3 ' 解压后的文件
Do While Not EOF(1)
Line Input #1, ibuf
obuf1 = DoCompress(ibuf)
Print #2, obuf1
obuf2 = UnDoCompress(obuf1)
Print #3, obuf2
Loop
Close #1, #2, #3
MousePointer = 0
End Sub
Function DoCompress (in_buf$) As String ' 对输入的字符串进行压缩
Dim i&, buf_len&, c As String * 1
comped = "": comping = Chr(0): comping_p = 0
buf_len = Len(in_buf)
If buf_len > 0 Then
For i = 1 To buf_len
c = Mid(in_buf, i, 1)
Select Case c
Case " ", "A" To "Z", "a" To "z"
putbits 0, c ' 第一组中的字符
Case "!" To "/", ":" To "@", "[" To "`", "{" To "~", Chr(1) To Chr(31)
putbits 1, c ' 第二组中的字符
Case Else
putbits 2, c ' 其它字符
End Select
Next i
putbits 3, Chr(0)
End If
DoCompress = comped
End Function
Sub putbits (flag%, cc$) ' 压缩冗余的比特位(bits)
Dim i%, c As String * 1
c = cc
Select Case flag
Case 0 '对第一组中的字符内码进行重新定位
Select Case c
Case " "
c = Chr(1)
Case "0" To "9"
c = Chr(Asc(c) - 46)
Case "A" To "Z"
c = Chr(Asc(c) - 53)
Case "a" To "z"
c = Chr(Asc(c) - 59)
End Select
Case 1 '对第二组中的字符内码进行重新定位
Select Case c
Case "!" To "/"
c = Chr(Asc(c) - 32)
Case ":" To "@"
c = Chr(Asc(c) - 42)
Case "[" To "`"
c = Chr(Asc(c) - 68)
Case "{" To "~"
c = Chr(Asc(c) - 94)
Case Chr(1) To Chr(31)
c = Chr(Asc(c) + 32)
End Select
For i = 1 To 6
putbit 0
Next i
Case 2
For i = 1 To 12
putbit 0
Next i
For i = 1 To 8
If (Asc(c) And ebitmask(i)) <> 0 Then
putbit 1
Else
putbit 0
End If
Next i
Case 3
For i = comping_p + 1 To 9
putbit 0
Next i
End Select
If flag < 2 Then
For i = 1 To 6
If (Asc(c) And ebitmask(i + 2)) <> 0 Then
putbit 1
Else
putbit 0
End If
Next i
End If
End Sub
Sub putbit (bit%) ' 设置比特位(bit)
comping_p = comping_p + 1
If comping_p > 8 Then
comped = comped + comping
comping = Chr(0)
comping_p = 1
End If
If bit = 1 Then
comping = Chr(Asc(comping) Or ebitmask(comping_p))
End If
End Sub
Function UnDoCompress (in_buf$) As String ' 对输入的字符串进行解压缩
Dim bits_buf%, out_buf$, c As String * 1, comped_len&
comped = in_buf: bit_p = 1: comped_len = Len(comped) * 8
Do While bit_p <= comped_len
If comped_len - bit_p < 5 Then
Exit Do
End If
bits_buf = getbits(6)
If bits_buf <> 0 Then ' 根据控制字符判断字符的组别
Select Case bits_buf
Case 1
c = " "
Case 2 To 11 ' "0" To "9"
c = Chr(bits_buf + 46)
Case 12 To 37 ' "A" To "Z"
c = Chr(bits_buf + 53)
Case 38 To 63 ' "a" To "z"
c = Chr(bits_buf + 59)
End Select
out_buf = out_buf + c
Else
If bit_p > comped_len Then
Exit Do
End If
bits_buf = getbits(6)
If bits_buf <> 0 Then
Select Case bits_buf
Case 1 To 15 ' "!" To "/"
c = Chr(bits_buf + 32)
Case 16 To 22 ' ":" To "@"
c = Chr(bits_buf + 42)
Case 23 To 28 ' "[" To "`"
c = Chr(bits_buf + 68)
Case 29 To 32 ' "{" To "~"
c = Chr(bits_buf + 94)
Case 33 To 63 ' Chr(1) To Chr(31)
c = Chr(bits_buf - 32)
End Select
out_buf = out_buf + c
Else
bits_buf = getbits(8)
out_buf = out_buf + Chr(bits_buf)
End If
End If
Loop
UnDoCompress = out_buf
End Function
Function getbits (numb As Integer) As Integer ' 获取比特位(bit)
Dim byte_p&, bit_o%, byte1 As String * 1, i%, j%, k%, m%
byte_p = (bit_p \ 8) + 1: bit_o = bit_p Mod 8
byte1 = Mid(comped, byte_p, 1): j = 0: k = 0
For i = bit_o To 8
k = k + 1
If k > numb Then
Exit For
End If
If (Asc(byte1) And ebitmask(i)) <> 0 Then
j = j Or ebitmask(IIf(numb = 6, k + 2, k))
End If
Next i
If k < numb Then
byte1 = Mid(comped, byte_p + 1, 1): m = numb - k
For i = 1 To m
k = k + 1
If (Asc(byte1) And ebitmask(i)) <> 0 Then
j = j Or ebitmask(IIf(numb = 6, k + 2, k))
End If
Next i
End If
bit_p = bit_p + numb
getbits = j
End Function
Sub Command2_Click ()
End
End Sub
这是一个关于压缩的问题,大侠给我说一下putbit()和 putbits()这两个函数具体是怎么执行,说一下他的运行过程啊
一个小巧的数据库压缩算法
是这样一件尴尬的事促使我们寻找一种压缩算法:我们刚刚制作完成的欧洲十五国进出口商数据库及其检索系统要占用700兆空间,它刚好放不到一张光盘上去!
常用的压缩工具(如WinZip或ARJ)现在也不起作用,因为我们并不是对整个数据库进行压缩(那样光盘上的数据将无法检索),而只是要将数据库中的某些字段的内容压缩后存入库中——以减少整个数据库占用的空间——然后在使用中动态解压将数据还原为本来面目。
最理想的办法当然是数据库系统(DBMS)本身直接支持数据压缩存储,但令人遗憾的是:常见的DBMS均未提供该功能。
在互联网上确实能找到一些有关数据压缩的思想、算法甚至C语言的部分源代码,但它们大都过于复杂,或应用范围有限(如仅对图像或纯数字数据有效),或是在版权方面有太苛刻的要求。
最后我们采用了自行设计的算法。该方法的压缩率只有20%至25%,但它小巧、容易实现,在实际应用中取得了良好的效果。一、算法概述
压缩冗余的比特位(bit)是常见的压缩思想之一。
例如,字符串CCW-2000的二进制表示为:
01000011,01000011,01010111,00101101,00110010,00110000,00110000,00110000
其中每个二进制的前导“0”是冗余的,去掉前导“0”后的表示为:
1000011,1000011,1010111,101101,110010,110000,110000,110000
这显然达到了数据压缩的目的,但同时也带来一个很大的问题:二进制流仅仅由“0”和“1”组成,并不存在上面为了表述清晰而加入的“,”。即:由于压缩后的二进制不再是“定长”的,两个二进制之间如何“划界”成了难题。
常见的解决方案有:霍夫曼表(Huffmancodes)、LZW的动态查询表、Markov的多维长度字典表,以及根据前值和前长度变换应用规则的DAKX方法等。但这些方法大都过于繁杂或未公布技术细节而难以实现。
为了实现起来简捷,避免用“宰牛刀”,我们设计的算法采用一种折衷的思想:用“缩短”了的“定长”二进制来实现压缩,同时又避免了“划界”的难题。具体思路是:
(1)所有字符的二进制长度均为6个bit而不是一般的8个bit。即每个字符节约2个bit。这决定了本方法的理想(最高)压缩率为25%。
(2)由于6个bit仅能区分64种字符,故将内码小于127的字符分为“常用”和“不常用”两组。
(3)第一组由英文字母(大小写)、0至9的数字、空格以及一个控制字符
组成;第二组由内码小于127的其它字符组成。
(4)每组内的字符均重新编码,以使其“新内码”均小于64。
(5)第二组的每个字符前均以第一组中的控制字符做前缀。
(6)内码大于等于127的字符用普通的8-bit表示,前缀是两个连续的第一组中的控制字符。
(7)本方法显然仅适用于西文文本。
(8)压缩后的数据相当于被加密了,这大大提高了信息安全性。二、算法实现
我们制作的西欧进出口商数据库中有大量大段的企业英文描述,故应用上述算法非常成功,如愿以偿地将整个系统的大小降到了600兆以下,可以轻松地装入一张光盘中。
检索系统用VB编程,数据库用的是MS-Access。
后附程序为清晰起见,将压缩数据库的功能改为压缩文本文件的功能。该程序已在机器上测试通过,可作为一个压缩工具单独运行(为了同时说明压缩及解压缩这两个函数的使用,下面的程序会将刚被压缩的文件再进行解压缩,解压后文件与源文件是完全一样的)。附完整的VB源代码:OptionExplicit
Dimcomped$,compingAsString*1,comping_p%,bit_p&
Dimebitmask(1To8)AsInteger’存储掩码的数组
SubCommand1_Click()
Dimibuf$,obuf1$,obuf2$
MousePointer=11
’设置掩码
ebitmask(1)=128:ebitmask(2)=64:ebitmask(3)=32:ebitmask(4)=16
ebitmask(5)=8:ebitmask(6)=4:ebitmask(7)=2:ebitmask(8)=1
Open“d:\temp\compress\theory.txt“ForInputAs#1’压缩前的源文件
Open“d:\temp\compress\theory.6bt“ForOutputAs#2’压缩后的文件
Open“d:\temp\compress\theory_2.txt“ForOutputAs#3’解压后的文件
DoWhileNotEOF(1)
LineInput#1,ibuf
obuf1=DoCompress(ibuf)
Print#2,obuf1
obuf2=UnDoCompress(obuf1)
Print#3,obuf2
Loop
Close#1,#2,#3
MousePointer=0
EndSub
FunctionDoCompress(in_buf$)AsString’对输入的字符串进行压缩
Dimi&,buf_len&,cAsString*1
comped=““:comping=Chr(0):comping_p=0
buf_len=Len(in_buf)
Ifbuf_len〉0Then
Fori=1Tobuf_len
c=Mid(in_buf,i,1)
SelectCasec
Case““,“A“To“Z“,“a“To“z“
putbits0,c’第一组中的字符
Case“!“To“/“,“:“To“@“,“[“To“`“,“{“To“~“,Chr(1)ToChr(31)
putbits1,c’第二组中的字符
CaseElse
putbits2,c’其它字符
EndSelect
Nexti
putbits3,Chr(0)
EndIf
DoCompress=comped
EndFunction
Subputbits(flag%,cc$)’压缩冗余的比特位(bits)
Dimi%,cAsString*1
c=cc
SelectCaseflag
Case0’对第一组中的字符内码进行重新定位
SelectCasec
Case““
c=Chr(1)
Case“0“To“9“
c=Chr(Asc(c)-46)
Case“A“To“Z“
c=Chr(Asc(c)-53)
Case“a“To“z“
c=Chr(Asc(c)-59)
EndSelect
Case1’对第二组中的字符内码进行重新定位
SelectCasec
Case“!“To“/“
c=Chr(Asc(c)-32)
Case“:“To“@“
c=Chr(Asc(c)-42)
Case“[“To“`“
c=Chr(Asc(c)-68)
Case“{“To“~“
c=Chr(Asc(c)-94)
CaseChr(1)ToChr(31)
c=Chr(Asc(c)+32)
EndSelect
Fori=1To6
putbit0
Nexti
Case2
Fori=1To12
putbit0
Nexti
Fori=1To8
If(Asc(c)Andebitmask(i))〈〉0Then
putbit1
Else
putbit0
EndIf
Nexti
Case3
Fori=comping_p+1To9
putbit0
Nexti
EndSelect
Ifflag〈2Then
Fori=1To6
If(Asc(c)Andebitmask(i+2))〈〉0Then
putbit1
Else
putbit0
EndIf
Nexti
EndIf
EndSub
Subputbit(bit%)’设置比特位(bit)
comping_p=comping_p+1
Ifcomping_p〉8Then
comped=comped+comping
comping=Chr(0)
comping_p=1
EndIf
Ifbit=1Then
comping=Chr(Asc(comping)Orebitmask(comping_p))
EndIf
EndSub
FunctionUnDoCompress(in_buf$)AsString’对输入的字符串进行解压缩
Dimbits_buf%,out_buf$,cAsString*1,comped_len&
comped=in_buf:bit_p=1:comped_len=Len(comped)*8
DoWhilebit_p〈=comped_len
Ifcomped_len-bit_p〈5Then
ExitDo
EndIf
bits_buf=getbits(6)
Ifbits_buf〈〉0Then’根据控制字符判断字符的组别
SelectCasebits_buf
Case1
c=““
Case2To11’“0“To“9“
c=Chr(bits_buf+46)
Case12To37’“A“To“Z“
c=Chr(bits_buf+53)
Case38To63’“a“To“z“
c=Chr(bits_buf+59)
EndSelect
out_buf=out_buf+c
Else
Ifbit_p〉comped_lenThen
ExitDo
EndIf
bits_buf=getbits(6)
Ifbits_buf〈〉0Then
SelectCasebits_buf
Case1To15’“!“To“/“
c=Chr(bits_buf+32)
Case16To22’“:“To“@“
c=Chr(bits_buf+42)
Case23To28’“[“To“`“
c=Chr(bits_buf+68)
Case29To32’“{“To“~“
c=Chr(bits_buf+94)
Case33To63’Chr(1)ToChr(31)
c=Chr(bits_buf-32)
EndSelect
out_buf=out_buf+c
Else
bits_buf=getbits(8)
out_buf=out_buf+Chr(bits_buf)
EndIf
EndIf
Loop
UnDoCompress=out_buf
EndFunction
Functiongetbits(numbAsInteger)AsInteger’获取比特位(bit)
Dimbyte_p&,bit_o%,byte1AsString*1,i%,j%,k%,m%
byte_p=(bit_p\8)+1:bit_o=bit_pMod8
byte1=Mid(comped,byte_p,1):j=0:k=0
Fori=bit_oTo8
k=k+1
Ifk〉numbThen
ExitFor
EndIf
If(Asc(byte1)Andebitmask(i))〈〉0Then
j=jOrebitmask(IIf(numb=6,k+2,k))
EndIf
Nexti
Ifk〈numbThen
byte1=Mid(comped,byte_p+1,1):m=numb-k
Fori=1Tom
k=k+1
If(Asc(byte1)Andebitmask(i))〈〉0Then
j=jOrebitmask(IIf(numb=6,k+2,k))
EndIf
Nexti
EndIf
bit_p=bit_p+numb
getbits=j
EndFunction
SubCommand2_Click()
End
EndSub