'如果可选的sytle参数为"hex", 输出utf8的hex字符串(用于调试和专门目的),否则默认输出的是utf的字符串
Public Function Str_UTF_8(ByVal Str_GB As String, Optional ByVal Sytle As String = "string") As String
Dim Source() As Byte
Dim UTF_16 As Long
Dim Str_Bin As String
Dim My_utf_Bin As String
Dim Str_chr As String
Dim UTF_VAL As Long
Dim Str_hex As String
Dim Str_utf_hex As String
For j = 1 To Len(Str_GB)
CopyMemory UTF_VAL, ByVal StrPtr(Mid(Str_GB, j, 1)), 2 '得到unicode码
Str_hex = Hex(UTF_VAL) '转为16进制字符串
Str_Bin = H_To_B(Str_hex, 16) '转为2进制字符串
If UTF_VAL < &H80 Then ' 1 UTF-8 byte
My_utf_Bin = Mid(Str_Bin, 9, 8) ElseIf UTF_VAL < &H800 Then ' 2 UTF-8 bytes
My_utf_Bin = "110" + Mid(Str_Bin, 5, 5) + "10" + Mid(Str_Bin, 11, 6)
Else ' 3 UTF-8 bytes
My_utf_Bin = "1110" + Mid(Str_Bin, 1, 4) + "10" + Mid(Str_Bin, 5, 6) + "10" + Mid(Str_Bin, 11, 6)
End If
Str_utf_hex = Str_utf_hex + B_To_H(My_utf_Bin) '转为utf8的16进制字符串 Next j
'''''''''''''''''''''以下是转换成为utf8编码 nLength = Len(Str_utf_hex) / 2
ReDim Source(Len(Str_utf_hex) / 2) For i = 1 To Len(Str_utf_hex) Step 2 CopyMemory Source((i + 1) / 2), ByVal StrPtr(ChrB("&h" + Mid(Str_utf_hex, i, 2))), 1
Str_chr = Str_chr & ChrB(Source((i + 1) / 2))
Next i
If Sytle = "hex" Or Sytle = "Hex" Or Sytle = "HEX" Then '判断是不是要输出机器码
Str_UTF_8 = Str_utf_hex
Else
Str_UTF_8 = Str_chr
End If
End Function '二进制转16进制函数
Public Function B_To_H(ByVal Bininary_in As String) As String
Dim i As Long
Dim H As String
If Len(Bininary_in) Mod 4 <> 0 Then
Bininary_in = String(4 - Len(Bininary_in) Mod 4, "0") & Bininary_in
End If
For i = 1 To Len(Bininary_in) Step 4
Select Case Mid(Bininary_in, i, 4)
Case "0000": H = H & "0"
Case "0001": H = H & "1"
Case "0010": H = H & "2"
Case "0011": H = H & "3"
Case "0100": H = H & "4"
Case "0101": H = H & "5"
Case "0110": H = H & "6"
Case "0111": H = H & "7"
Case "1000": H = H & "8"
Case "1001": H = H & "9"
Case "1010": H = H & "A"
Case "1011": H = H & "B"
Case "1100": H = H & "C"
Case "1101": H = H & "D"
Case "1110": H = H & "E"
Case "1111": H = H & "F"
End Select
Next i
B_To_H = H
End Function '16进制转二进制函数
Public Function H_To_B(ByVal hex_str As String, MinimumDigits As Integer) As String
Dim i As Long
Dim B As String
Dim ExtraDigitsNeeded As Integer hex_str = UCase(hex_str)
For i = 1 To Len(hex_str)
Select Case Mid(hex_str, i, 1)
Case "0": B = B & "0000"
Case "1": B = B & "0001"
Case "2": B = B & "0010"
Case "3": B = B & "0011"
Case "4": B = B & "0100"
Case "5": B = B & "0101"
Case "6": B = B & "0110"
Case "7": B = B & "0111"
Case "8": B = B & "1000"
Case "9": B = B & "1001"
Case "A": B = B & "1010"
Case "B": B = B & "1011"
Case "C": B = B & "1100"
Case "D": B = B & "1101"
Case "E": B = B & "1110"
Case "F": B = B & "1111"
End Select
Next i ExtraDigitsNeeded = MinimumDigits - Len(B)
If ExtraDigitsNeeded > 0 Then
B = String(ExtraDigitsNeeded, "0") & B
End If
H_To_B = B
End Function
Public Function Str_UTF_8(ByVal Str_GB As String, Optional ByVal Sytle As String = "string") As String
Dim Source() As Byte
Dim UTF_16 As Long
Dim Str_Bin As String
Dim My_utf_Bin As String
Dim Str_chr As String
Dim UTF_VAL As Long
Dim Str_hex As String
Dim Str_utf_hex As String
For j = 1 To Len(Str_GB)
CopyMemory UTF_VAL, ByVal StrPtr(Mid(Str_GB, j, 1)), 2 '得到unicode码
Str_hex = Hex(UTF_VAL) '转为16进制字符串
Str_Bin = H_To_B(Str_hex, 16) '转为2进制字符串
If UTF_VAL < &H80 Then ' 1 UTF-8 byte
My_utf_Bin = Mid(Str_Bin, 9, 8) ElseIf UTF_VAL < &H800 Then ' 2 UTF-8 bytes
My_utf_Bin = "110" + Mid(Str_Bin, 5, 5) + "10" + Mid(Str_Bin, 11, 6)
Else ' 3 UTF-8 bytes
My_utf_Bin = "1110" + Mid(Str_Bin, 1, 4) + "10" + Mid(Str_Bin, 5, 6) + "10" + Mid(Str_Bin, 11, 6)
End If
Str_utf_hex = Str_utf_hex + B_To_H(My_utf_Bin) '转为utf8的16进制字符串 Next j
'''''''''''''''''''''以下是转换成为utf8编码 nLength = Len(Str_utf_hex) / 2
ReDim Source(Len(Str_utf_hex) / 2) For i = 1 To Len(Str_utf_hex) Step 2 CopyMemory Source((i + 1) / 2), ByVal StrPtr(ChrB("&h" + Mid(Str_utf_hex, i, 2))), 1
Str_chr = Str_chr & ChrB(Source((i + 1) / 2))
Next i
If Sytle = "hex" Or Sytle = "Hex" Or Sytle = "HEX" Then '判断是不是要输出机器码
Str_UTF_8 = Str_utf_hex
Else
Str_UTF_8 = Str_chr
End If
End Function '二进制转16进制函数
Public Function B_To_H(ByVal Bininary_in As String) As String
Dim i As Long
Dim H As String
If Len(Bininary_in) Mod 4 <> 0 Then
Bininary_in = String(4 - Len(Bininary_in) Mod 4, "0") & Bininary_in
End If
For i = 1 To Len(Bininary_in) Step 4
Select Case Mid(Bininary_in, i, 4)
Case "0000": H = H & "0"
Case "0001": H = H & "1"
Case "0010": H = H & "2"
Case "0011": H = H & "3"
Case "0100": H = H & "4"
Case "0101": H = H & "5"
Case "0110": H = H & "6"
Case "0111": H = H & "7"
Case "1000": H = H & "8"
Case "1001": H = H & "9"
Case "1010": H = H & "A"
Case "1011": H = H & "B"
Case "1100": H = H & "C"
Case "1101": H = H & "D"
Case "1110": H = H & "E"
Case "1111": H = H & "F"
End Select
Next i
B_To_H = H
End Function '16进制转二进制函数
Public Function H_To_B(ByVal hex_str As String, MinimumDigits As Integer) As String
Dim i As Long
Dim B As String
Dim ExtraDigitsNeeded As Integer hex_str = UCase(hex_str)
For i = 1 To Len(hex_str)
Select Case Mid(hex_str, i, 1)
Case "0": B = B & "0000"
Case "1": B = B & "0001"
Case "2": B = B & "0010"
Case "3": B = B & "0011"
Case "4": B = B & "0100"
Case "5": B = B & "0101"
Case "6": B = B & "0110"
Case "7": B = B & "0111"
Case "8": B = B & "1000"
Case "9": B = B & "1001"
Case "A": B = B & "1010"
Case "B": B = B & "1011"
Case "C": B = B & "1100"
Case "D": B = B & "1101"
Case "E": B = B & "1110"
Case "F": B = B & "1111"
End Select
Next i ExtraDigitsNeeded = MinimumDigits - Len(B)
If ExtraDigitsNeeded > 0 Then
B = String(ExtraDigitsNeeded, "0") & B
End If
H_To_B = B
End Function
http://community.csdn.net/Expert/topic/4527/4527535.xml?temp=.4105951
支持常见的ANSI、UTF-8、UTF-16LE、UTF-16BE这几种编码文本
一、使用字符串模拟移位,当然速度慢。用乘除移位快得多(VB便一起会自动将乘除“2^x”优化成移位)
二、不会使用AscW、ChrW函数
而且楼主的代码没考虑代理对机制
代理对机制允许用两个wchar存储一个Unicode编码在10000~1FFFFF之间字符
即代理对字符占两个“字符单元”
相关的模块不打算贴了,我只是想让楼主看看VB的位运算程序怎么写
Public Function ConvUTF8ByPtr(ByVal lpszSrc As Long) As String
Const ErrorChar As Byte = &H3F 'AscW("?")
Dim sOut As String
Dim cchSrc As Long
Dim cchDest As Long
Dim pByte() As Byte
Dim pBytePtr As SAFEARRAY1D
Dim byMask As Byte
Dim dwCode As Long
Dim iCount As Long
Dim I As Long
'得到源字符串长度
cchSrc = lstrlenA(ByVal lpszSrc)
If Len(cchSrc) = 0 Then Exit Function
sOut = String$(cchSrc, 0)
cchDest = 0
'构造模拟指针
Call MakePoint(VarPtrArray(pByte), pBytePtr, 1)
'开始循环
pBytePtr.pvData = lpszSrc
Do While pByte(0) <> 0
'得到该Unicode字符所占字节数
iCount = 0
byMask = &H80
Do While (pByte(0) And byMask) <> 0
iCount = iCount + 1
byMask = byMask \ 2 '右移一位
Loop
'得到该Unicode字符
If iCount = 0 Then 'ASCII
dwCode = pByte(0)
pBytePtr.pvData = pBytePtr.pvData + 1
ElseIf iCount = 1 Then '无效字符
dwCode = ErrorChar
pBytePtr.pvData = pBytePtr.pvData + 1
ElseIf iCount <= 6 Then 'UTF-8编码字符
dwCode = pByte(0) And (byMask - 1) '注意上面那个循环是怎么结束的,byMask正好是为0的那一位,它右边的位就是编码数据
For I = 1 To iCount - 1
If (pByte(I) And &HC0) = &H80 Then
dwCode = dwCode * &H40 Or (pByte(I) And &H3F)
Else
dwCode = ErrorChar
iCount = I
Exit For
End If
Next I
pBytePtr.pvData = pBytePtr.pvData + iCount
Else '无效字符
dwCode = ErrorChar
pBytePtr.pvData = pBytePtr.pvData + 1
End If
'UTF-16LE编码
If dwCode >= &H10FFFF Then dwCode = ErrorChar '超过UTF-16的表示范围
If dwCode <= &HFFFF& Then '普通形式
'sOut = sOut & ChrW(LoWord(dwCode))
Mid$(sOut, cchDest + 1, 1) = ChrW(LoWord(dwCode))
cchDest = cchDest + 1
Else '代理对形式
dwCode = dwCode - &H10000
'sOut = sOut _
& ChrW(&HD800 Or ((dwCode And &HFFC00) \ &H400)) _
& ChrW(&HDC00 Or (dwCode And &H3FF))
Mid$(sOut, cchDest + 1, 2) = ChrW(&HD800 Or ((dwCode And &HFFC00) \ &H400)) _
& ChrW(&HDC00 Or (dwCode And &H3FF))
cchDest = cchDest + 2
End If
Loop
'释放模拟指针
Call FreePoint(VarPtrArray(pByte))
ConvUTF8ByPtr = Left$(sOut, cchDest)
End Function
而字符串操作速度慢最关键的原因是我们用 & 链接字符串
每用一次& 就要创建字符串对象 合并字符串 释放资源
太消耗时间要提供速度就不要用&
关于这个讨论可以看
“用&进行字符串连接,竟然慢得象蜗牛,谁有高招?”
网址:http://community.csdn.net/Expert/topic/4150/4150233.xml?temp=.6081049zyl910上面的方法用的是Mid$连接字符串我用的是zlt982001(乐天)提供的CStringBuilder 类(其实是用数组 join 链接字符串)
我的代码是
http://club.5ivb.net/UploadFile/200621615615byUID16686.rar
一个把剪贴板里面的网页utf-8代码转换成unicode的例子
先在一个网页全选复制 然后点击 Get按钮
Function UTF8ToUniStr(BArray() As Byte) As String
' Convert a byte stream of UTF-8 to Unicode String
Dim i As Long
Dim TopIndex As Long
Dim TwoBytes(1) As Byte
Dim ThreeBytes(2) As Byte
Dim AByte As Byte
Dim TStr As String
Dim StrTmp As New CStringBuilder
TopIndex = UBound(BArray) ' Number of bytes equal TopIndex+1
If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert
i = 0 ' Initialise pointer
' Iterate through the Byte Array
Do While i <= TopIndex
AByte = BArray(i) ' fetch a byte
If (AByte And &HF0) = &HE0 Then '&HE1 1110 000
' Start of 3 byte UTF-8 group for a character
' Copy 3 byte to ThreeBytes
ThreeBytes(0) = BArray(i): i = i + 1
ThreeBytes(1) = BArray(i): i = i + 1
ThreeBytes(2) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
'TStr = TStr & ChrW(ToUTF16(ThreeBytes))
StrTmp.Append ChrW(ToUTF16(ThreeBytes))
ElseIf (AByte And &HD0) = &HC0 Then
'ElseIf (AByte >= &HC3) And (AByte <= &HC6) Then
' Start of 2 byte UTF-8 group for a character
TwoBytes(0) = BArray(i): i = i + 1
TwoBytes(1) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
'TStr = TStr & ChrW(ToUTF16(TwoBytes))
StrTmp.Append ChrW(ToUTF16(TwoBytes))
Else
' Normal ANSI character - use it as is
'TStr = TStr & ChrW(AByte)
StrTmp.Append ChrW(AByte)
i = i + 1 ' Increment byte array index
End If
Loop
TStr = StrTmp.toString
UTF8ToUniStr = TStr ' Return the resultant string
End Function
Function ToUTF16(BArray() As Byte) As Long
' Convert 2 or 3 UTF-8 bytes to a 16bit UTF-16BE
Dim IntUB
IntUB = UBound(BArray) ' Find out how many bytes UTF-8 takes
Select Case IntUB
Case 0 ' one byte UTF-8. Note that bArray starts with index=0
ToUTF16 = BArray(0) ' Use number as is
Case 1 ' two byte UTF-8
'If BArray(0) = 194 Then BArray(0) = &HC0
ToUTF16 = (BArray(0) And &H1F) * &H40 + (BArray(1) And &H3F)
If ToUTF16 = 160 Then ToUTF16 = 32
'ToUTF16 = ((BArray(0) * &H40) And &HF8) + (BArray(1) And &H3F)
Case 2 ' three byte UTF-8
ToUTF16 = CLng((BArray(0) And &HF)) * &H1000 + (BArray(1) And &H3F) * &H40 + (BArray(2) And &H3F)
End Select
End Function
而CStringBuilder 类的代码如下
使用方法参考网址:http://blog.csdn.net/zlt982001/archive/2005/05/29/383551.aspxOption Explicit' The secret to this class is that it uses the join
'function which is part of the VBA.Strings ClassPrivate mvarStringArray() As String
Private mvarArrayItems As Long
Public Sub Append(ByVal newStr As String)
ReDim Preserve mvarStringArray(mvarArrayItems) As String
mvarStringArray(mvarArrayItems) = newStr
mvarArrayItems = mvarArrayItems + 1
End Sub
Public Property Get toString() As String
If mvarArrayItems > 0 Then toString = Join(mvarStringArray, "")
End Property
Public Sub Reset()
mvarArrayItems = 0
Erase mvarStringArray
End SubPrivate Sub Class_Initialize()
If mvarArrayItems > 0 Then Reset
End Sub
Private Sub Class_Terminate()
Reset
End Sub
http://club.5ivb.net/dispbbs.asp?BoardID=3&id=38196里面是我以前百度搜索的一些帖子汇总的