在网上搜索了很久,发现下面两个函数对部份汉字的转换是有效的,但对部份汉字的转换却老是出问题,希望大家能帮忙找出问题所在,或给出可通用的转换函数,谢谢!!!
问题疹状:
?UTF8_Decode(UTF8_Encode("pH值"))
-->返回pH
UTF8_Decode(UTF8_Encode("周杰伦"))
-->返回周杰
......Option Explicit
Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)
Public Const CP_UTF8 = 65001
Public Const m_bIsNt = True '标志系统是NT(2000)还是98
'Purpose:Convert Utf8 to Unicode
Public Function UTF8_Decode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long
If LenB(sUTF8) = 0 Then Exit Function
If m_bIsNt Then
On Error GoTo EndFunction
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
'Set buffer for longest possible string i.e. each byte is
'ANSI, thus 1 unicode(2 bytes)for every utf-8 character.
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
UTF8_Decode = Left$(strBuffer, lngResult)
End If
Else
Dim i As Long
Dim TopIndex As Long
Dim TwoBytes(1) As Byte
Dim ThreeBytes(2) As Byte
Dim AByte As Byte
Dim TStr As String
Dim BArray() As Byte
'Resume on error in case someone inputs text with accents
'that should have been encoded as UTF-8
On Error Resume Next
TopIndex = Len(sUTF8) ' Number of bytes equal TopIndex+1
If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert
BArray = StrConv(sUTF8, vbFromUnicode)
i = 0 ' Initialise pointer
TopIndex = TopIndex - 1
' Iterate through the Byte Array
Do While i <= TopIndex
AByte = BArray(i)
If AByte < &H80 Then
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
ElseIf AByte >= &HE0 Then 'was = &HE1 Then
' Start of 3 byte UTF-8 group for a character
' Copy 3 byte to ThreeBytes
ThreeBytes(0) = BArray(i): i = i + 1
ThreeBytes(1) = BArray(i): i = i + 1
ThreeBytes(2) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))
ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then
' Start of 2 byte UTF-8 group for a character
TwoBytes(0) = BArray(i): i = i + 1
TwoBytes(1) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))
Else
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
End If
Loop
UTF8_Decode = TStr ' Return the resultant string
Erase BArray
End If
EndFunction:
End Function
问题疹状:
?UTF8_Decode(UTF8_Encode("pH值"))
-->返回pH
UTF8_Decode(UTF8_Encode("周杰伦"))
-->返回周杰
......Option Explicit
Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)
Public Const CP_UTF8 = 65001
Public Const m_bIsNt = True '标志系统是NT(2000)还是98
'Purpose:Convert Utf8 to Unicode
Public Function UTF8_Decode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long
If LenB(sUTF8) = 0 Then Exit Function
If m_bIsNt Then
On Error GoTo EndFunction
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
'Set buffer for longest possible string i.e. each byte is
'ANSI, thus 1 unicode(2 bytes)for every utf-8 character.
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
UTF8_Decode = Left$(strBuffer, lngResult)
End If
Else
Dim i As Long
Dim TopIndex As Long
Dim TwoBytes(1) As Byte
Dim ThreeBytes(2) As Byte
Dim AByte As Byte
Dim TStr As String
Dim BArray() As Byte
'Resume on error in case someone inputs text with accents
'that should have been encoded as UTF-8
On Error Resume Next
TopIndex = Len(sUTF8) ' Number of bytes equal TopIndex+1
If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert
BArray = StrConv(sUTF8, vbFromUnicode)
i = 0 ' Initialise pointer
TopIndex = TopIndex - 1
' Iterate through the Byte Array
Do While i <= TopIndex
AByte = BArray(i)
If AByte < &H80 Then
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
ElseIf AByte >= &HE0 Then 'was = &HE1 Then
' Start of 3 byte UTF-8 group for a character
' Copy 3 byte to ThreeBytes
ThreeBytes(0) = BArray(i): i = i + 1
ThreeBytes(1) = BArray(i): i = i + 1
ThreeBytes(2) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))
ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then
' Start of 2 byte UTF-8 group for a character
TwoBytes(0) = BArray(i): i = i + 1
TwoBytes(1) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))
Else
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
End If
Loop
UTF8_Decode = TStr ' Return the resultant string
Erase BArray
End If
EndFunction:
End Function
Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String
Dim i As Long
Dim TLen As Long
Dim lPtr As Long
Dim UTF16 As Long
Dim UTF8_EncodeLong As String
TLen = Len(strUnicode)
If TLen = 0 Then Exit Function
If m_bIsNt Then
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
'Set buffer for longest possible string.
lngBufferSize = TLen * 3 + 1
ReDim bytUtf8(lngBufferSize - 1)
'Translate using code page 65001(UTF-8).
lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _
TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)
'Trim result to actual length.
If lngResult Then
lngResult = lngResult - 1
ReDim Preserve bytUtf8(lngResult)
'CopyMemory StrPtr(UTF8_Encode), bytUtf8(0&), lngResult
UTF8_Encode = StrConv(bytUtf8, vbUnicode)
' For i = 0 To lngResult
' UTF8_Encode = UTF8_Encode & Chr$(bytUtf8(i))
' Next
End If
Else
For i = 1 To TLen
' Get UTF-16 value of Unicode character
lPtr = StrPtr(strUnicode) + ((i - 1) * 2)
CopyMemory UTF16, ByVal lPtr, 2
'Convert to UTF-8
If UTF16 < &H80 Then ' 1 UTF-8 byte
UTF8_EncodeLong = Chr$(UTF16)
ElseIf UTF16 < &H800 Then ' 2 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong ' Use 5 remaining bits
Else ' 3 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong ' Use next 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong ' Use 4 remaining bits
End If
UTF8_Encode = UTF8_Encode & UTF8_EncodeLong
Next
End If
'Substitute vbCrLf with HTML line breaks if requested.
If bHTML Then
UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "<br/>")
End If
End Function
http://topic.csdn.net/u/20080119/13/616723e6-f38f-41c7-b57c-c876c8a70cfd.html
'累死了.
Option ExplicitPrivate Sub Command1_Click()
Dim Arr() As Byte
Arr = UnicodeToUTF8("中华人民共和国23fvf刘")
MsgBox UTF8ToUnicode(Arr)
End SubPrivate Function UnicodeToUTF8(ByVal Txt As String) As Byte()
Dim szRet As String
Dim x As Single
Dim nAsc As Long
Dim cArr() As Byte
Dim Count As Long
Dim i As Single
Dim wch As String
If Txt = "" Then Exit Function
Count = 0
For x = 1 To Len(Txt)
wch = Mid(Txt, x, 1)
nAsc = AscW(wch) If nAsc < 0 Then
nAsc = nAsc + 65536
End If
szRet = ConverTenToN(nAsc, 2, "01")
If Len(szRet) < 16 Then
szRet = String(16 - Len(szRet), Asc("0")) & szRet
End If
szRet = "1110" & Mid(szRet, 1, 4) & "10" & Mid(szRet, 5, 6) & "10" & Mid(szRet, 11, 6)
Me.Caption = szRet
Count = Count + 3
ReDim Preserve cArr(Count) As Byte
cArr(Count - 2) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 1, 2)
cArr(Count - 1) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 3, 2)
cArr(Count) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 5, 2)
Next
UnicodeToUTF8 = cArr
End Function
Private Function UTF8ToUnicode(bArr() As Byte) As String
Dim i As Long
Dim cTxt As String
Dim sAns As String
Dim rAns As String
For i = 1 To UBound(bArr) / 3
cTxt = Hex(bArr(i * 3 - 2)) & Hex(bArr(i * 3 - 1)) & Hex(bArr(i * 3))
sAns = ConverTenToN(ConverNToTen(16, cTxt, "0123456789ABCDEF"), 2, "01")
rAns = Mid(sAns, 5, 4) & Mid(sAns, 11, 6) & Mid(sAns, 19, 6)
UTF8ToUnicode = UTF8ToUnicode & ChrW(ConverNToTen(2, rAns, "01"))
Next
End Function
'*************************************
'目的:将10进制数转换为N进制数
'输入: Ten 要转换的十进制数
' N N进制数
' Chr N进制数的字符集
'返回: 成功 转换后的数据
' 失败 ""
'*************************************
Private Function ConverTenToN(ByVal Ten As Long, ByVal N As Long, ByVal Chr As String) As String
If N <= 1 Then Exit Function
If Len(Chr) < N Then Exit Function
While Ten <> 0
ConverTenToN = Mid(Chr, (Ten Mod N) + 1, 1) & ConverTenToN
Ten = Int(Ten / N)
Wend
End Function
'*************************************
'目的:将N进制数转换为十进制数
'输入: Data 字符串 要转换的N进制数字符串
' N N进制数
' Chr N进制数的字符集
'返回: 成功 转换后的数据
' 失败 ""
'*************************************
Private Function ConverNToTen(ByVal N As Long, ByVal Data As String, ByVal Chr As String) As Double
Dim DataIndex As Long
Dim i As Long
ConverNToTen = 0
For i = 0 To Len(Data) - 1
DataIndex = InStr(1, Chr, Mid(Data, Len(Data) - i, 1))
'Stop
If DataIndex = -1 Then
MsgBox "要转换的" & N & "进制数字符集错误!"
Exit Function
Else
ConverNToTen = ConverNToTen + N ^ i * (DataIndex - 1)
End If
Next
End Function
不过,刚才试了下,用你的UnicodeToUTF8函数转换后的字串确实是可以用UTF8ToUnicode函数进行还原...
但问题是:你的UnicodeToUTF8函数生成的是UTF8编码吗???我试了下用UTF8ToUnicode函数转换别人已经生成好的UTF8编码,发现所有字符(包括英文)都变成乱码了...
而用我上面提供的UTF8_Decode函数进行转换,至少英文转换是不会有乱码现象的,且绝大部份中文的转换也是正常的...
中1
UTF8 的编码是 &HE4 &HB8 &HAD &H30(0)'完全没有进行编码转换而我的,无论是英文还是中文都按 UTF8 的规则进行了转换 一个数据 用三个字节表示.中1
UTF8 的编码是 &HE4 &HB8 &HAD &HE0 &H80 &HB0 二个字符一共是6个字节.当然了.如果你想达到别人的效果,你在我的上面改一下,如果不是汉字就不进行转换就行了.
E 4 B 8 A D
0x80 == ( *( utf8 + 1 ) & 0xc0 ) &&
0x80 == ( *( utf8 + 2 ) & 0xc0 ) )
{
//3个字节
}
else if ( 0xc0 == ( *utf8 & 0xe0 ) &&
0x80 == ( *( utf8 + 1 ) & 0xc0 ) )
{
//2个字节
}
else
{
//1个字节
}