UTF-8与Unicode相互转换时部份汉字变成乱码或消失的问题

在网上搜索了很久,发现下面两个函数对部份汉字的转换是有效的,但对部份汉字的转换却老是出问题,希望大家能帮忙找出问题所在,或给出可通用的转换函数,谢谢!!!
问题疹状:
?UTF8_Decode(UTF8_Encode("pH值"))
-->返回pH
UTF8_Decode(UTF8_Encode("周杰伦"))
-->返回周杰
......Option Explicit

  Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
  Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
  Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)

  Public Const CP_UTF8 = 65001
  Public Const m_bIsNt = True    '标志系统是NT(2000)还是98

  'Purpose:Convert   Utf8   to   Unicode
  Public Function UTF8_Decode(ByVal sUTF8 As String) As String

        Dim lngUtf8Size               As Long
        Dim strBuffer                   As String
        Dim lngBufferSize           As Long
        Dim lngResult                   As Long
        Dim bytUtf8()                   As Byte
        Dim n                                   As Long


        If LenB(sUTF8) = 0 Then Exit Function

        If m_bIsNt Then
              On Error GoTo EndFunction
              bytUtf8 = StrConv(sUTF8, vbFromUnicode)
              lngUtf8Size = UBound(bytUtf8) + 1
              On Error GoTo 0
              'Set   buffer   for   longest   possible   string   i.e.   each   byte   is
              'ANSI,   thus   1   unicode(2   bytes)for   every   utf-8   character.
              lngBufferSize = lngUtf8Size * 2
              strBuffer = String$(lngBufferSize, vbNullChar)
              'Translate   using   code   page   65001(UTF-8)
              lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
                    lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
              'Trim   result   to   actual   length
              If lngResult Then
                    UTF8_Decode = Left$(strBuffer, lngResult)
              End If
        Else
              Dim i                                   As Long
              Dim TopIndex                     As Long
              Dim TwoBytes(1)               As Byte
              Dim ThreeBytes(2)           As Byte
              Dim AByte                           As Byte
              Dim TStr                             As String
              Dim BArray()                     As Byte

              'Resume   on   error   in   case   someone   inputs   text   with   accents
              'that   should   have   been   encoded   as   UTF-8
              On Error Resume Next

              TopIndex = Len(sUTF8)         '   Number   of   bytes   equal   TopIndex+1
              If TopIndex = 0 Then Exit Function               '   get   out   if   there's   nothing   to   convert
              BArray = StrConv(sUTF8, vbFromUnicode)
              i = 0       '   Initialise   pointer
              TopIndex = TopIndex - 1
              '   Iterate   through   the   Byte   Array
              Do While i <= TopIndex
                    AByte = BArray(i)
                    If AByte < &H80 Then
                          '   Normal   ANSI   character   -   use   it   as   is
                          TStr = TStr & Chr$(AByte):           i = i + 1           '   Increment   byte   array   index
                    ElseIf AByte >= &HE0 Then                           'was   =   &HE1   Then
                          '   Start   of   3   byte   UTF-8   group   for   a   character
                          '   Copy   3   byte   to   ThreeBytes
                          ThreeBytes(0) = BArray(i):       i = i + 1
                          ThreeBytes(1) = BArray(i):       i = i + 1
                          ThreeBytes(2) = BArray(i):       i = i + 1
                          '   Convert   Byte   array   to   UTF-16   then   Unicode
                          TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))
                    ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then
                          '   Start   of   2   byte   UTF-8   group   for   a   character
                          TwoBytes(0) = BArray(i):       i = i + 1
                          TwoBytes(1) = BArray(i):       i = i + 1
                          '   Convert   Byte   array   to   UTF-16   then   Unicode
                          TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))
                    Else
                          '   Normal   ANSI   character   -   use   it   as   is
                          TStr = TStr & Chr$(AByte):           i = i + 1           '   Increment   byte   array   index
                    End If
              Loop
              UTF8_Decode = TStr             '   Return   the   resultant   string
              Erase BArray
        End If

EndFunction:

  End Function

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

  'Purpose:Convert   Unicode   string   to   UTF-8.
  Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String
        Dim i                                   As Long
        Dim TLen                             As Long
        Dim lPtr                             As Long
        Dim UTF16                           As Long
        Dim UTF8_EncodeLong       As String

        TLen = Len(strUnicode)
        If TLen = 0 Then Exit Function

        If m_bIsNt Then
              Dim lngBufferSize           As Long
              Dim lngResult                   As Long
              Dim bytUtf8()                   As Byte
              'Set   buffer   for   longest   possible   string.
              lngBufferSize = TLen * 3 + 1
              ReDim bytUtf8(lngBufferSize - 1)
              'Translate   using   code   page   65001(UTF-8).
              lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _
                    TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)
              'Trim   result   to   actual   length.
              If lngResult Then
                    lngResult = lngResult - 1
                    ReDim Preserve bytUtf8(lngResult)
                    'CopyMemory   StrPtr(UTF8_Encode),   bytUtf8(0&),   lngResult
                    UTF8_Encode = StrConv(bytUtf8, vbUnicode)
                    '   For   i   =   0   To   lngResult
                    '         UTF8_Encode   =   UTF8_Encode   &   Chr$(bytUtf8(i))
                    '   Next
              End If
        Else
              For i = 1 To TLen
                    '   Get   UTF-16   value   of   Unicode   character
                    lPtr = StrPtr(strUnicode) + ((i - 1) * 2)
                    CopyMemory UTF16, ByVal lPtr, 2
                    'Convert   to   UTF-8
                    If UTF16 < &H80 Then                                                                                     '   1   UTF-8   byte
                          UTF8_EncodeLong = Chr$(UTF16)
                    ElseIf UTF16 < &H800 Then                                                                           '   2   UTF-8   bytes
                          UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F))                                         '   Least   Significant   6   bits
                          UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits
                          UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong                     '   Use   5   remaining   bits
                    Else                                                                                                             '   3   UTF-8   bytes
                          UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F))                                         '   Least   Significant   6   bits
                          UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits
                          UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong                     '   Use   next   6   bits
                          UTF16 = UTF16 \ &H40                                                                               '   Shift   right   6   bits
                          UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong                       '   Use   4   remaining   bits
                    End If
                    UTF8_Encode = UTF8_Encode & UTF8_EncodeLong
              Next
        End If

        'Substitute   vbCrLf   with   HTML   line   breaks   if   requested.
        If bHTML Then
              UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "<br/>")
        End If

  End Function
参考一下这里面的:
http://topic.csdn.net/u/20080119/13/616723e6-f38f-41c7-b57c-c876c8a70cfd.html
'刚刚写的,用纯VB代码写的.
'累死了.
Option ExplicitPrivate Sub Command1_Click()
  Dim Arr() As Byte

  Arr = UnicodeToUTF8("中华人民共和国23fvf刘")

  MsgBox UTF8ToUnicode(Arr)
End SubPrivate Function UnicodeToUTF8(ByVal Txt As String) As Byte()
     Dim szRet As String
     Dim x As Single
     Dim nAsc As Long
     Dim cArr() As Byte
     Dim Count As Long
     Dim i As Single
     Dim wch As String

     If Txt = "" Then Exit Function
     Count = 0

     For x = 1 To Len(Txt)
         wch = Mid(Txt, x, 1)
         nAsc = AscW(wch)         If nAsc < 0 Then
               nAsc = nAsc + 65536
         End If

         szRet = ConverTenToN(nAsc, 2, "01")


         If Len(szRet) < 16 Then
             szRet = String(16 - Len(szRet), Asc("0")) & szRet
         End If

         szRet = "1110" & Mid(szRet, 1, 4) & "10" & Mid(szRet, 5, 6) & "10" & Mid(szRet, 11, 6)

         Me.Caption = szRet
         Count = Count + 3
         ReDim Preserve cArr(Count) As Byte

         cArr(Count - 2) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 1, 2)
         cArr(Count - 1) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 3, 2)
         cArr(Count) = "&H" & Mid(Hex(ConverNToTen(2, szRet, "01")), 5, 2)

     Next

     UnicodeToUTF8 = cArr
End Function
Private Function UTF8ToUnicode(bArr() As Byte) As String
   Dim i As Long
   Dim cTxt As String
   Dim sAns As String

   Dim rAns As String

   For i = 1 To UBound(bArr) / 3
      cTxt = Hex(bArr(i * 3 - 2)) & Hex(bArr(i * 3 - 1)) & Hex(bArr(i * 3))
      sAns = ConverTenToN(ConverNToTen(16, cTxt, "0123456789ABCDEF"), 2, "01")
      rAns = Mid(sAns, 5, 4) & Mid(sAns, 11, 6) & Mid(sAns, 19, 6)
      UTF8ToUnicode = UTF8ToUnicode & ChrW(ConverNToTen(2, rAns, "01"))

   Next


End Function
'*************************************
   '目的:将10进制数转换为N进制数

   '输入: Ten  要转换的十进制数
   '      N    N进制数
   '      Chr  N进制数的字符集

   '返回: 成功转换后的数据
   '      失败 ""

'*************************************
Private Function ConverTenToN(ByVal Ten As Long, ByVal N As Long, ByVal Chr As String) As String
If N <= 1 Then Exit Function
If Len(Chr) < N Then Exit Function
   While Ten <> 0
     ConverTenToN = Mid(Chr, (Ten Mod N) + 1, 1) & ConverTenToN
     Ten = Int(Ten / N)
   Wend
End Function
'*************************************
   '目的:将N进制数转换为十进制数

   '输入: Data  字符串要转换的N进制数字符串
   '      N     N进制数
   '      Chr   N进制数的字符集

   '返回: 成功转换后的数据
   '      失败 ""

'*************************************
Private Function ConverNToTen(ByVal N As Long, ByVal Data As String, ByVal Chr As String) As Double
   Dim DataIndex As Long
   Dim i As Long
   ConverNToTen = 0
   For i = 0 To Len(Data) - 1
       DataIndex = InStr(1, Chr, Mid(Data, Len(Data) - i, 1))
       'Stop
       If DataIndex = -1 Then
          MsgBox "要转换的" & N & "进制数字符集错误！"
          Exit Function
       Else
          ConverNToTen = ConverNToTen + N ^ i * (DataIndex - 1)
       End If
   Next
End Function
辛苦了!谢谢哈!!!
不过,刚才试了下,用你的UnicodeToUTF8函数转换后的字串确实是可以用UTF8ToUnicode函数进行还原...
但问题是:你的UnicodeToUTF8函数生成的是UTF8编码吗???我试了下用UTF8ToUnicode函数转换别人已经生成好的UTF8编码,发现所有字符(包括英文)都变成乱码了...
而用我上面提供的UTF8_Decode函数进行转换,至少英文转换是不会有乱码现象的,且绝大部份中文的转换也是正常的...
注意:关于英文及数字的 UFT8 编码.  别人的都是没有进行编码的.比如:
中1
UTF8 的编码是 &HE4 &HB8 &HAD &H30(0)'完全没有进行编码转换而我的,无论是英文还是中文都按 UTF8 的规则进行了转换一个数据用三个字节表示.中1
UTF8 的编码是 &HE4 &HB8 &HAD &HE0 &H80 &HB0 二个字符一共是6个字节.当然了.如果你想达到别人的效果,你在我的上面改一下,如果不是汉字就不进行转换就行了.
UFT8 编码规则:你可以自己研究一下.Unicode 是16 位.UFT8    是24 位.比如: 中字0100 1110 0010 1101  4     E    2    DUFT8:(1110) 0100 (10)11 1000 (10)10 1101
   E    4      B     8     A     D
if( 0xe0 == ( *utf8 & 0xf0 ) &&
0x80 == ( *( utf8 + 1 ) & 0xc0 ) &&
0x80 == ( *( utf8 + 2 ) & 0xc0 ) )
{
//3个字节
}
else if ( 0xc0 == ( *utf8 & 0xe0 ) &&
0x80 == ( *( utf8 + 1 ) & 0xc0 ) )
{
//2个字节
}
else
{
//1个字节
}
补充上一楼：utf8是8或16或24位的。