Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)
Public Const CP_UTF8 = 65001
'Purpose:Convert Utf8 to Unicode Public Function UTF8_Decode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long Dim strBuffer As String Dim lngBufferSize As Long Dim lngResult As Long Dim bytUtf8() As Byte Dim n As Long
If LenB(sUTF8) = 0 Then Exit Function m_bIsNt = True ''''''''''''''''''' ADD ''''''''''''''''''''''''''' If m_bIsNt Then On Error GoTo EndFunction bytUtf8 = StrConv(sUTF8, vbFromUnicode) lngUtf8Size = UBound(bytUtf8) + 1 On Error GoTo 0 'Set buffer for longest possible string i.e. each byte is 'ANSI, thus 1 unicode(2 bytes)for every utf-8 character. lngBufferSize = lngUtf8Size * 2 strBuffer = String$(lngBufferSize, vbNullChar) 'Translate using code page 65001(UTF-8) lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _ lngUtf8Size, StrPtr(strBuffer), lngBufferSize) 'Trim result to actual length If lngResult Then UTF8_Decode = Left$(strBuffer, lngResult) End If Else Dim i As Long Dim TopIndex As Long Dim TwoBytes(1) As Byte Dim ThreeBytes(2) As Byte Dim AByte As Byte Dim TStr As String Dim BArray() As Byte
'Resume on error in case someone inputs text with accents 'that should have been encoded as UTF-8 On Error Resume Next
TopIndex = Len(sUTF8) ' Number of bytes equal TopIndex+1 If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert BArray = StrConv(sUTF8, vbFromUnicode) i = 0 ' Initialise pointer TopIndex = TopIndex - 1 ' Iterate through the Byte Array Do While i <= TopIndex AByte = BArray(i) If AByte < &H80 Then ' Normal ANSI character - use it as is TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index ElseIf AByte >= &HE0 Then 'was = &HE1 Then ' Start of 3 byte UTF-8 group for a character ' Copy 3 byte to ThreeBytes ThreeBytes(0) = BArray(i): i = i + 1 ThreeBytes(1) = BArray(i): i = i + 1 ThreeBytes(2) = BArray(i): i = i + 1 ' Convert Byte array to UTF-16 then Unicode TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F)) ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then ' Start of 2 byte UTF-8 group for a character TwoBytes(0) = BArray(i): i = i + 1 TwoBytes(1) = BArray(i): i = i + 1 ' Convert Byte array to UTF-16 then Unicode TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F)) Else ' Normal ANSI character - use it as is TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index End If Loop UTF8_Decode = TStr ' Return the resultant string Erase BArray End If
EndFunction:
End Function
接上 'Purpose:Convert Unicode string to UTF-8. Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String Dim i As Long Dim TLen As Long Dim lPtr As Long Dim UTF16 As Long Dim UTF8_EncodeLong As String
TLen = Len(strUnicode) If TLen = 0 Then Exit Function
If m_bIsNt Then Dim lngBufferSize As Long Dim lngResult As Long Dim bytUtf8() As Byte 'Set buffer for longest possible string. lngBufferSize = TLen * 3 + 1 ReDim bytUtf8(lngBufferSize - 1) 'Translate using code page 65001(UTF-8). lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _ TLen, bytUtf8(0), lngBufferSize, vbNullString, 0) 'Trim result to actual length. If lngResult Then lngResult = lngResult - 1 ReDim Preserve bytUtf8(lngResult) 'CopyMemory StrPtr(UTF8_Encode), bytUtf8(0&), lngResult UTF8_Encode = StrConv(bytUtf8, vbUnicode) ' For i = 0 To lngResult ' UTF8_Encode = UTF8_Encode & Chr$(bytUtf8(i)) ' Next End If Else For i = 1 To TLen ' Get UTF-16 value of Unicode character lPtr = StrPtr(strUnicode) + ((i - 1) * 2) CopyMemory UTF16, ByVal lPtr, 2 'Convert to UTF-8 If UTF16 < &H80 Then ' 1 UTF-8 byte UTF8_EncodeLong = Chr$(UTF16) ElseIf UTF16 < &H800 Then ' 2 UTF-8 bytes UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits UTF16 = UTF16 \ &H40 ' Shift right 6 bits UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong ' Use 5 remaining bits Else ' 3 UTF-8 bytes UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits UTF16 = UTF16 \ &H40 ' Shift right 6 bits UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong ' Use next 6 bits UTF16 = UTF16 \ &H40 ' Shift right 6 bits UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong ' Use 4 remaining bits End If UTF8_Encode = UTF8_Encode & UTF8_EncodeLong Next End If
'Substitute vbCrLf with HTML line breaks if requested. If bHTML Then UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "<br/>") End If
End Function
FORM中:Private Sub Command1_Click() Dim str_UTF8 As String Dim str_Unicode As String Open "path" For Input As 1 str_UTF8 = StrConv(InputB$(LOF(1), 1), vbUnicode) '读到字符串 str_UTF8 = Right(str_UTF8, Len(str_UTF8) - 1) '第一个字符是格式标志,无实际意义,去掉 Close 1 Module_UTF8.m_bIsNt = True str_Unicode = UTF8_Decode(str_UTF8) '转换为Unicode MsgBox str_Unicode End Sub
刚才试着打开 rainstormmaster(暴风雨 v2.0) 给的网页出现病毒???...
To ALL: zyl910的专栏给出了目前中国最好的VB读文本文件的Sorce Code. http://blog.csdn.net/zyl910/archive/2006/05/30/762693.aspx另外, http://www.chenoe.com/blog/article.asp?id=2030也是转自zyl910TO Rose2007(): 在英文Windows中,这一句有问题: bytUtf8 = StrConv(sUTF8, vbFromUnicode) 最好用CopyMemory. Good luck!
这么长~~ 你那个文件是UTF-8的嘛一般新建的TXT都是Unicode的.所以才会是12个字节.要读的方法多了去了~~~~~连我所知道的就有两种.这里给你一种简单的:Public Function ReadFile(ByVal FileName As String) As String Dim objStream As Object Set objStream = CreateObject("ADODB.Stream")
With objStream .Type = 2 .Mode = 3 .Open .Charset = "UTF-8" '不同编码时自己换 .LoadFromFile FileName ReadFile = .ReadText .Close End With End Function直接调用ReadFile("tmp1.txt"),返回值就是它的内容了.我用这个好象也可以,谢谢了~~
Public m_bIsNt As Boolean
Public Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Public Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (lpvDest As Any, lpvSource As Any, ByVal cbCopy As Long)
Public Const CP_UTF8 = 65001
'Purpose:Convert Utf8 to Unicode
Public Function UTF8_Decode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim n As Long
If LenB(sUTF8) = 0 Then Exit Function
m_bIsNt = True ''''''''''''''''''' ADD '''''''''''''''''''''''''''
If m_bIsNt Then
On Error GoTo EndFunction
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
'Set buffer for longest possible string i.e. each byte is
'ANSI, thus 1 unicode(2 bytes)for every utf-8 character.
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), _
lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
UTF8_Decode = Left$(strBuffer, lngResult)
End If
Else
Dim i As Long
Dim TopIndex As Long
Dim TwoBytes(1) As Byte
Dim ThreeBytes(2) As Byte
Dim AByte As Byte
Dim TStr As String
Dim BArray() As Byte
'Resume on error in case someone inputs text with accents
'that should have been encoded as UTF-8
On Error Resume Next
TopIndex = Len(sUTF8) ' Number of bytes equal TopIndex+1
If TopIndex = 0 Then Exit Function ' get out if there's nothing to convert
BArray = StrConv(sUTF8, vbFromUnicode)
i = 0 ' Initialise pointer
TopIndex = TopIndex - 1
' Iterate through the Byte Array
Do While i <= TopIndex
AByte = BArray(i)
If AByte < &H80 Then
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
ElseIf AByte >= &HE0 Then 'was = &HE1 Then
' Start of 3 byte UTF-8 group for a character
' Copy 3 byte to ThreeBytes
ThreeBytes(0) = BArray(i): i = i + 1
ThreeBytes(1) = BArray(i): i = i + 1
ThreeBytes(2) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((ThreeBytes(0) And &HF) * &H1000 + (ThreeBytes(1) And &H3F) * &H40 + (ThreeBytes(2) And &H3F))
ElseIf (AByte >= &HC2) And (AByte <= &HDB) Then
' Start of 2 byte UTF-8 group for a character
TwoBytes(0) = BArray(i): i = i + 1
TwoBytes(1) = BArray(i): i = i + 1
' Convert Byte array to UTF-16 then Unicode
TStr = TStr & ChrW$((TwoBytes(0) And &H1F) * &H40 + (TwoBytes(1) And &H3F))
Else
' Normal ANSI character - use it as is
TStr = TStr & Chr$(AByte): i = i + 1 ' Increment byte array index
End If
Loop
UTF8_Decode = TStr ' Return the resultant string
Erase BArray
End If
EndFunction:
End Function
Public Function UTF8_Encode(ByVal strUnicode As String, Optional ByVal bHTML As Boolean = True) As String
Dim i As Long
Dim TLen As Long
Dim lPtr As Long
Dim UTF16 As Long
Dim UTF8_EncodeLong As String
TLen = Len(strUnicode)
If TLen = 0 Then Exit Function
If m_bIsNt Then
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
'Set buffer for longest possible string.
lngBufferSize = TLen * 3 + 1
ReDim bytUtf8(lngBufferSize - 1)
'Translate using code page 65001(UTF-8).
lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), _
TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)
'Trim result to actual length.
If lngResult Then
lngResult = lngResult - 1
ReDim Preserve bytUtf8(lngResult)
'CopyMemory StrPtr(UTF8_Encode), bytUtf8(0&), lngResult
UTF8_Encode = StrConv(bytUtf8, vbUnicode)
' For i = 0 To lngResult
' UTF8_Encode = UTF8_Encode & Chr$(bytUtf8(i))
' Next
End If
Else
For i = 1 To TLen
' Get UTF-16 value of Unicode character
lPtr = StrPtr(strUnicode) + ((i - 1) * 2)
CopyMemory UTF16, ByVal lPtr, 2
'Convert to UTF-8
If UTF16 < &H80 Then ' 1 UTF-8 byte
UTF8_EncodeLong = Chr$(UTF16)
ElseIf UTF16 < &H800 Then ' 2 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HC0 + (UTF16 And &H1F)) & UTF8_EncodeLong ' Use 5 remaining bits
Else ' 3 UTF-8 bytes
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) ' Least Significant 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&H80 + (UTF16 And &H3F)) & UTF8_EncodeLong ' Use next 6 bits
UTF16 = UTF16 \ &H40 ' Shift right 6 bits
UTF8_EncodeLong = Chr$(&HE0 + (UTF16 And &HF)) & UTF8_EncodeLong ' Use 4 remaining bits
End If
UTF8_Encode = UTF8_Encode & UTF8_EncodeLong
Next
End If
'Substitute vbCrLf with HTML line breaks if requested.
If bHTML Then
UTF8_Encode = Replace$(UTF8_Encode, vbCrLf, "<br/>")
End If
End Function
FORM中:Private Sub Command1_Click()
Dim str_UTF8 As String
Dim str_Unicode As String
Open "path" For Input As 1
str_UTF8 = StrConv(InputB$(LOF(1), 1), vbUnicode) '读到字符串
str_UTF8 = Right(str_UTF8, Len(str_UTF8) - 1) '第一个字符是格式标志,无实际意义,去掉
Close 1
Module_UTF8.m_bIsNt = True
str_Unicode = UTF8_Decode(str_UTF8) '转换为Unicode
MsgBox str_Unicode
End Sub
zyl910的专栏给出了目前中国最好的VB读文本文件的Sorce Code.
http://blog.csdn.net/zyl910/archive/2006/05/30/762693.aspx另外,
http://www.chenoe.com/blog/article.asp?id=2030也是转自zyl910TO Rose2007():
在英文Windows中,这一句有问题:
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
最好用CopyMemory.
Good luck!
目前测试正常
谢谢VBAdvisor(Sunlight)的指点 我看下...
你那个文件是UTF-8的嘛一般新建的TXT都是Unicode的.所以才会是12个字节.要读的方法多了去了~~~~~连我所知道的就有两种.这里给你一种简单的:Public Function ReadFile(ByVal FileName As String) As String
Dim objStream As Object
Set objStream = CreateObject("ADODB.Stream")
With objStream
.Type = 2
.Mode = 3
.Open
.Charset = "UTF-8" '不同编码时自己换
.LoadFromFile FileName
ReadFile = .ReadText
.Close
End With
End Function直接调用ReadFile("tmp1.txt"),返回值就是它的内容了.我用这个好象也可以,谢谢了~~
你那个文件是UTF-8的嘛
================================你测试了吗? 怎么知道不是?
CSDN改规则了???
不是说你上面的方法~ 不好意思哦~ 谢谢!
-----------------------你那个文件是UTF-8的嘛一般新建的TXT都是Unicode的.
---------------
怎么总有病毒...