我想实现:从日文网页中截取所需的内容。我的操作过程是:先把网页下载到本地,然后读取源码,找到相应的内容,保存到记事本中。以上流程在处理英文网页时没有问题。但是现在处理日文网页时,下载到本地后,源码中的日文部分都变成了乱码,截取到记事本里的内容也是乱码。
网页内容:“書籍名”
提取出来的结果:“今酪叹”。查看网页编码:是日文 EUC。
上次提问的帖子:http://topic.csdn.net/TopicFiles/2005/06/03/10/4056226.xml
网页编码是:日文(Shift-JIS)
网页内容:“書籍名”
提取出来的结果:“今酪叹”。查看网页编码:是日文 EUC。
上次提问的帖子:http://topic.csdn.net/TopicFiles/2005/06/03/10/4056226.xml
网页编码是:日文(Shift-JIS)
Open "c:\euc-jp.txt" For Binary As #1
Dim b() As Byte, b2() As Byte
ReDim b(LOF(1) - 1)
Get #1, , b
EUC2Shift b(), b2(), UBound(b)
Text1.Text = Fn2(b2()) '(StrConv(b2, vbUnicode))
Close #1
End Sub

' Fn: converts text held in a VB String to Unicode by running it through
' MultiByteToWideChar with code page 932 (Shift-JIS).
'
' Parameters:
'   sIn - the string handed to the API as the multibyte source
'         (passed ByVal, with length -1 so the API stops at the NUL).
' Returns:
'   The converted string, or "" when the input is empty or the API
'   reports failure (return value 0).
'
' NOTE(review): the Declare for MultiByteToWideChar is not visible here;
' this assumes its pointer parameters are declared "As Any" - confirm.
Function Fn(ByVal sIn As String) As String
    Dim lLen As Long

    ' First call only measures: with a length of -1 the returned count
    ' includes the terminating NUL character.
    lLen = MultiByteToWideChar(932, 0, ByVal sIn, -1, ByVal 0&, 0)

    ' 0 means the API failed and 1 means "terminator only"; either way
    ' there is nothing to convert, and Space$(-1) would raise error 5.
    If lLen <= 1 Then
        Fn = ""
        Exit Function
    End If

    ' Allocate the visible characters; the string's own trailing NUL
    ' receives the terminator written by the second call.
    Fn = Space$(lLen - 1)
    Call MultiByteToWideChar(932, 0, ByVal sIn, -1, ByVal StrPtr(Fn), lLen)
End Function

' Fn2: converts a byte array to a Unicode String by running it through
' MultiByteToWideChar with code page 932 (Shift-JIS).
'
' Parameters:
'   abIn - source bytes. Conversion stops at the first zero byte or at
'          the end of the array, whichever comes first, so the array
'          does NOT need to be NUL-terminated.
' Returns:
'   The converted string, or "" when there are no bytes to convert or
'   the API reports failure (return value 0).
'
' NOTE(review): the Declare for MultiByteToWideChar is not visible here;
' this assumes its pointer parameters are declared "As Any" - confirm.
Function Fn2(abIn() As Byte) As String
    Dim lLen As Long
    Dim cbIn As Long
    Dim iFirst As Long
    Dim k As Long

    iFirst = LBound(abIn)

    ' Measure the payload ourselves instead of passing -1: with -1 the
    ' API keeps reading until it finds a NUL, which may lie beyond the
    ' end of the array when the buffer is completely filled.
    cbIn = 0
    For k = iFirst To UBound(abIn)
        If abIn(k) = 0 Then Exit For
        cbIn = cbIn + 1
    Next

    If cbIn = 0 Then
        Fn2 = ""
        Exit Function
    End If

    ' With an explicit byte count the returned length excludes any NUL.
    lLen = MultiByteToWideChar(932, 0, abIn(iFirst), cbIn, ByVal 0&, 0)
    If lLen <= 0 Then
        Fn2 = ""
        Exit Function
    End If

    Fn2 = Space$(lLen)
    Call MultiByteToWideChar(932, 0, abIn(iFirst), cbIn, ByVal StrPtr(Fn2), lLen)
End Function

' Seven2Eight: rewrites a two-byte code in place.
'
' Parameters (both ByRef, both overwritten):
'   b1 - first byte; remapped only when it lies in 33..126.
'   b2 - second byte; always shifted by 31 (odd b1) or 126 (even b1),
'        then bumped by one more if the result lands in 127..157.
'
' NOTE(review): the arithmetic matches the usual 7-bit JIS -> Shift-JIS
' row/cell transform, and the visible caller passes EUC bytes minus 128.
Sub Seven2Eight(b1 As Byte, b2 As Byte)
    Dim firstIsOdd As Boolean

    ' Decide on the ORIGINAL first byte before it is remapped below.
    firstIsOdd = ((b1 Mod 2) <> 0)

    ' Second byte: odd and even first bytes use different offsets.
    If firstIsOdd Then
        b2 = b2 + 31
    Else
        b2 = b2 + 126
    End If

    ' Values 127..157 are shifted up by one, so 127 is never produced.
    If b2 >= 127 And b2 < 158 Then
        b2 = b2 + 1
    End If

    ' First byte: (b1 + 1) \ 2 equals (b1 - 1) \ 2 + 1 for odd values
    ' and b1 \ 2 for even values, so one expression per range suffices.
    Select Case b1
        Case 33 To 94
            b1 = (b1 + 1) \ 2 + 112
        Case 95 To 126
            b1 = (b1 + 1) \ 2 + 176
    End Select
End Sub

' EUC2Shift: converts a buffer of EUC-JP bytes to Shift-JIS bytes.
'
' Parameters:
'   abIn  - source bytes; indexes 0..lSize are read.
'   abOut - receives the result. It is re-dimensioned to 0..lSize + 1 and
'           is zero-filled after the converted bytes, so it always ends
'           with at least one NUL byte.
'   lSize - index of the last byte of abIn to process (i.e. UBound).
'
' Conversion rules:
'   * CR LF pairs are copied as CR LF.
'   * A lone LF (&HA) or FF (&HC) is dropped; every other single byte is
'     copied unchanged (this matches the previous behaviour).
'   * &H8E followed by &HA1..&HDF (half-width katakana) becomes the
'     single trailing byte.
'   * A lead byte &HA1..&HFE followed by a trail byte &HA1..&HFE is
'     converted through Seven2Eight after stripping the high bit.
'   * A lead byte with a missing or invalid trail byte is copied as-is
'     instead of raising a subscript or overflow error.
'   * &H8F three-byte sequences are not specially handled.
Sub EUC2Shift(abIn() As Byte, abOut() As Byte, lSize As Long)
    Dim lead As Byte, trail As Byte
    Dim i As Long, j As Long
    Dim hasNext As Boolean

    ' One extra element keeps a guaranteed NUL terminator even when the
    ' output is exactly as long as the input.
    ReDim abOut(lSize + 1)

    i = 0
    j = 0
    Do While i <= lSize
        lead = abIn(i)

        ' Look ahead once, up front: VB's And does not short-circuit, so
        ' abIn(i + 1) must never appear in a condition at the last index.
        hasNext = (i < lSize)
        If hasNext Then
            trail = abIn(i + 1)
        Else
            trail = 0
        End If

        If lead = &HD And hasNext And trail = &HA Then
            ' CR LF is preserved as a pair.
            abOut(j) = &HD
            abOut(j + 1) = &HA
            j = j + 2
            i = i + 2
        ElseIf lead = &H8E And trail >= &HA1 And trail <= &HDF Then
            ' Half-width katakana: Shift-JIS uses the trail byte alone.
            abOut(j) = trail
            j = j + 1
            i = i + 2
        ElseIf lead >= &HA1 And lead <= &HFE And trail >= &HA1 And trail <= &HFE Then
            ' Two-byte character: strip the high bits, then remap.
            lead = lead - 128
            trail = trail - 128
            Seven2Eight lead, trail
            abOut(j) = lead
            abOut(j + 1) = trail
            j = j + 2
            i = i + 2
        Else
            ' Single byte (or a malformed multi-byte lead): copy through,
            ' except for a lone LF or FF, which are dropped.
            If lead <> &HA And lead <> &HC Then
                abOut(j) = lead
                j = j + 1
            End If
            i = i + 1
        End If
    Loop
End Sub