请教大家:在 C# 里用 Encoding.GetEncoding("gb2312") 读取文件,结果仍然有乱码,是什么原因?这个 .dat 文件含有汉字,是用 StreamWriter 写入的;然后用 StreamReader sr = new StreamReader(dd, Encoding.GetEncoding("gb2312")); 来读取,读出来的内容却有乱码。 编码
在日本工作,只有日本的判断,有时间做一个中文的。参考一下

''' <summary>
''' Guesses the text encoding of a byte buffer using byte-pattern heuristics
''' aimed at Japanese text (JIS escape sequences, Shift_JIS, EUC, UTF-8).
''' </summary>
''' <param name="bytes">Raw bytes to inspect. Must not be Nothing.</param>
''' <returns>
''' Encoding.Unicode, Encoding.ASCII, code page 50220 (JIS), 51932 (EUC),
''' 932 (SJIS) or Encoding.UTF8; Nothing when the data looks binary or the
''' SJIS/EUC/UTF-8 scores do not produce a strict winner.
''' </returns>
''' <exception cref="System.ArgumentNullException">bytes is Nothing.</exception>
Public Shared Function GetCode(ByVal bytes As Byte()) As System.Text.Encoding
    If bytes Is Nothing Then
        Throw New System.ArgumentNullException("bytes")
    End If

    Const bEscape As Byte = &H1B
    Const bAt As Byte = &H40
    Const bDollar As Byte = &H24
    Const bAnd As Byte = &H26
    Const bOpen As Byte = &H28 ' "("
    Const bB As Byte = &H42
    Const bD As Byte = &H44
    Const bJ As Byte = &H4A
    Const bI As Byte = &H49

    Dim len As Integer = bytes.Length
    Dim b1 As Byte, b2 As Byte, b3 As Byte, b4 As Byte

    ' Encode::is_utf8 is ignored.
    Dim isBinary As Boolean = False
    Dim i As Integer

    ' Pass 1: control bytes (<= &H06, &H7F, &HFF) mark the data as binary,
    ' unless a zero byte followed by a 7-bit byte suggests UTF-16.
    For i = 0 To len - 1
        b1 = bytes(i)
        If b1 <= &H6 OrElse b1 = &H7F OrElse b1 = &HFF Then
            ' binary
            isBinary = True
            If b1 = &H0 AndAlso i < len - 1 AndAlso bytes(i + 1) <= &H7F Then
                ' smells like raw unicode
                Return System.Text.Encoding.Unicode
            End If
        End If
    Next
    If isBinary Then
        Return Nothing
    End If

    ' Pass 2: no ESC and no byte >= &H80 means "not Japanese" -> ASCII.
    ' An empty buffer also ends up here.
    Dim notJapanese As Boolean = True
    For i = 0 To len - 1
        b1 = bytes(i)
        If b1 = bEscape OrElse &H80 <= b1 Then
            notJapanese = False
            Exit For
        End If
    Next
    If notJapanese Then
        Return System.Text.Encoding.ASCII
    End If

    ' Pass 3: look for JIS escape sequences starting with ESC.
    For i = 0 To len - 3
        b1 = bytes(i)
        b2 = bytes(i + 1)
        b3 = bytes(i + 2)
        If b1 = bEscape Then
            If b2 = bDollar AndAlso b3 = bAt Then
                ' JIS_0208 1978
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bDollar AndAlso b3 = bB Then
                ' JIS_0208 1983
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bOpen AndAlso (b3 = bB OrElse b3 = bJ) Then
                ' JIS_ASC
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bOpen AndAlso b3 = bI Then
                ' JIS_KANA
                Return System.Text.Encoding.GetEncoding(50220)
            End If
            If i < len - 3 Then
                b4 = bytes(i + 3)
                If b2 = bDollar AndAlso b3 = bOpen AndAlso b4 = bD Then
                    ' JIS_0212
                    Return System.Text.Encoding.GetEncoding(50220)
                End If
                If i < len - 5 AndAlso _
                   b2 = bAnd AndAlso b3 = bAt AndAlso b4 = bEscape AndAlso _
                   bytes(i + 4) = bDollar AndAlso bytes(i + 5) = bB Then
                    ' JIS_0208 1990
                    Return System.Text.Encoding.GetEncoding(50220)
                End If
            End If
        End If
    Next

    ' Pass 4: should be euc|sjis|utf8. Each scan adds the length of every
    ' matching multi-byte sequence to its score. The loop counter is advanced
    ' inside the body on purpose so a matched sequence is not rescanned.
    Dim sjis As Integer = 0
    Dim euc As Integer = 0
    Dim utf8 As Integer = 0

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If ((&H81 <= b1 AndAlso b1 <= &H9F) OrElse _
            (&HE0 <= b1 AndAlso b1 <= &HFC)) AndAlso _
           ((&H40 <= b2 AndAlso b2 <= &H7E) OrElse _
            (&H80 <= b2 AndAlso b2 <= &HFC)) Then
            ' SJIS_C
            sjis += 2
            i += 1
        End If
    Next

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If ((&HA1 <= b1 AndAlso b1 <= &HFE) AndAlso _
            (&HA1 <= b2 AndAlso b2 <= &HFE)) OrElse _
           (b1 = &H8E AndAlso (&HA1 <= b2 AndAlso b2 <= &HDF)) Then
            ' EUC_C / EUC_KANA
            euc += 2
            i += 1
        ElseIf i < len - 2 Then
            b3 = bytes(i + 2)
            If b1 = &H8F AndAlso (&HA1 <= b2 AndAlso b2 <= &HFE) AndAlso _
               (&HA1 <= b3 AndAlso b3 <= &HFE) Then
                ' EUC_0212
                euc += 3
                i += 2
            End If
        End If
    Next

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If (&HC0 <= b1 AndAlso b1 <= &HDF) AndAlso _
           (&H80 <= b2 AndAlso b2 <= &HBF) Then
            ' UTF8 (2-byte sequence)
            utf8 += 2
            i += 1
        ElseIf i < len - 2 Then
            b3 = bytes(i + 2)
            If (&HE0 <= b1 AndAlso b1 <= &HEF) AndAlso _
               (&H80 <= b2 AndAlso b2 <= &HBF) AndAlso _
               (&H80 <= b3 AndAlso b3 <= &HBF) Then
                ' UTF8 (3-byte sequence)
                utf8 += 3
                i += 2
            End If
        End If
    Next

    ' Extra weighting for UTF-8 (utf8 += utf8 / 2) is left disabled.
    System.Diagnostics.Debug.WriteLine( _
        String.Format("sjis = {0}, euc = {1}, utf8 = {2}", sjis, euc, utf8))

    ' Only a strict winner is reported; ties fall through to Nothing.
    If euc > sjis AndAlso euc > utf8 Then
        ' EUC
        Return System.Text.Encoding.GetEncoding(51932)
    ElseIf sjis > euc AndAlso sjis > utf8 Then
        ' SJIS
        Return System.Text.Encoding.GetEncoding(932)
    ElseIf utf8 > euc AndAlso utf8 > sjis Then
        ' UTF8
        Return System.Text.Encoding.UTF8
    End If

    Return Nothing
End Function

' Button1 click: load the file named in TextBox1, guess its encoding and
' show the decoded text in RichTextBox1.
Private Sub Button1_Click(ByVal sender As Object, _
        ByVal e As System.EventArgs) Handles Button1.Click
    ' ReadAllBytes reads the whole file and closes it even if reading fails.
    Dim bs As Byte() = System.IO.File.ReadAllBytes(TextBox1.Text)

    Dim enc As System.Text.Encoding = GetCode(bs)
    If enc Is Nothing Then
        ' GetCode could not decide; fall back to the system default encoding
        ' instead of failing with a NullReferenceException.
        enc = System.Text.Encoding.Default
    End If

    RichTextBox1.Text = enc.GetString(bs)
End Sub
就是说必须用 StreamWriter 写入时所用的编码来读取,是吗?以前好像没碰到过这种情况,基本上用 Encoding.GetEncoding("gb2312") 都可以。StreamWriter 我没有设置编码类型。
引用:"就是说得用StreamWriter流用的编码来读取是吗?以前用没碰到过这种情况好像,基本上 Encoding.GetEncoding("gb2312")都行了,StreamWriter我没设置编码类型" —— 如果没有设置编码,也可以试试这个:Encoding.Default。
谢谢你,用 UTF-8 可以;用 Encoding.GetEncoding("gb2312") 或者 System.Text.Encoding.Default 写都出现乱码。(补充说明:StreamWriter 在没有指定编码时默认使用不带 BOM 的 UTF-8,所以要用 UTF-8 来读取。)
''' <summary>
''' Guesses the text encoding of a byte buffer using byte-pattern heuristics
''' aimed at Japanese text (JIS escape sequences, Shift_JIS, EUC, UTF-8).
''' </summary>
''' <param name="bytes">Raw bytes to inspect. Must not be Nothing.</param>
''' <returns>
''' Encoding.Unicode, Encoding.ASCII, code page 50220 (JIS), 51932 (EUC),
''' 932 (SJIS) or Encoding.UTF8; Nothing when the data looks binary or the
''' SJIS/EUC/UTF-8 scores do not produce a strict winner.
''' </returns>
''' <exception cref="System.ArgumentNullException">bytes is Nothing.</exception>
Public Shared Function GetCode(ByVal bytes As Byte()) As System.Text.Encoding
    If bytes Is Nothing Then
        Throw New System.ArgumentNullException("bytes")
    End If

    Const bEscape As Byte = &H1B
    Const bAt As Byte = &H40
    Const bDollar As Byte = &H24
    Const bAnd As Byte = &H26
    Const bOpen As Byte = &H28 ' "("
    Const bB As Byte = &H42
    Const bD As Byte = &H44
    Const bJ As Byte = &H4A
    Const bI As Byte = &H49

    Dim len As Integer = bytes.Length
    Dim b1 As Byte, b2 As Byte, b3 As Byte, b4 As Byte

    ' Encode::is_utf8 is ignored.
    Dim isBinary As Boolean = False
    Dim i As Integer

    ' Pass 1: control bytes (<= &H06, &H7F, &HFF) mark the data as binary,
    ' unless a zero byte followed by a 7-bit byte suggests UTF-16.
    For i = 0 To len - 1
        b1 = bytes(i)
        If b1 <= &H6 OrElse b1 = &H7F OrElse b1 = &HFF Then
            ' binary
            isBinary = True
            If b1 = &H0 AndAlso i < len - 1 AndAlso bytes(i + 1) <= &H7F Then
                ' smells like raw unicode
                Return System.Text.Encoding.Unicode
            End If
        End If
    Next
    If isBinary Then
        Return Nothing
    End If

    ' Pass 2: no ESC and no byte >= &H80 means "not Japanese" -> ASCII.
    ' An empty buffer also ends up here.
    Dim notJapanese As Boolean = True
    For i = 0 To len - 1
        b1 = bytes(i)
        If b1 = bEscape OrElse &H80 <= b1 Then
            notJapanese = False
            Exit For
        End If
    Next
    If notJapanese Then
        Return System.Text.Encoding.ASCII
    End If

    ' Pass 3: look for JIS escape sequences starting with ESC.
    For i = 0 To len - 3
        b1 = bytes(i)
        b2 = bytes(i + 1)
        b3 = bytes(i + 2)
        If b1 = bEscape Then
            If b2 = bDollar AndAlso b3 = bAt Then
                ' JIS_0208 1978
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bDollar AndAlso b3 = bB Then
                ' JIS_0208 1983
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bOpen AndAlso (b3 = bB OrElse b3 = bJ) Then
                ' JIS_ASC
                Return System.Text.Encoding.GetEncoding(50220)
            ElseIf b2 = bOpen AndAlso b3 = bI Then
                ' JIS_KANA
                Return System.Text.Encoding.GetEncoding(50220)
            End If
            If i < len - 3 Then
                b4 = bytes(i + 3)
                If b2 = bDollar AndAlso b3 = bOpen AndAlso b4 = bD Then
                    ' JIS_0212
                    Return System.Text.Encoding.GetEncoding(50220)
                End If
                If i < len - 5 AndAlso _
                   b2 = bAnd AndAlso b3 = bAt AndAlso b4 = bEscape AndAlso _
                   bytes(i + 4) = bDollar AndAlso bytes(i + 5) = bB Then
                    ' JIS_0208 1990
                    Return System.Text.Encoding.GetEncoding(50220)
                End If
            End If
        End If
    Next

    ' Pass 4: should be euc|sjis|utf8. Each scan adds the length of every
    ' matching multi-byte sequence to its score. The loop counter is advanced
    ' inside the body on purpose so a matched sequence is not rescanned.
    Dim sjis As Integer = 0
    Dim euc As Integer = 0
    Dim utf8 As Integer = 0

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If ((&H81 <= b1 AndAlso b1 <= &H9F) OrElse _
            (&HE0 <= b1 AndAlso b1 <= &HFC)) AndAlso _
           ((&H40 <= b2 AndAlso b2 <= &H7E) OrElse _
            (&H80 <= b2 AndAlso b2 <= &HFC)) Then
            ' SJIS_C
            sjis += 2
            i += 1
        End If
    Next

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If ((&HA1 <= b1 AndAlso b1 <= &HFE) AndAlso _
            (&HA1 <= b2 AndAlso b2 <= &HFE)) OrElse _
           (b1 = &H8E AndAlso (&HA1 <= b2 AndAlso b2 <= &HDF)) Then
            ' EUC_C / EUC_KANA
            euc += 2
            i += 1
        ElseIf i < len - 2 Then
            b3 = bytes(i + 2)
            If b1 = &H8F AndAlso (&HA1 <= b2 AndAlso b2 <= &HFE) AndAlso _
               (&HA1 <= b3 AndAlso b3 <= &HFE) Then
                ' EUC_0212
                euc += 3
                i += 2
            End If
        End If
    Next

    For i = 0 To len - 2
        b1 = bytes(i)
        b2 = bytes(i + 1)
        If (&HC0 <= b1 AndAlso b1 <= &HDF) AndAlso _
           (&H80 <= b2 AndAlso b2 <= &HBF) Then
            ' UTF8 (2-byte sequence)
            utf8 += 2
            i += 1
        ElseIf i < len - 2 Then
            b3 = bytes(i + 2)
            If (&HE0 <= b1 AndAlso b1 <= &HEF) AndAlso _
               (&H80 <= b2 AndAlso b2 <= &HBF) AndAlso _
               (&H80 <= b3 AndAlso b3 <= &HBF) Then
                ' UTF8 (3-byte sequence)
                utf8 += 3
                i += 2
            End If
        End If
    Next

    ' Extra weighting for UTF-8 (utf8 += utf8 / 2) is left disabled.
    System.Diagnostics.Debug.WriteLine( _
        String.Format("sjis = {0}, euc = {1}, utf8 = {2}", sjis, euc, utf8))

    ' Only a strict winner is reported; ties fall through to Nothing.
    If euc > sjis AndAlso euc > utf8 Then
        ' EUC
        Return System.Text.Encoding.GetEncoding(51932)
    ElseIf sjis > euc AndAlso sjis > utf8 Then
        ' SJIS
        Return System.Text.Encoding.GetEncoding(932)
    ElseIf utf8 > euc AndAlso utf8 > sjis Then
        ' UTF8
        Return System.Text.Encoding.UTF8
    End If

    Return Nothing
End Function
' Button1 click: load the file named in TextBox1, guess its encoding and
' show the decoded text in RichTextBox1.
Private Sub Button1_Click(ByVal sender As Object, _
        ByVal e As System.EventArgs) Handles Button1.Click
    ' ReadAllBytes reads the whole file and closes it even if reading fails.
    Dim bs As Byte() = System.IO.File.ReadAllBytes(TextBox1.Text)

    Dim enc As System.Text.Encoding = GetCode(bs)
    If enc Is Nothing Then
        ' GetCode could not decide; fall back to the system default encoding
        ' instead of failing with a NullReferenceException.
        enc = System.Text.Encoding.Default
    End If

    RichTextBox1.Text = enc.GetString(bs)
End Sub