最近在做一个文件批量处理的工具,碰到这样一个问题:有些源码文件是 UTF-8 编码,而我事先并不知道文件的编码。如果一律以 GB2312 编码方式读取,读出来就会是乱码。怎样才能获取文件的编码类型?

解决方案 »

  1.   

    通过文件开头的几个字节(BOM,字节顺序标记)判断文本的编码格式,参考如下代码:
    /// <summary>
    /// Loads a text file and decodes it using the encoding indicated by its
    /// byte-order mark (BOM), falling back to a UTF-8 validity check and then
    /// to the system default code page.
    /// </summary>
    /// <param name="filename">Path of the file to load.</param>
    /// <returns>
    /// The decoded text without the BOM, or <see cref="string.Empty"/> when the
    /// file does not exist.
    /// </returns>
    public string LoadTextFile(string filename)
    {
        if (!File.Exists(filename)) return string.Empty;

        // ReadAllBytes keeps reading until the whole file is in memory; a single
        // Stream.Read call is allowed to return fewer bytes than requested.
        byte[] data = File.ReadAllBytes(filename);

        if (data.Length >= 2 && data[0] == 0xff && data[1] == 0xfe) // UTF-16 little-endian BOM
        {
            return Encoding.Unicode.GetString(data, 2, data.Length - 2);
        }

        if (data.Length >= 2 && data[0] == 0xfe && data[1] == 0xff) // UTF-16 big-endian BOM ("unicodeFFFE")
        {
            return Encoding.BigEndianUnicode.GetString(data, 2, data.Length - 2);
        }

        if (data.Length >= 3 && data[0] == 0xef && data[1] == 0xbb && data[2] == 0xbf) // UTF-8 BOM
        {
            return Encoding.UTF8.GetString(data, 3, data.Length - 3);
        }

        // No BOM. Many UTF-8 files (source code in particular) are saved without
        // one, so first try a strict UTF-8 decode that throws on invalid bytes.
        // Pure ASCII content decodes to the same text either way.
        try
        {
            return new UTF8Encoding(false, true).GetString(data);
        }
        catch (DecoderFallbackException)
        {
            // Not valid UTF-8: treat the bytes as the system default (ANSI) code page.
            return Encoding.Default.GetString(data);
        }
    }

    /// <summary>
    /// Demo click handler: prints the decoded content of four sample files to the console.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        Console.WriteLine(LoadTextFile(@"c:\temp\tempa.txt"));
        Console.WriteLine(LoadTextFile(@"c:\temp\tempb.txt"));
        Console.WriteLine(LoadTextFile(@"c:\temp\tempc.txt"));
        Console.WriteLine(LoadTextFile(@"c:\temp\tempd.txt"));
    }
      

  2.   

    还是不行……如果是一个 UTF-8 编码的 PHP 文件,读出来还是乱码。
      

  3.   


    /// <summary>
    /// Detects a text encoding from the byte-order mark (BOM) at the start of a buffer.
    /// </summary>
    /// <param name="content">Raw bytes of the file; may be null or empty.</param>
    /// <returns>
    /// <see cref="Encoding.Unicode"/> for FF FE, <see cref="Encoding.BigEndianUnicode"/>
    /// for FE FF, <see cref="Encoding.UTF8"/> for EF BB BF; otherwise
    /// <see cref="Encoding.Default"/>.
    /// </returns>
    public static Encoding GetFileEncoding(byte[] content)
    {
        // Every BOM recognised here is at least two bytes long.
        if (content == null || content.Length < 2)
        {
            return Encoding.Default;
        }

        // Compare the complete BOM sequence: looking at the first byte alone
        // misreports any ANSI file that merely starts with 0xEF, 0xFE or 0xFF.
        if (content[0] == 0xFF && content[1] == 0xFE)
        {
            return Encoding.Unicode;
        }

        if (content[0] == 0xFE && content[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        if (content.Length >= 3 && content[0] == 0xEF && content[1] == 0xBB && content[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        return Encoding.Default;
    }

            /// <summary>
            /// 读文件流
            /// </summary>
            /// <param name="stream">文件流;如:File.OpenRead(fileCurrentPath)</param>
            /// <param name="encoding">字符编码;如:Encoding.UTF8</param>
            /// <returns>流字符串</returns>
            public static string ReadFromStream(FileStream stream, Encoding encoding)
            {
                byte[] content = new byte[stream.Length];
                stream.Read(content, 0, content.Length);
                stream.Close();
                stream = null;
                if (encoding == Encoding.Default)
                {
                    encoding = GetFileEncoding(content);
                }
                return encoding.GetString(content);
            }
           
            public static string ReadFromStream(FileStream stream,out Encoding encoding)
            {
                byte[] content = new byte[stream.Length];
                stream.Read(content, 0, content.Length);
                stream.Close();
                stream = null;
                encoding = GetFileEncoding(content);
                return encoding.GetString(content);
            } 1   /**//// <summary>
     2        /// 写文件流
     3        /// </summary>
     4        /// <param name="stream">文件流;如:File.OpenWrite(fileCurrentPath)</param>
     5        /// <param name="encoding">字符编码;如:Encoding.UTF8</param>
     6        /// <param name="Text">要写的字符串</param>
     7        /// <returns>bool</returns>
     8        public static bool WriteToStream(FileStream stream, Encoding encoding, string Text)
     9        {
    10            try
    11            {
    12
    13                byte[] content = encoding.GetBytes(Text.Replace("\n", "\r\n"));
    14                stream.SetLength(content.Length);
    15                stream.Write(content, 0, content.Length);
    16                stream.Close();
    17                return true;
    18            }
    19            catch
    20            {
    21                return false;
    22            }
    23        }