我原来的代码如下所示:
// Loads a word-frequency list from the file at `path` into `dic`.
// Each non-blank line is "<word>[whitespace<frequency>]"; the first occurrence
// of a word wins and later duplicates are ignored.
// Convert.ToInt64 throws FormatException/OverflowException on a malformed frequency.
// NOTE(review): Encoding.Default is platform/runtime dependent - pass an explicit
// encoding if the file's encoding is known.
using (StreamReader sr = new StreamReader(path, Encoding.Default))
{
    // The using block guarantees the reader is disposed even if parsing throws.
    string rawLine;
    while ((rawLine = sr.ReadLine()) != null)
    {
        string line = rawLine.Trim();
        if (line.Length == 0)
        {
            // A blank line would otherwise register "" as a word with frequency 0.
            continue;
        }

        string[] wordAndFrequency = Regex.Split(line, @"\s+", RegexOptions.ExplicitCapture);
        if (!dic.ContainsKey(wordAndFrequency[0]))
        {
            if (wordAndFrequency.Length == 1)
            {
                // A word listed without a frequency defaults to frequency 0.
                dic.Add(wordAndFrequency[0], 0);
            }
            else
            {
                dic.Add(wordAndFrequency[0], Convert.ToInt64(wordAndFrequency[1]));
            }
        }
    }
}
// Loads a word-frequency list from the file at `path` into `dic`.
// Each non-blank line is "<word>[whitespace<frequency>]"; the first occurrence
// of a word wins and later duplicates are ignored.
// Convert.ToInt64 throws FormatException/OverflowException on a malformed frequency.
// NOTE(review): Encoding.Default is platform/runtime dependent - pass an explicit
// encoding if the file's encoding is known.
using (StreamReader sr = new StreamReader(path, Encoding.Default))
{
    // The using block guarantees the reader is disposed even if parsing throws.
    string rawLine;
    while ((rawLine = sr.ReadLine()) != null)
    {
        string line = rawLine.Trim();
        if (line.Length == 0)
        {
            // A blank line would otherwise register "" as a word with frequency 0.
            continue;
        }

        string[] wordAndFrequency = Regex.Split(line, @"\s+", RegexOptions.ExplicitCapture);
        if (!dic.ContainsKey(wordAndFrequency[0]))
        {
            if (wordAndFrequency.Length == 1)
            {
                // A word listed without a frequency defaults to frequency 0.
                dic.Add(wordAndFrequency[0], 0);
            }
            else
            {
                dic.Add(wordAndFrequency[0], Convert.ToInt64(wordAndFrequency[1]));
            }
        }
    }
}
// Reads the remaining text from `srr`, then re-reads it line by line from an
// in-memory stream.
string text = srr.ReadToEnd();
srr.Close();
// The bytes written to the memory stream and the reader that decodes them must
// use the same encoding. Previously the text was encoded as GB2312 but decoded
// by StreamReader's default (UTF-8), which produced garbled characters.
// UTF-8 is used on both sides because it can represent every character in `text`.
MemoryStream ms = new MemoryStream(Encoding.UTF8.GetBytes(text));
using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
{
    while (sr.Peek() > -1)
    {
        string line = sr.ReadLine().Trim();
        ...
    }
}
我这样弄了以后是乱码,请教各位,该如何解决?
把 GetBytes 所用的 GB2312 改成 UTF-8 就好了:new StreamReader(ms) 默认按 UTF-8 解码,写入内存流的字节必须使用与读取时相同的编码,否则就会出现乱码。