小弟是新人,但被要求一定要三天内把这段代码给看懂,求各位前辈帮助!
这好像是个贝叶斯的分类算法,里面的一些具体的语句弄不明白
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;namespace Dianping
{
    class Category
    {
        public int type = 0;                            // 每个分类对应的现有价值
        public int num = 0;                             // 记录此分类出现的次数
        public int[,] item = new int[6,3];              // 记录各个属性在此分类中的出现次数
        public double probability = 0;                  // 记录此分类的概率
        public double[,] prob_item = new double[6,3];   // 记录此分类各属性的概率
    }    class Bayes
    {
        public Category[] category = new Category[9];      // 共有9种分类结果,对应3种现有价值
        int totalRecordNum = 0;                            // 总的记录数        public Category[] currentValue = new Category[3];       
        }        /// <summary>
        /// 根据给定的训练数据集,训练Bayes分类器
        /// </summary>
        /// <param name="tRecords">训练数据集</param>
        public void TrainBayes(List<string> tRecords)
        {
            // Count how often each class and each attribute value occurs.
            Statistics(tRecords);
            // Turn those counts into class and attribute probabilities.
            CalcProb();
        }
        /// <summary>
        /// 统计某一个训练集中各个分类及该分类下各属性出现的次数
        /// </summary>
        /// <param name="tRecords">选定的训练样本集记录列表</param>
        private void Statistics(List<string> tRecords)
        {
            totalRecordNum = tRecords.Count();
            string[] tItems = new string[] { };
            for (int i = 0; i < totalRecordNum; i++)
            {
                tItems = tRecords[i].Split(',');
                // tItems属性顺序: gender,age,education,occupation,income,repast_reason,percapita_cons,cons_times
                if (tItems[6] == "100以下")     // 人均消费额100以下
                {
                    if (tItems[7] == "1-2次")
                    {
                        category[0].num++;
                        StatisticItemNum(0,tItems);                        currentValue[0].num++;
                        StatisticItemNum2(0, tItems);
                        
                    }
                    else if (tItems[7] == "2-4次")
                    {
                        category[1].num++;
                        StatisticItemNum(1, tItems);                        currentValue[0].num++;
                        StatisticItemNum2(0, tItems);
                    }
                    else
                    {
                        category[2].num++;
                        StatisticItemNum(2, tItems);                        currentValue[0].num++;
                        StatisticItemNum2(0, tItems);
                    }
                }
                else if (tItems[6] == "100-200")     // 人均消费额100-200
                {                    if (tItems[7] == "1-2次")
                    {
                        category[3].num++;
                        StatisticItemNum(3, tItems);                        currentValue[0].num++;
                        StatisticItemNum2(0, tItems);
                    }
                    else if (tItems[7] == "2-4次")
                    {
                        category[4].num++;
                        StatisticItemNum(4, tItems);                        currentValue[1].num++;
                        StatisticItemNum2(1, tItems);
                    }
                    else
                    {
                        category[5].num++;
                        StatisticItemNum(5, tItems);                        currentValue[1].num++;
                        StatisticItemNum2(1, tItems);
                    }
                }
                else     // 人均消费额200以上
                {                    if (tItems[7] == "1-2次")
                    {
                        category[6].num++;
                        StatisticItemNum(6, tItems);                        currentValue[0].num++;
                        StatisticItemNum2(0, tItems);
                    }
                    else if (tItems[7] == "2-4次")
                    {
                        category[7].num++;
                        StatisticItemNum(7, tItems);                        currentValue[1].num++;
                        StatisticItemNum2(1, tItems);
                    }
                    else
                    {
                        category[8].num++;
                        StatisticItemNum(8, tItems);                        currentValue[2].num++;
                        StatisticItemNum2(2, tItems);
                    }
                }
            }
        }        /// <summary>
        /// 统计一条某一分类下的记录中各个属性的出现次数
        /// </summary>
        /// <param name="cid">分类</param>
        /// <param name="items">记录</param>
        private void StatisticItemNum(int cid,string[] items)
        {
            int index = 0;
            for (int i = 0; i < 6; i++)
            {
                // MatchItem函数实现了将文字描述的属性值转化为数字(这个数字同时对应着customerProps数组的第二维下标)
                index = Dianping.MatchItem((Dianping.CustomerProps)(i + 1), items[i]);
                category[cid].item[i,index]++;            }
        }        private void StatisticItemNum2(int cid, string[] items)
        {
            int index = 0;
            for (int i = 0; i < 6; i++)
            {
                // MatchItem函数实现了将文字描述的属性值转化为数字(这个数字同时对应着customerProps数组的第二维下标)
                index = Dianping.MatchItem((Dianping.CustomerProps)(i + 1), items[i]);
                currentValue[cid].item[i, index]++;
            }
        }        /// <summary>
        /// 计算各个属性在各个分类中出现的先验概率
        /// </summary>
        private void CalcProb()
        {
            for (int i = 0; i < 9; i++)
            {
                category[i].probability = (double)category[i].num / totalRecordNum;
                for (int j = 0; j < 6; j++)
                {
                    category[i].prob_item[j, 0] = (double)category[i].item[j, 0] / category[i].num;
                    category[i].prob_item[j, 1] = (double)category[i].item[j, 1] / category[i].num;
                    category[i].prob_item[j, 2] = (double)category[i].item[j, 2] / category[i].num;
                }
            }
            for (int i = 0; i < 3; i++)
            {
                currentValue[i].probability = (double)currentValue[i].num / totalRecordNum;
                for (int j = 0; j < 6; j++)
                {
                    currentValue[i].prob_item[j, 0] = (double)currentValue[i].item[j, 0] / currentValue[i].num;
                    currentValue[i].prob_item[j, 1] = (double)currentValue[i].item[j, 1] / currentValue[i].num;
                    currentValue[i].prob_item[j, 2] = (double)currentValue[i].item[j, 2] / currentValue[i].num;
                }
            }
        }     

解决方案 »

  1.   

       /// <summary>
            /// 给定一条属性记录,得到分类结果
            /// </summary>
            /// <param name="record">记录数组</param>
            /// <returns>分类结果</returns>
            public int Classify(string[] record)
            {           
                double[] pro_class = new double[9];     // P(class[i]|record)    
                int max_class = 0;                      // 记录后验概率最大的分类索引号            for (int i = 0; i < 9; i++)
                {
                    pro_class[i] = 1;
                    // record = {item[0],...,item[6]}
                    // P(class[i]|record) = P(record|class[i])P(class[i])/P(record) = P(class[i].item[0])*...*P(class[i].item[6])*P(class[i])/P(record)
                    // 由于P(record)都一样,因此可以只比较P(record|class[i])P(class[i])的值
                    for (int j = 0; j < record.Length; j++)
                    {
                        int index = Dianping.MatchItem((Dianping.CustomerProps)(j + 1), record[j]);
                        pro_class[i] *= category[i].prob_item[j, index];                }
                    pro_class[i] *= category[i].probability;        // 到这里得到后验概率P(class[i]|record
                    if (pro_class[i] > pro_class[max_class])        // 选择后验概率最大的分类,记在max_class变量中
                        max_class = i;
                }
                return max_class;
            }        // 直接用现有价值高、中、低分类
            public int Classify2(string[] record)
            {
                double[] pro = new double[3];
                int max = 0;
                for (int i = 0; i < 3; i++)
                {
                    pro[i] = 1;
                    // record = {item[0],...,item[6]}
                    // P(class[i]|record) = P(record|class[i])P(class[i])/P(record) = P(class[i].item[0])*...*P(class[i].item[6])*P(class[i])/P(record)
                    // 由于P(record)都一样,因此可以只比较P(record|class[i])P(class[i])的值
                    for (int j = 0; j < record.Length && j<6; j++)
                    {
                        int index = Dianping.MatchItem((Dianping.CustomerProps)(j + 1), record[j]);
                        pro[i] *= currentValue[i].prob_item[j, index];                    
                    }                pro[i] *= currentValue[i].probability;                if (pro[i] > pro[max])
                        max = i;
                }
                return max;
            }        /// <summary>
            /// 获取当前价值的结果:人均消费额和月消费次数
            /// </summary>
            /// <param name="max_class"></param>
            /// <returns></returns>
            public int[] Consumption(int max_class)
            {
                int[] consumption = new int[2];
                switch (max_class)
                {
                    case 0:
                        consumption[0] = 0;
                        consumption[1] = 0;
                        break;
                    case 1:
                        consumption[0] = 0;
                        consumption[1] = 1;
                        break;
                    case 2:
                        consumption[0] = 0;
                        consumption[1] = 2;
                        break;
                    case 3:
                        consumption[0] = 1;
                        consumption[1] = 0;
                        break;
                    case 4:
                        consumption[0] = 1;
                        consumption[1] = 1;
                        break;
                    case 5:
                        consumption[0] = 1;
                        consumption[1] = 2;
                        break;
                    case 6:
                        consumption[0] = 2;
                        consumption[1] = 0;
                        break;
                    case 7:
                        consumption[0] = 2;
                        consumption[1] = 1;
                        break;
                    case 8:
                        consumption[0] = 2;
                        consumption[1] = 2;
                        break;
                    default:
                        break;
                }
                return consumption;
            }        #region 以下用于测试
            private void test(List<string> tRecords)
            {
                int result = 0;
                int correct = 0;
                int error = 0;            FileStream fs1 = new FileStream(@"C:\Users\wucs32\Documents\Visual Studio 2008\Projects\Dianping\Dianping\bin\Debug\App_Data\test2.txt", FileMode.OpenOrCreate, FileAccess.Write);
                StreamWriter sw = new StreamWriter(fs1, Encoding.Unicode);
                
                
                FileStream fs = new FileStream(@"C:\Users\wucs32\Documents\Visual Studio 2008\Projects\Dianping\Dianping\bin\Debug\App_Data\result2.txt", FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
                StreamReader sr = new StreamReader(fs, Encoding.Unicode);
                string tRecord = String.Empty;
                string s = "";
                
                string[] record = new string[] { };
                for (int i = 0; i < tRecords.Count; i++)
                {
                    record = tRecords[i].Split(',');
                    result = Classify2(record);
                    s = sr.ReadLine();
                    if (s == result.ToString())
                        correct++;
                    else
                        error++;
                    sw.WriteLine(s + " " + result.ToString());
                }            sw.WriteLine((double)correct / (correct+error));
                sr.Close();
                fs.Close();            sw.Flush();
                sw.Close();
                fs.Close();
            }
            #endregion
        }
      

  2.   

    public Category[] category = new Category[9];   currentValue[0].num++;
      StatisticItemNum2(0, tItems); int index = Dianping.MatchItem((Dianping.CustomerProps)(j + 1), record[j]);
    这些语句是什么意思? 
     
      

  3.   

    // MatchItem函数实现了将文字描述的属性值转化为数字(这个数字同时对应着customerProps数组的第二维下标)
    这不都告诉你了吗?但是你这里好像没有这个方法的具体实现,是不是代码没copy全?
      

  4.   


    eg:public Category[] category = new Category[9];
     category在最上面好像是个class吧,那为什么Category[9]; 这个是数组还是类?
    eg:StatisticItemNum2(0, tItems);括号里面这两个参数是什么意思
    eg:int index = Dianping.MatchItem((Dianping.CustomerProps)(j + 1), record[j]);这个函数是派什么用的为什么参数是((Dianping.CustomerProps)(j + 1), record[j]);新人问题很多,希望大家能帮帮忙
      
      

  5.   

      不好意思,
              static public int MatchItem(CustomerProps props, string item)
            {            
                int index = (int)props-1;
                int i = 0;
                for (i=0; i<3;i++)
                {
                    if (customerProps[index, i] == item)
                        return i;
                }
                return 0;
            }
      

  6.   

     
    // One customer record; every attribute is stored as an int.
    // NOTE(review): presumably these ints are coded category indices - confirm against the loader.
    public class Customer
        {
            public int id;                 // Record number
            public int gender;             // Gender
            public int age;                // Age
            public int education;          // Education level
            public int occupation;         // Occupation
            public int income;             // Monthly income
            public int repast_reason;      // Reason for dining out        
            public int percapita_cons;     // Per-capita spend
            public int cons_times;         // Number of visits per month
            public int firstchoice;        // Whether the customer treats Dianping as their first choice
            public int rec_restaurant;     // Whether the customer often dines at partner restaurants recommended by Dianping
            public int intro_friend;       // Whether the customer has introduced Dianping to people around them
            public int rec_member;         // Whether the customer has successfully referred someone to become a Dianping member
            public int usefulness;         // Whether the customer finds the restaurant descriptions and reviews useful
            public int rationality;        // Whether the customer finds the site's points system and activities reasonable
            public int comment;            // Whether the customer frequently submits reviews or updated information
            public int participation;      // Whether the customer is happy to take part in the site's activities
            public int cons_amount;        // Cumulative spend over the last half year
            public int current_value;      // Current value of the customer
            public int history_value;      // Historical value of the customer
            public int potential_value;    // Potential value of the customer
            public int value;              // Value classification of the customer                
        }
    // Identifies one customer attribute.
    // MatchItem uses (value - 1) as the first index into customerProps, and the Bayes
    // counting/classification loops cast (i + 1) for i = 0..5 to this enum, so members
    // 1-6 are the attributes fed to the classifier.
    public enum CustomerProps
            {
                GENDER = 1,
                AGE = 2,
                EDUCATION = 3,
                OCCUPATION = 4,
                INCOME = 5,
                REPAST_REASON = 6,            
                PERCAPITA_CONS = 7,
                CONS_TIMES = 8,
                FIRSTCHOICE = 9,
                REC_RESTAURANT = 10,
                INTRO_FRIEND = 11,
                REC_MEMBER = 12,
                USEFULNESS = 13,
                RATIONALITY = 14,
                COMMENT = 15,
                PARTICIPATION = 16,
                CONS_AMOUNT = 17,
                // Value fields, numbered separately starting at 101.
                CURRENT_VALUE = 101,
                POTENTIAL_VALUE = 102,
                HISTORY_VALUE = 103,
                VALUE = 104
            }