Udacity 3.朴素贝叶斯cpp代码解析 3
对类 GNB 里面的代码进行分析：构造函数、析构函数、训练函数、预测函数。
#include "classifier.h"  // Declares the GNB class implemented in this file.

#include <math.h>

#include <cstddef>
#include <string>
#include <vector>

using Eigen::ArrayXd;
using std::string;
using std::vector;
// Initializes GNB
GNB::GNB() {
  /**
   * Initialize the per-label model parameters.
   *
   * For each maneuver label ("left", "keep", "right") the model keeps:
   *   - a 4-element array of feature means (s, d, s_dot, d_dot),
   *   - a 4-element array of feature standard deviations,
   *   - a scalar prior probability P(label).
   *
   * train() accumulates sums into the mean/sd arrays before normalizing,
   * so all of them must start at exactly zero. ArrayXd::Zero(4) both sizes
   * and zero-fills in one step, replacing the allocate-then-stream pattern.
   */
  left_means = ArrayXd::Zero(4);
  left_sds = ArrayXd::Zero(4);
  left_prior = 0;

  keep_means = ArrayXd::Zero(4);
  keep_sds = ArrayXd::Zero(4);
  keep_prior = 0;

  right_means = ArrayXd::Zero(4);
  right_sds = ArrayXd::Zero(4);
  right_prior = 0;
}
GNB::~GNB() {}  // No dynamically allocated resources; nothing to release.
void GNB::train(const vector<vector<double>> &data,
const vector<string> &labels) {
/**
* Trains the classifier with N data points and labels.
* @param data - array of N observations
* - Each observation is a tuple with 4 values: s, d, s_dot and d_dot.
* - Example : [[3.5, 0.1, 5.9, -0.02],
* [8.0, -0.3, 3.0, 2.2],
* ...
* ]
* @param labels - array of N labels
* - Each label is one of "left", "keep", or "right".
*
* TODO: Implement the training function for your classifier.
*/
// For each label, compute ArrayXd of means, one for each data class
// (s, d, s_dot, d_dot).
// These will be used later to provide distributions for conditional
// probabilites.
// Means are stored in an ArrayXd of size 4.
float left_size = 0; //数量
float keep_size = 0;
float right_size = 0;
// For each label, compute the numerators of the means for each class
// and the total number of data points given with that label.
for (int i=0; i<labels.size(); ++i) {
if (labels[i] == "left") {
// conversion of data[i] to ArrayXd
left_means += ArrayXd::Map(data[i].data(), data[i].size());//创建一个data[i].size() 大小的ArrayXd,将data[i].data() 投射给这个数组,然后再与left平均值相加,这里还没开始求平均
left_size += 1;
} else if (labels[i] == "keep") {
keep_means += ArrayXd::Map(data[i].data(), data[i].size());
keep_size += 1;
} else if (labels[i] == "right") {
right_means += ArrayXd::Map(data[i].data(), data[i].size());
right_size += 1;
}
}
// Compute the means. Each result is a ArrayXd of means
// (4 means, one for each class)
left_means = left_means/left_size; //这里开始求得平均值
keep_means = keep_means/keep_size;
right_means = right_means/right_size;
// Begin computation of standard deviations for each class/label combination.
ArrayXd data_point;
// Compute numerators of the standard deviations.
for (int i=0; i<labels.size(); ++i) {
data_point = ArrayXd::Map(data[i].data(), data[i].size());
if (labels[i] == "left"){
left_sds += (data_point - left_means)*(data_point - left_means);//计算标准差的分子
} else if (labels[i] == "keep") {
keep_sds += (data_point - keep_means)*(data_point - keep_means);
} else if (labels[i] == "right") {
right_sds += (data_point - right_means)*(data_point - right_means);
}
}
// compute standard deviations
left_sds = (left_sds/left_size).sqrt();//分子除以size 再开根号,就是标准差
keep_sds = (keep_sds/keep_size).sqrt();
right_sds = (right_sds/right_size).sqrt();
//Compute the probability of each label
left_prior = left_size/labels.size();//得到这么多数据里每一个label占的概率
keep_prior = keep_size/labels.size();
right_prior = right_size/labels.size();
}
string GNB::predict(const vector<double> &sample) {
  /**
   * Once trained, this method is called and expected to return
   * a predicted behavior for the given observation.
   *
   * @param sample - a 4 tuple with s, d, s_dot, d_dot.
   *   - Example: [3.5, 0.1, 8.5, -0.2]
   * @output A label representing the best guess of the classifier. Can
   *   be one of "left", "keep" or "right".
   */
  // Gaussian probability density for one feature, given that feature's
  // fitted mean and standard deviation. Uses x*x instead of pow(x, 2):
  // pow is a general transcendental and far slower for squaring.
  auto gaussian = [](double x, double mean, double sd) {
    const double var = sd * sd;
    const double diff = x - mean;
    return (1.0 / sqrt(2.0 * M_PI * var)) * exp(-0.5 * diff * diff / var);
  };

  // Naive Bayes score per label: prior * product of per-feature likelihoods
  // (features assumed conditionally independent given the label).
  double left_p = left_prior;
  double keep_p = keep_prior;
  double right_p = right_prior;
  for (int i = 0; i < 4; ++i) {  // s, d, s_dot, d_dot
    left_p *= gaussian(sample[i], left_means[i], left_sds[i]);
    keep_p *= gaussian(sample[i], keep_means[i], keep_sds[i]);
    right_p *= gaussian(sample[i], right_means[i], right_sds[i]);
  }

  // Pick the label with the highest posterior score. The index must be an
  // integer: the original used a double, which relied on an implicit
  // floating->integral conversion when indexing possible_labels.
  const double probs[3] = {left_p, keep_p, right_p};
  int best = 0;
  for (int i = 1; i < 3; ++i) {
    if (probs[i] > probs[best]) {
      best = i;
    }
  }
  return this->possible_labels[best];
}