-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.m
277 lines (174 loc) · 7.18 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
%{
get the original GMM parameters.
Inputs: None
Outputs:
N - The number of vectors to sample from the GMM. This is a scalar.
comp_PMF_true - A 1 x M vector of probabilities representing the
component probabilities of the GMM. M is the number of components in the
GMM
mu_true - A M x d array containing the invidual mean vectors for each
mixture component. d is the dimensionality of each mean vector.
Each row of this array is a mean vector.
Sigma_true - A d x d x M array containing the covariance matrices for each
mixture component. Each page (3rd dimension) of this array contains an
individual covariance matrix.
%}
[N,comp_PMF_true,mu_true,Sigma_true] = get_GMM_parameters();
%{
sample N random vectors from the GMM.
Inputs:
N - The number of vectors to sample from the GMM. This is a scalar.
comp_PMF_true - A 1 x M vector of probabilities representing the
component probabilities of the GMM. M is the number of components in the
GMM
mu_true - A M x d array containing the invidual mean vectors for each
mixture component. Each row of this array is a mean vector.
Sigma_true - A d x d x M array containing the covariance matrices for each
mixture component. Each page (3rd dimension) of this array contains an
individual covariance matrix.
Outputs:
X - A N x d array containing the random vectors that were sampled from the
GMM. N is the number of vectors to be sampled and d is the dimensionality
of each random vector.
comp_idx - A N x 1 array containing the index of the component that the
corresponding random vector was sampled from. This can be used to estimate
the component probabilities so that they are compared to comp_PMF_true.
For example: sum(comp_idx == 1)/N
Note that it is assumed that beyond this point, only the samples in X
are available, and that the true parameters above are not.
%}
[X,comp_idx] = sample_GMM(N,comp_PMF_true,mu_true,Sigma_true);
%{
get the initial estimates of the GMM parameters.
Inputs: None
Outputs:
comp_PMF - A 1 x M vector of probabilities representing the initial
guesses for the component probabilities of the GMM. M is the number of
components in the GMM.
mu - A M x d array containing the initial guesses for the invidual mean
vectors for each mixture component. d is the dimensionality of each mean
vector. Each row of this array is a mean vector.
Sigma - A d x d x M array containing the initial guesses for the
covariance matrices for each mixture component. Each page (3rd dimension)
of this array contains an individual covariance matrix.
%}
[comp_PMF,mu,Sigma] = get_init_est();
% store all parameter estimates for future plotting
comp_PMF_save = [];
mu_save = [];
Sigma_save = [];
%{
initialize the value that will be used to detect the convergence of the EM
algorithm. This value was chosen to start the while loop later in the
program. epsilon will then be computed using the old and new parameter
estimates.
Note that it is assumed here that the true parameter values are not
available. If they were available, then epsilon could have been initialized
as follows:
epsilon = sum(abs(comp_PMF_true - comp_PMF),'all') + ...
sum(abs(mu_true - mu),'all') + ...
sum(abs(Sigma_true - Sigma),'all');
%}
epsilon = 10;
% record epsilons for future plotting
epsilon_save = [];
% count the number of iterations required for convergence
num_iter = 0;
%{
while the sum of absolute differences between the old parameters and the
new parameters is greater than a certain threshold
%}
while epsilon > 1e-5
num_iter = num_iter + 1;
epsilon_save = [epsilon_save,epsilon];
comp_PMF_save = cat(3,comp_PMF_save,comp_PMF);
mu_save = cat(3,mu_save,mu);
Sigma_save = cat(4,Sigma_save,Sigma);
% store the old parameters for future comparison
comp_PMF_old = comp_PMF;
mu_old = mu;
Sigma_old = Sigma;
%{
initialize the array that will store each of the gamma_k vectors for
each component. The gamma_k vector contains values for gamma_jk for
all j
%}
gamma_k = zeros(length(X),length(comp_PMF));
%{
initialize the array that will store each of the N_k values for each
component
%}
N_k = zeros(1,length(comp_PMF));
%{
compute parameter estimates for each component in the GMM
%}
for k = 1:length(comp_PMF)
%{
get all gamma_jk values for all j and the k^th component.
This is a N x 1 array, where N is the number of sampled vectors
from the GMM. Store this column vector in the k^th column of the
gamma_k array.
%}
gamma_k(:,k) = get_gamma_k(k,X,comp_PMF,mu,Sigma);
%{
compute N_k for the k^th component by summing up all gamma_jk
%}
N_k(k) = sum(gamma_k(:,k));
% compute the mean vector estimate for the k^th component
mu(k,:) = (1/N_k(k))*sum(X.*gamma_k(:,k),1);
%{
compute the covariance matrix estimate for the k^th component of
the GMM as an inner product (sum) of outer products, which is
simply matrix multiplication.
transpose((X - mu(k,:)).*gamma_k(:,k)) is a d x N matrix, where d
is the dimensionality of the sampled vectors and N is the number
of sampled vectors, which means that X - mu(k,:) is a N x d
matrix. This means that the multiplication of these two matrices
yields a d x d matrix. This is equivalent to the sum of the
individual outer products resulting from the multiplication of the
column vectors in the d x N matrix by the row vectors in the N x d
matrix.
%}
Sigma(:,:,k) = (1/N_k(k)) *...
(transpose((X - mu(k,:)).*gamma_k(:,k)) *...
(X - mu(k,:)));
% compute the component probability estimates
comp_PMF(k) = N_k(k)/N;
end
% check for convergence by comparing the old parameter values to the
% new parameter values
epsilon = sum(abs(comp_PMF_old - comp_PMF),'all') + ...
sum(abs(mu_old - mu),'all') + ...
sum(abs(Sigma_old - Sigma),'all');
end
% plot epsilon against iteration number
figure
plot(epsilon_save)
set(gcf,'color','w')
xlim([1,inf])
xlabel('Iteration Number')
ylabel('Sum of absolute differences')
%{
create an animation showing the shape of the PDF for different GMM's for
each iteration of the EM algorithm. This shows how the EM algorithm
converges. Additionally, save this animation as 'GMM.gif' in your working
directory.
%}
frames = save_GIF(X,num_iter,mu_save,Sigma_save,comp_PMF_save);
% play this animation 3 times at 2 frames per second
figure
movie(gcf,frames,3,2)
figure
plot(X(:,1),X(:,2),'+')
set(gcf,'color','w')
lim = axis;
axis([lim(1)-1,lim(2)+1,lim(3)-1,lim(4)+1])
xlabel('$x_1$','Interpreter','latex')
ylabel('$x_2$','Interpreter','latex')
% create a gmdistribution object and get its PDF
gm = gmdistribution(mu,Sigma,comp_PMF);
gm_PDF = @(x,y)reshape(pdf(gm,[x(:) y(:)]),size(x));
% plot the contours for the PDF of the GMM on the same figure
hold on
fc = fcontour(gm_PDF);
hold off