# 7.28（周日）中午之前要：如何使用Matlab或python或其他语言解决机器学习中KNN与GMM的问题？

• 写回答

#### 3条回答默认 最新

• 不在今天 2019-08-02 02:49
关注

1 Data Preparation
pareto displays the first 95% of the cumulative distribution, some elements in y are not displayed.
the smallest number of principal components is 3

2 Unsupervised Clustering on Colorbird Image with Mean Shift
How does your choice of bandw effect the final number of clusters?

Inappropriate bandwidth can cause modes to be merged. And as the picture shown, as the bandwidth size increase, the number of clusters will decrease.
Comment on the reproducibility of these results versus new random selection of vectors
Try two times use different new random selection of vectors
Try 1(Number of clusters) :
Try 2(Number of clusters) :
As we can see from the table, the number of clusters will slightly different, but the Overall trend is the same.
The smallest number of principal components needed to produce a reasonable result is 2.
3 Colorbird Image Segmentation with Unsupervised Clustering: KNN

4 Colorbird Image Segmentation with Unsupervised Clustering: GMM
k-mean segmentation is simpler
segmentation result is not very good compared

with GMM
We can infer it from the tree branch that if some pixels are white label, the surrounding pixels will also become the same label.
GMM segmentation is based on probability, so when we see the tree branch, it will not lead to the same situation.
CODE
1:
% this script imports the color bird image
clear
%
% form the raw features
nc= size(data,2);% column size of image
nr= size(data,1);% row size of image featureSize= nc*nr;%numberoffeaturevectors
%numSamples = round(featureSize*randSampFrac); % random sample size
raw_feature=zeros(featureSize,5); idx=0;
for rowcount=1:nr
for colcount=1:nc
idx=idx+1;
raw_feature(idx,:)=[rowcount colcount double(data(rowcount,colcount,1)) double(data(rowcount,colcount,2)) double(data(rowcount,colcount,3))];
end end

% normalize the feature vectors
[feature,mu,sigma]=zscore(raw_feature);
% find the principal components of the features
[coeff,score,latent] = pca(feature);
%plot pareto
fig2 = figure(2);
pareto(latent)
Itransformed = feature*coeff;
%Plot approximations images
Ipc1 = reshape(Itransformed(:,1),size(data,1),size(data,2)); Ipc2 = reshape(Itransformed(:,2),size(data,1),size(data,2)); Ipc3 = reshape(Itransformed(:,3),size(data,1),size(data,2)); Ipc4 = reshape(Itransformed(:,4),size(data,1),size(data,2)); Ipc5 = reshape(Itransformed(:,5),size(data,1),size(data,2)); figure(1),subplot(232),imshow(Ipc1),title('pc1') figure(1),subplot(233),imshow(Ipc2),title('pc2') figure(1),subplot(234),imshow(Ipc3),title('pc3') figure(1),subplot(235),imshow(Ipc4),title('pc4') figure(1),subplot(236),imshow(Ipc5),title('pc5')
%randomly select 5% of the feature vectors
threeC = Itransformed(:,1:3);
rand_index = randperm(size(threeC,1));
random_part_index = rand_index(round(1:size(threeC,1)*0.05)); random_part = threeC(random_part_index,:);

%mean shift
i=1;
for bandwidth=0.1:0.05:2;
x(i)=bandwidth;
random_part = transpose(random_part);
clustCent = HGMeanShiftCluster(random_part,bandwidth,'gaussian'); Ncluster=size(clustCent,2);
y(i)=Ncluster;
i=i+1;
end
%plot
fig3 = figure(3); plot(x(1:2:40),y(1:2:40))
mean shift:
function [shiftedClusterCenter] = mean_shift(clusterCenter,bandw,weight)
% this computes one iteration of the mean shift algorithm
% using a Gaussian kernel with standard deviation given by 'bandw' shiftedClusterCenter=clusterCenter;
% form averaging distributions, one per cluster center, in columns of 'distMatrix'
distMatrix = exp(-pdist2(clusterCenter,clusterCenter)/(2*bandw^2)); distMatrix = (weight*ones(1,size(distMatrix,2))).*distMatrix; normalization = sum(distMatrix,1);
distMatrix = distMatrix ./ (ones(size(distMatrix,1),1)*normalization); for count = 1:size(shiftedClusterCenter,1)
shiftedClusterCenter(count,:) = sum(distMatrix(:,count)*ones(1,size(clusterCenter,2)).*clusterCenter,

1);
end
end
gauss function:
function out = gaussfun(x,d,bandWidth) % approximate Gaussian kernel
% x - value
% d - location
% bandWidth - band width of the kernel
% out - contribtion to the kernel mean %
% Copyright 2015 Han Gong, University of East Anglia
ns = 1000; % resolution of guassian approximation xs = linspace(0,bandWidth,ns+1); % approximate ticks kfun = exp(-(xs.^2)/(2*bandWidth^2));
w = kfun(round(d/bandWidth*ns)+1);
w = w/sum(w); % normalise
out = sum( bsxfun(@times, x, w ), 2 );
end
HGmeanshiftcluster:
function [clustCent,data2cluster,cluster2dataCell] = HGMeanShiftCluster(dataPts,bandWidth,kernel,plotFlag); %HGMEANSHIFTCLUSTER performs MeanShift Clustering of data using a chosen kernel
%
% ---INPUT---
% dataPts
% bandWidth
% kernel

• input data, (numDim x numPts)
• is bandwidth parameter (scalar)
• kernel type (flat or gaussian)

% plotFlag
% ---OUTPUT---
% clustCent
numClust)
% data2cluster
(numPts)
% cluster2dataCell - for every cluster which points are in it (numClust)
%
% Copyright 2015 Han Gong, University of East Anglia
% Copyright 2006 Bart Finkston %
% MeanShift first appears in
% K. Funkunaga and L.D. Hosteler, "The Estimation of the Gradient of a
% Density Function, with Applications in Pattern Recognition"
if nargin < 2
error('no bandwidth specified')
end
if nargin < 4
plotFlag = true;
plotFlag = false;
end
%**** Initialize stuff ***
[numDim,numPts] = size(dataPts); numClust = 0;
bandSq = bandWidth^2; initPtInds = 1:numPts;
maxPos = max(dataPts,[],2); % biggest size in each dimension minPos = min(dataPts,[],2); % smallest size in each dimension

• display output if 2 or 3 D (logical)
• is locations of cluster centers (numDim x
• for every data point which cluster it belongs to

boundBox = maxPos-minPos; % bounding box size
sizeSpace = norm(boundBox); % indicator of size of data space stopThresh = 1e-3*bandWidth; % when mean has converged
clustCent = []; % center of clust
beenVisited= false(1,numPts); % track if a points been seen already numInitPts = numPts; % number of points to posibaly use as initilization points
clusterVotes = zeros(1,numPts,'uint16'); % used to resolve conflicts on cluster membership
clustMembsCell = [];
%*** mean function with the chosen kernel ****
switch kernel
case 'flat' % flat kernel
kmean = @(x,dis) mean(x,2);
case 'gaussian' % approximated gaussian kernel kmean = @(x,d) gaussfun(x,d,bandWidth);
otherwise
error('unknown kernel type'); end
while numInitPts
tempInd = ceil( (numInitPts-1e-6)*rand); % pick a random seed
point
stInd = initPtInds(tempInd); % use this point as start of mean myMean = dataPts(:,stInd); % intilize mean to this points
location
myMembers = []; % points that will get added to this cluster thisClusterVotes = zeros(1,numPts,'uint16'); % used to resolve
conflicts on cluster membership
while true %loop untill convergence
sqDistToAll = sum(bsxfun(@minus,myMean,dataPts).^2); % dist

squared from mean to all points still active
inInds = find(sqDistToAll < bandSq); % points within bandWidth
myOldMean = myMean; % save the old mean
myMean = kmean(dataPts(:,inInds),sqrt(sqDistToAll(inInds))); % compute the new mean
myMembers = [myMembers inInds]; % add any point within bandWidth to the cluster
beenVisited(myMembers) = true; % mark that these points have been visited
%*** plot stuff ****
if plotFlag figure(12345),clf,hold on if numDim == 2
plot(dataPts(1,:),dataPts(2,:),'.') plot(dataPts(1,myMembers),dataPts(2,myMembers),'ys') plot(myMean(1),myMean(2),'go') plot(myOldMean(1),myOldMean(2),'rd')
pause(0.1);
end end
%**** if mean doesn't move much stop this cluster ***
if norm(myMean-myOldMean) < stopThresh %check for merge posibilities mergeWith = 0;
for cN = 1:numClust
distToOther = norm(myMean-clustCent(:,cN)); % distance to old clust max

if distToOther < bandWidth/2 % if its within bandwidth/2 merge new and old
mergeWith = cN;
break; end
end
if mergeWith > 0 % something to merge
nc = numel(myMembers); % current cluster's member
no = numel(clustMembsCell{mergeWith}); % old cluster's
nw = [nc;no]/(nc+no); % weights for merging mean
number
member number
clustMembsCell{mergeWith} = unique([clustMembsCell{mergeWith},myMembers]); %recordwhichpoints inside
clustCent(:,mergeWith) = myMean*nw(1) + myOldMean*nw(2);
else % it's a new cluster
numClust = numClust+1; %increment clusters clustCent(:,numClust) = myMean; %record the mean clustMembsCell{numClust} = myMembers; %store my members clusterVotes(numClust,:) = thisClusterVotes; % creates
a new vote
end
break; end
end

initPtInds = find(~beenVisited); % we can initialize with any of the points not yet visited
numInitPts = length(initPtInds); %number of active points in set end
[~,data2cluster] = max(clusterVotes,[],1); % a point belongs to the cluster with the most votes
%*** If they want the cluster2data cell find it for them
if nargout > 2
cluster2dataCell = cell(numClust,1); for cN = 1:numClust
myMembers = find(data2cluster == cN);
cluster2dataCell{cN} = myMembers;
end end
end
KNN:
I = double(imread('42049_colorBird.jpg')); %lab_I = rgb2lab(I);
ab = I(:,:,1:3);
ab = im2single(ab);
%K=2,3,4,5
K=2;
% repeat the clustering 3 times to avoid local minima pixel_labels = imsegkmeans(ab,K,'NumAttempts',3); imshow(pixel_labels,[])
title('Image Labeled by Cluster 2');

GMM:
clear; clc;
[X1,X2]=generateData();
K=3;
[a_init,mu_init,sigma_init]=initPara(X1); [a_GMM,mu_GMM,sigma_GMM]=GMM(X1,a_init,mu_init,sigma_init); [centroids,~] = Kmeans( X1,mu_init,K);
disp('kmeans1')
disp(centroids);
disp('mean-gaussian1');
disp('nmean');
disp(mu_GMM);
disp('matri')
disp(sigma_GMM);
disp(['mixn',num2str(a_GMM)]); [a_Kea_GMM,mu_Kea_GMM,sigma_Kea_GMM]=GMM(X1,a_init,centroids,sigma_in it);
disp('mean-gau2');
disp('meani');
disp(mu_Kea_GMM);
disp('nmatr')
disp(sigma_Kea_GMM);
disp(['mixn',num2str(a_Kea_GMM)]);
[a_init,mu_init,sigma_init]=initPara(X2); [a_fin,mu_fin,sigma_GMM]=GMM(X2,a_init,mu_init,sigma_init); [centroids,~] = Kmeans( X2,mu_init,K);

disp('meanx21')
disp(centroids);
disp('GMMg2');
disp('mean2');
disp(mu_GMM);
disp('mat')
disp(sigma_GMM);
disp(['matn2',num2str(a_GMM)]); [a_Kea_GMM,mu_Kea_GMM,sigma_Kea_GMM]=GMM(X2,a_init,centroids,sigma_in it);
disp('kmeansco2');
disp('mean2a');
disp(mu_Kea_GMM);
disp('matco2') disp(sigma_Kea_GMM); disp(['matn2',num2str(a_Kea_GMM)]);
function [X1,X2]=generateData()
mu=[1,1;4,4;8,1]; sigma=[2,0;0,2];
a1=[1/3,1/3,1/3]; a2=[0.6,0.3,0.1];
N=1000; rand1=randsrc(N,1,[[1,2,3];a1]); rand2=randsrc(N,1,[[1,2,3];a2]); X1=[];
X2=[];
mean1=[];
for i=1:size(a1,2)

X1_temp=mvnrnd(mu(i,:),sigma,length(find(rand1==i))); X1=[X1;X1_temp];
subplot(1,2,1);
plot(X1_temp(:,1),X1_temp(:,2),'+');
title('X1');
legend('m1','m2','m3');
xlabel('x');ylabel('y');
hold on;
mean1=[mean1;mean(X1_temp)]; cov1(:,:,i)=cov(X1_temp); X2_temp=mvnrnd(mu(i,:),sigma,length(find(rand2==i))); X2=[X2;X2_temp];
subplot(1,2,2); plot(X2_temp(:,1),X2_temp(:,2),'*'); title('X2'); legend('m1','m2','m3'); xlabel('x');ylabel('y');
hold on;
end
disp(['mean-ar1']); disp(mean1); disp(['mean-ar2']); disp(cov1);
end
function [ a,mu,sigma ] = initPara(X)
[m,n]=size(X);
r=randperm(m);
mu=X(r(1:3),:);
a=[1/3,1/3,1/3];
sigma=[1,0;0,1];
end

function [a_fin,mu_fin,sigma_fin]=GMM(X1,a_init,mu_init,sigma_init)
K=size(a_init,2); [M,N]=size(X1);
a=a_init; mu=mu_init; sigma=sigma_init; px=zeros(M,K); thre=1e-5; LLD_pro=inf; while true
for i=1:K px(:,i)=mvnpdf(X1,mu(i,:),sigma);
end
pGramm=repmat(a,M,1).*px; pGramm=pGramm./repmat(sum(pGramm,2),1,K); Nk=sum(pGramm,1); %1*K mu=diag(1./Nk)*pGramm'*X1;
for kk=1:K
Xshift=X1-repmat(mu(kk,:),M,1);
sigma(:,:,kk)=(Xshift'*diag(pGramm(:,kk))*Xshift)/Nk(:,kk);
end
a=Nk/M; LLD=sum(log(a*px')); if (LLD-LLD_pro)<thre
break; end
LLD_pro=LLD;

end
a_fin=a;
mu_fin=mu;
sigma_fin=sigma;
end

本回答被题主选为最佳回答 , 对您是否有帮助呢?
评论

#### 悬赏问题

• ¥15 求购HI3519AV100开发板
• ¥15 请问1553 RT怎么测试，没有BC有方法吗
• ¥100 业务编程如何选择学习方向和内容？
• ¥15 wamp3.3.5安装完成后图标正常显示绿色，鼠标左右键点击图标均无反应。求解决方法。
• ¥15 鼠标点击的这条记录了什么？
• ¥15 在写pid调速的程序时，电机始终维持最大速度
• ¥15 【有偿】调用DXGI截图初始化失败，提示0xf 887a0004
• ¥15 请问如何查看手机root记录？
• ¥15 商城小程序订单号重复
• ¥15 学校优化算法sbo和蚁群算法怎么结合