- 不在今天, 2019-08-02 02:49
1 Data Preparation
pareto displays only the first 95% of the cumulative distribution, so some elements of y are not shown. The smallest number of principal components is 3.
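To check this beyond pareto's truncated display, the full cumulative explained variance can be computed directly. A minimal sketch, reusing the latent output of pca from the main script below:

explained = cumsum(latent)/sum(latent); % cumulative fraction of variance explained
nKeep = find(explained >= 0.95, 1); % components covering 95% of the variance
fprintf('%d components explain %.1f%% of the variance\n', nKeep, 100*explained(nKeep));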
2 Unsupervised Clustering on Colorbird Image with Mean Shift
How does your choice of bandwidth affect the final number of clusters? An inappropriate bandwidth can cause distinct modes to be merged, and, as the figure shows, the number of clusters decreases as the bandwidth increases; the quick sketch below illustrates this.
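A minimal sketch on made-up two-blob data, assuming the HGMeanShiftCluster listing from the CODE section is on the path (exact counts depend on the random draw):

rng(1); % fixed seed so the sketch is repeatable
pts = [0.3*randn(2,200), 0.3*randn(2,200)+3]; % two well-separated 2-D blobs, numDim x numPts
for bw = [0.5 1 2 4]
clustCent = HGMeanShiftCluster(pts,bw,'gaussian');
fprintf('bandwidth %.1f -> %d clusters\n', bw, size(clustCent,2)); % a large bandwidth merges the two modes
end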
Comment on the reproducibility of these results versus a new random selection of vectors. We ran the clustering twice with two different random selections of vectors:
Trial 1 (number of clusters):
Trial 2 (number of clusters):
As the table shows, the number of clusters differs slightly between trials, but the overall trend is the same.
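If bit-for-bit reproducibility is wanted instead, the 5% subsample can be pinned by seeding the generator before randperm. A minimal sketch using the same variable names as the main script below (the seed value 42 is arbitrary):

rng(42); % fix the seed; remove this line for a fresh random subsample
rand_index = randperm(size(threeC,1));
random_part = threeC(rand_index(1:round(0.05*size(threeC,1))),:);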
The smallest number of principal components needed to produce a reasonable result is 2.
3 Colorbird Image Segmentation with Unsupervised Clustering: KNN
4 Colorbird Image Segmentation with Unsupervised Clustering: GMM
K-means segmentation is simpler, but its segmentation result is not as good as GMM's. We can see this on the tree branch: with k-means, once some pixels take the white label, the surrounding pixels tend to take the same label. GMM segmentation is based on posterior probabilities, so the branch region does not show the same effect; a sketch of the contrast follows.
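A minimal sketch of this contrast (not the posted code; it assumes the Statistics and Machine Learning Toolbox's fitgmdist and the same 42049_colorBird.jpg):

I = im2double(imread('42049_colorBird.jpg'));
X = reshape(I,[],3); % one RGB feature vector per pixel
gm = fitgmdist(X,2,'RegularizationValue',1e-4); % 2-component GMM fit to the pixels
hard = reshape(cluster(gm,X),size(I,1),size(I,2)); % hard labels, comparable to k-means output
soft = posterior(gm,X); % per-pixel membership probabilities
figure
subplot(121), imshow(label2rgb(hard)), title('GMM hard labels')
subplot(122), imshow(reshape(soft(:,1),size(I,1),size(I,2))), title('P(component 1)')

cluster() gives the hard label map, while posterior() exposes how confident the model is at each pixel, which is the probabilistic behaviour described above.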
CODE
Main script:
% this script imports the color bird image
clear
%
data = imread('42049_colorBird.jpg');
figure(1), subplot(231), imshow(data), title('42049_colorBird.jpg')
% form the raw features
nc= size(data,2);% column size of image
nr = size(data,1); % row size of image
featureSize = nc*nr; % number of feature vectors
%numSamples = round(featureSize*randSampFrac); % random sample size
raw_feature = zeros(featureSize,5);
idx = 0;
for rowcount=1:nr
for colcount=1:nc
idx=idx+1;
raw_feature(idx,:)=[rowcount colcount double(data(rowcount,colcount,1)) double(data(rowcount,colcount,2)) double(data(rowcount,colcount,3))];
end
end
% normalize the feature vectors
[feature,mu,sigma]=zscore(raw_feature);
% find the principal components of the features
[coeff,score,latent] = pca(feature);
%plot pareto
fig2 = figure(2);
pareto(latent)
Itransformed = feature*coeff;
%Plot approximations images
Ipc1 = reshape(Itransformed(:,1),size(data,1),size(data,2));
Ipc2 = reshape(Itransformed(:,2),size(data,1),size(data,2));
Ipc3 = reshape(Itransformed(:,3),size(data,1),size(data,2));
Ipc4 = reshape(Itransformed(:,4),size(data,1),size(data,2));
Ipc5 = reshape(Itransformed(:,5),size(data,1),size(data,2));
figure(1), subplot(232), imshow(Ipc1), title('pc1')
figure(1), subplot(233), imshow(Ipc2), title('pc2')
figure(1), subplot(234), imshow(Ipc3), title('pc3')
figure(1), subplot(235), imshow(Ipc4), title('pc4')
figure(1), subplot(236), imshow(Ipc5), title('pc5')
%randomly select 5% of the feature vectors
threeC = Itransformed(:,1:3);
rand_index = randperm(size(threeC,1));
random_part_index = rand_index(1:round(size(threeC,1)*0.05)); % keep the first 5% of the shuffled indices
random_part = threeC(random_part_index,:);
% mean shift
random_part = transpose(random_part); % transpose once, outside the loop: HGMeanShiftCluster expects numDim x numPts
i = 1;
for bandwidth = 0.1:0.05:2
x(i) = bandwidth;
clustCent = HGMeanShiftCluster(random_part,bandwidth,'gaussian');
Ncluster = size(clustCent,2);
y(i)=Ncluster;
i=i+1;
end
%plot
fig3 = figure(3);
plot(x(1:2:40), y(1:2:40))
mean shift:
function [shiftedClusterCenter] = mean_shift(clusterCenter,bandw,weight)
% this computes one iteration of the mean shift algorithm
% using a Gaussian kernel with standard deviation given by 'bandw'
shiftedClusterCenter = clusterCenter;
% form averaging distributions, one per cluster center, in columns of 'distMatrix'
distMatrix = exp(-pdist2(clusterCenter,clusterCenter).^2/(2*bandw^2)); % Gaussian kernel needs the squared pairwise distances
distMatrix = (weight*ones(1,size(distMatrix,2))).*distMatrix;
normalization = sum(distMatrix,1);
distMatrix = distMatrix ./ (ones(size(distMatrix,1),1)*normalization);
for count = 1:size(shiftedClusterCenter,1)
shiftedClusterCenter(count,:) = sum(distMatrix(:,count)*ones(1,size(clusterCenter,2)).*clusterCenter,1);
end
end
Gauss function:
function out = gaussfun(x,d,bandWidth)
% approximate Gaussian kernel
% x - value
% d - location
% bandWidth - band width of the kernel
% out - contribution to the kernel mean
%
% Copyright 2015 Han Gong, University of East Anglia
ns = 1000; % resolution of the Gaussian approximation
xs = linspace(0,bandWidth,ns+1); % approximate ticks
kfun = exp(-(xs.^2)/(2*bandWidth^2));
w = kfun(round(d/bandWidth*ns)+1);
w = w/sum(w); % normalise
out = sum( bsxfun(@times, x, w ), 2 );
end
HGMeanShiftCluster:
function [clustCent,data2cluster,cluster2dataCell] = HGMeanShiftCluster(dataPts,bandWidth,kernel,plotFlag)
%HGMEANSHIFTCLUSTER performs MeanShift Clustering of data using a chosen kernel
%
% ---INPUT---
% dataPts          - input data, (numDim x numPts)
% bandWidth        - bandwidth parameter (scalar)
% kernel           - kernel type (flat or gaussian)
% plotFlag         - display output if 2 or 3 D (logical)
% ---OUTPUT---
% clustCent        - locations of cluster centers (numDim x numClust)
% data2cluster     - for every data point, which cluster it belongs to (numPts)
% cluster2dataCell - for every cluster, which points are in it (numClust)
%
% Copyright 2015 Han Gong, University of East Anglia
% Copyright 2006 Bart Finkston
%
% MeanShift first appears in
% K. Fukunaga and L.D. Hostetler, "The Estimation of the Gradient of a
% Density Function, with Applications in Pattern Recognition"
if nargin < 2
    error('no bandwidth specified')
end
if nargin < 4
    plotFlag = false; % default: no per-iteration plotting
end
%**** Initialize stuff ***
[numDim,numPts] = size(dataPts);
numClust = 0;
bandSq = bandWidth^2;
initPtInds = 1:numPts;
maxPos = max(dataPts,[],2); % biggest value in each dimension
minPos = min(dataPts,[],2); % smallest value in each dimension
boundBox = maxPos-minPos; % bounding box size
sizeSpace = norm(boundBox); % indicator of size of data space
stopThresh = 1e-3*bandWidth; % when mean has converged
clustCent = []; % centers of the clusters
beenVisited = false(1,numPts); % track if a point has been seen already
numInitPts = numPts; % number of points to possibly use as initialization points
clusterVotes = zeros(1,numPts,'uint16'); % used to resolve conflicts on cluster membership
clustMembsCell = [];
%*** mean function with the chosen kernel ****
switch kernel
    case 'flat' % flat kernel
        kmean = @(x,dis) mean(x,2);
    case 'gaussian' % approximated gaussian kernel
        kmean = @(x,d) gaussfun(x,d,bandWidth);
    otherwise
        error('unknown kernel type');
end
while numInitPts
    tempInd = ceil((numInitPts-1e-6)*rand); % pick a random seed point
    stInd = initPtInds(tempInd); % use this point as start of mean
    myMean = dataPts(:,stInd); % initialize mean to this point's location
    myMembers = []; % points that will get added to this cluster
    thisClusterVotes = zeros(1,numPts,'uint16'); % used to resolve conflicts on cluster membership

    while true % loop until convergence
        sqDistToAll = sum(bsxfun(@minus,myMean,dataPts).^2); % dist squared from mean to all points still active
        inInds = find(sqDistToAll < bandSq); % points within bandWidth
        thisClusterVotes(inInds) = thisClusterVotes(inInds)+1; % add a vote for all the in points belonging to this cluster

        myOldMean = myMean; % save the old mean
        myMean = kmean(dataPts(:,inInds),sqrt(sqDistToAll(inInds))); % compute the new mean
        myMembers = [myMembers inInds]; % add any point within bandWidth to the cluster
        beenVisited(myMembers) = true; % mark that these points have been visited

        %*** plot stuff ****
        if plotFlag
            figure(12345), clf, hold on
            if numDim == 2
                plot(dataPts(1,:),dataPts(2,:),'.')
                plot(dataPts(1,myMembers),dataPts(2,myMembers),'ys')
                plot(myMean(1),myMean(2),'go')
                plot(myOldMean(1),myOldMean(2),'rd')
                pause(0.1);
            end
        end

        %**** if mean doesn't move much stop this cluster ***
        if norm(myMean-myOldMean) < stopThresh
            % check for merge possibilities
            mergeWith = 0;
            for cN = 1:numClust
                distToOther = norm(myMean-clustCent(:,cN)); % distance from possible new clust max to old clust max
                if distToOther < bandWidth/2 % if it's within bandWidth/2 merge new and old
                    mergeWith = cN;
                    break;
                end
            end

            if mergeWith > 0 % something to merge
                nc = numel(myMembers); % current cluster's member number
                no = numel(clustMembsCell{mergeWith}); % old cluster's member number
                nw = [nc;no]/(nc+no); % weights for merging means
                clustMembsCell{mergeWith} = unique([clustMembsCell{mergeWith},myMembers]); % record which points are inside
                clustCent(:,mergeWith) = myMean*nw(1) + clustCent(:,mergeWith)*nw(2); % member-weighted merge with the existing center
                clusterVotes(mergeWith,:) = clusterVotes(mergeWith,:) + thisClusterVotes; % add these votes to the merged cluster
            else % it's a new cluster
                numClust = numClust+1; % increment clusters
                clustCent(:,numClust) = myMean; % record the mean
                clustMembsCell{numClust} = myMembers; % store my members
                clusterVotes(numClust,:) = thisClusterVotes; % creates a new vote
            end
            break;
        end
    end

    initPtInds = find(~beenVisited); % we can initialize with any of the points not yet visited
    numInitPts = length(initPtInds); % number of active points in set
end
[~,data2cluster] = max(clusterVotes,[],1); % a point belongs to the cluster with the most votes

%*** If they want the cluster2data cell, find it for them
if nargout > 2
    cluster2dataCell = cell(numClust,1);
    for cN = 1:numClust
        myMembers = find(data2cluster == cN);
        cluster2dataCell{cN} = myMembers;
    end
end

end
KNN:
I = double(imread('42049_colorBird.jpg'));
%lab_I = rgb2lab(I);
ab = I(:,:,1:3);
ab = im2single(ab);
%K=2,3,4,5
K=2;
% repeat the clustering 3 times to avoid local minima
pixel_labels = imsegkmeans(ab,K,'NumAttempts',3);
imshow(pixel_labels,[])
title('Image Labeled by Cluster 2');

GMM:
clear; clc;
[X1,X2]=generateData();
K=3;
[a_init,mu_init,sigma_init] = initPara(X1);
[a_GMM,mu_GMM,sigma_GMM] = GMM(X1,a_init,mu_init,sigma_init);
[centroids,~] = Kmeans(X1,mu_init,K); % Kmeans is the author's own k-means routine (not included in this post)
disp('K-means centroids (X1):')
disp(centroids);
disp('GMM with random initialization (X1):');
disp('means:');
disp(mu_GMM);
disp('covariance matrices:')
disp(sigma_GMM);
disp(['mixing coefficients: ',num2str(a_GMM)]);
[a_Kea_GMM,mu_Kea_GMM,sigma_Kea_GMM] = GMM(X1,a_init,centroids,sigma_init);
disp('GMM with k-means initialization (X1):');
disp('means:');
disp(mu_Kea_GMM);
disp('covariance matrices:')
disp(sigma_Kea_GMM);
disp(['mixing coefficients: ',num2str(a_Kea_GMM)]);
[a_init,mu_init,sigma_init] = initPara(X2);
[a_GMM,mu_GMM,sigma_GMM] = GMM(X2,a_init,mu_init,sigma_init);
[centroids,~] = Kmeans(X2,mu_init,K);
disp('K-means centroids (X2):')
disp(centroids);
disp('GMM with random initialization (X2):');
disp('means:');
disp(mu_GMM);
disp('covariance matrices:')
disp(sigma_GMM);
disp(['mixing coefficients: ',num2str(a_GMM)]);
[a_Kea_GMM,mu_Kea_GMM,sigma_Kea_GMM] = GMM(X2,a_init,centroids,sigma_init);
disp('GMM with k-means initialization (X2):');
disp('means:');
disp(mu_Kea_GMM);
disp('covariance matrices:')
disp(sigma_Kea_GMM);
disp(['mixing coefficients: ',num2str(a_Kea_GMM)]);
function [X1,X2]=generateData()
mu = [1,1;4,4;8,1];
sigma = [2,0;0,2];
a1 = [1/3,1/3,1/3];
a2 = [0.6,0.3,0.1];
N = 1000;
rand1 = randsrc(N,1,[[1,2,3];a1]); % component labels drawn with mixing weights a1 (randsrc: Communications Toolbox)
rand2 = randsrc(N,1,[[1,2,3];a2]);
X1 = [];
X2 = [];
mean1 = [];
for i = 1:size(a1,2)
X1_temp = mvnrnd(mu(i,:),sigma,length(find(rand1==i)));
X1 = [X1;X1_temp];
subplot(1,2,1);
plot(X1_temp(:,1),X1_temp(:,2),'+');
title('X1');
legend('m1','m2','m3');
xlabel('x');ylabel('y');
hold on;
mean1 = [mean1;mean(X1_temp)];
cov1(:,:,i) = cov(X1_temp);
X2_temp = mvnrnd(mu(i,:),sigma,length(find(rand2==i)));
X2 = [X2;X2_temp];
subplot(1,2,2);
plot(X2_temp(:,1),X2_temp(:,2),'*');
title('X2');
legend('m1','m2','m3');
xlabel('x'); ylabel('y');
hold on;
end
disp('empirical means (X1):');
disp(mean1);
disp('empirical covariances (X1):');
disp(cov1);
end
function [ a,mu,sigma ] = initPara(X)
[m,n]=size(X);
r=randperm(m);
mu=X(r(1:3),:);
a=[1/3,1/3,1/3];
sigma = repmat([1,0;0,1],1,1,3); % one 2x2 identity covariance per component, so GMM can index sigma(:,:,i)
end

function [a_fin,mu_fin,sigma_fin] = GMM(X1,a_init,mu_init,sigma_init)
K = size(a_init,2);
[M,N] = size(X1);
a = a_init;
mu = mu_init;
sigma = sigma_init;
px = zeros(M,K);
thre = 1e-5;
LLD_pro = -inf; % previous log-likelihood; start at -inf so the first iteration cannot trigger convergence
while true
for i = 1:K
px(:,i) = mvnpdf(X1,mu(i,:),sigma(:,:,i)); % density of each component at every sample
end
pGramm = repmat(a,M,1).*px;
pGramm = pGramm./repmat(sum(pGramm,2),1,K); % posterior responsibilities (E-step)
Nk = sum(pGramm,1); % 1*K effective counts
mu = diag(1./Nk)*pGramm'*X1; % update means (M-step)
for kk=1:K
Xshift=X1-repmat(mu(kk,:),M,1);
sigma(:,:,kk)=(Xshift'*diag(pGramm(:,kk))*Xshift)/Nk(:,kk);
end
a = Nk/M; % update mixing coefficients
LLD = sum(log(a*px')); % log-likelihood
if (LLD-LLD_pro) < thre % EM increases the likelihood, so a small gain means convergence
break;
end
LLD_pro = LLD;
end
a_fin=a;
mu_fin=mu;
sigma_fin=sigma;
end