潮生明月
2015-12-01 14:08
采纳率: 0%
浏览 1.7k

Kmeans算法,C++实现,但是有点问题,想请大家帮忙解决一下,谢谢

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// Splits `str` into the pieces that lie between occurrences of the delimiter `p`.
//
// Empty fields are preserved ("a,,b," yields {"a", "", "b", ""}) and the
// result always holds at least one element: an input that contains no
// delimiter comes back as a single token. The whole delimiter is consumed at
// every match, so multi-character delimiters are handled. An empty delimiter
// cannot separate anything, so the input is returned unsplit in that case.
std::vector<std::string> split(const std::string& str, const std::string& p)
{
    std::vector<std::string> data;
    if (p.empty())
    {
        data.push_back(str);
        return data;
    }

    std::string::size_type start = 0;
    for (;;)
    {
        const std::string::size_type pos = str.find(p, start);
        if (pos == std::string::npos)
        {
            // Whatever follows the last delimiter (possibly nothing) is the final token.
            data.push_back(str.substr(start));
            break;
        }
        data.push_back(str.substr(start, pos - start));
        start = pos + p.size();  // skip the full delimiter, not just one character
    }
    return data;
}

// Parses the leading part of `str` as a value of type `Type` using stream
// extraction (leading whitespace is skipped, trailing characters are ignored).
//
// The result is value-initialised first, so text that cannot be parsed at
// all - including an empty or blank string, where extraction never starts -
// yields Type{} (0 for arithmetic types) instead of an indeterminate value.
template <class Type>
Type stringToNum(const std::string& str)
{
    std::istringstream iss(str);
    Type num{};
    iss >> num;
    return num;
}

//选取随机点
vector> selectPoint(vector> str, int k)
{
RNG rng(10);
int *a=NULL;
vector> center;

    for(int i=0; i<k; i++)
    {
        a[i] = rng.uniform(0,str.size());
        center.push_back(str[a[i]]);
    }
    return center;

}
// Euclidean distance between two points.
//
// The square root is taken once, over the sum of squared differences; taking
// it per component would add up absolute differences (Manhattan distance),
// which does not match the mean-based centroid update used by k-means.
// If the vectors differ in length only the leading components they share are
// compared, so the function never reads past the end of either one.
float distance(const std::vector<float>& str1, const std::vector<float>& str2)
{
    const std::size_t dims = std::min(str1.size(), str2.size());
    float sumSquares = 0.0f;
    for (std::size_t i = 0; i < dims; ++i)
    {
        const float diff = str1[i] - str2[i];
        sumSquares += diff * diff;
    }
    return std::sqrt(sumSquares);
}
//根据质心进行分类
vector> classifier(vector> datas, int k, vector> center)
{
vector> classData = datas;
vector> dist;
//根据初始中心点个数来生成存储距离的vector,有k个vector存在一个总的vector中
for(int l=0; l {
vector dist1;
dist.push_back(dist1);
}
//先计算每个样本到所有点的距离存在dist中
for(int i=0; i {
for(vector::size_type j=0; j {
dist[i].push_back(distance(datas[j],center[i]));
}
}
//根据距离来区分所属类别
for(vector::size_type i=0; i {
float *d=NULL;
float max=0;
int number;
for(int j=0; j {
d[j]=dist[j][i];
}
max=d[0];
number=0;
for(int num=0; num {
if(d[num]>max)
number=num;
}

    classData[i].push_back(number);
}

return classData;

}
// Recomputes each centroid as the mean of the samples assigned to it.
//
// datas : labelled samples; the last element of every row is the cluster
//         index (a whole number stored as float), the elements before it
//         are the features.
// num   : number of clusters.
//
// Returns `num` centroids, each with as many components as a sample has
// features (the label column is not part of a centroid). The feature count is
// taken from the first row. Rows of a different length, and rows whose label
// is not a whole number in [0, num), are ignored. A cluster without any
// sample is returned as an all-zero centroid rather than dividing by zero.
// The result is empty when num <= 0.
std::vector<std::vector<float>> newCenter(const std::vector<std::vector<float>>& datas, int num)
{
    std::vector<std::vector<float>> v;
    if (num <= 0)
        return v;

    const std::size_t clusters = static_cast<std::size_t>(num);
    const std::size_t dims =
        (datas.empty() || datas.front().empty()) ? 0 : datas.front().size() - 1;

    v.assign(clusters, std::vector<float>(dims, 0.0f));
    std::vector<int> number(clusters, 0);  // samples seen per cluster

    // Accumulate the per-cluster sum of every feature.
    for (std::size_t i = 0; i < datas.size(); ++i)
    {
        const std::vector<float>& row = datas[i];
        if (row.size() != dims + 1)
            continue;

        const float tag = row[dims];
        // Written so that NaN fails the test and is skipped as well.
        if (!(tag >= 0.0f && tag < static_cast<float>(num)))
            continue;
        const std::size_t label = static_cast<std::size_t>(tag);
        if (static_cast<float>(label) != tag)
            continue;

        ++number[label];
        for (std::size_t s = 0; s < dims; ++s)
            v[label][s] += row[s];
    }

    // Turn the sums into means.
    for (std::size_t c = 0; c < clusters; ++c)
    {
        if (number[c] == 0)
            continue;
        for (std::size_t s = 0; s < dims; ++s)
            v[c][s] /= static_cast<float>(number[c]);
    }
    return v;
}
//kmeans
vector> kmeans(vector> datas, int k, vector> center)
{
vector> classData;
vector> centerOld;
float dist;
do
{
classData = classifier(datas,k,center);
centerOld=center;
center = newCenter(classData,k);
dist = distance(center,centerOld);
}while(dist>0.1);
return classData;
}

int main()
{
    // Expected input: one comma-separated record per line, numeric features
    // first and a text label last, e.g.
    //   5.1,3.5,1.4,0.2,Iris-setosa
    //   4.9,3.0,1.4,0.2,Iris-setosa
    //   4.7,3.2,1.3,0.2,Iris-setosa
    // Only the numeric columns are stored; the trailing label is dropped.
    // The file is read from its very first line (no header row is assumed).
    std::ifstream infile;
    infile.open("D:/研究生/iris.txt", std::ios_base::in);
    if (!infile.is_open())
    {
        std::cerr << "cannot open input file" << '\n';
        return 1;
    }

    // Read the samples.
    std::vector<std::vector<float>> datas;
    std::string data;
    // Testing getline itself (rather than good() beforehand) stops the loop
    // at end of file without appending a bogus empty record.
    while (std::getline(infile, data))
    {
        const std::vector<std::string> p = split(data, ",");
        if (p.size() < 2)
            continue;  // blank line or no numeric column

        std::vector<float> d;
        for (std::size_t i = 0; i + 1 < p.size(); i++)
        {
            d.push_back(stringToNum<float>(p[i]));
        }
        // Every sample must have the same number of features as the first one.
        if (!datas.empty() && d.size() != datas.front().size())
            continue;

        datas.push_back(d);
    }
    if (datas.empty())
    {
        std::cerr << "no samples found in input file" << '\n';
        return 1;
    }

    // K-means
    int k = 0;
    std::cout << "print the center number:";
    if (!(std::cin >> k) || k <= 0)
    {
        std::cerr << "the center number must be a positive integer" << '\n';
        return 1;
    }

    std::vector<std::vector<float>> center = selectPoint(datas, k);
    datas = kmeans(datas, k, center);

    // Each output line: the features followed by the cluster index, every
    // value terminated by a comma.
    for (std::size_t i = 0; i < datas.size(); i++)
    {
        for (std::size_t j = 0; j < datas[i].size(); j++)
        {
            std::cout << datas[i][j] << ",";
        }
        std::cout << '\n';
    }

    return 0;
}

相关推荐 更多相似问题