#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <numeric>
#include <random>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
// Splits `str` into the pieces that lie between occurrences of the delimiter `p`.
//
// Empty pieces are kept: "a,,b" yields {"a", "", "b"}, a trailing delimiter
// yields a trailing empty piece, and an empty `str` yields a single empty piece.
// The delimiter may be longer than one character. An empty delimiter cannot
// split anything, so `str` is returned as the only piece.
std::vector<std::string> split(const std::string& str, const std::string& p)
{
    std::vector<std::string> data;
    if (p.empty())
    {
        data.push_back(str);
        return data;
    }

    // Walk the input with a moving start offset instead of repeatedly
    // re-copying the unprocessed tail.
    std::string::size_type start = 0;
    while (true)
    {
        const std::string::size_type pos = str.find(p, start);
        if (pos == std::string::npos)
        {
            data.push_back(str.substr(start));
            break;
        }
        data.push_back(str.substr(start, pos - start));
        start = pos + p.size(); // skip the whole delimiter, not just one char
    }
    return data;
}
// Parses the leading token of `str` as a value of type `Type` using stream
// extraction (`operator>>`), so leading whitespace is skipped.
//
// The result is value-initialized first, so when nothing can be extracted
// (for example an empty string) an arithmetic `Type` yields 0 instead of an
// indeterminate value.
template <class Type>
Type stringToNum(const std::string& str)
{
    std::istringstream iss(str);
    Type num{};
    iss >> num;
    return num;
}
// Picks `k` samples from `str` to serve as the initial cluster centres.
//
// The sample indices are shuffled with a generator seeded with the fixed
// value 10, so repeated runs on the same data choose the same centres.
// While `k` does not exceed the number of samples, every centre comes from a
// different sample index; beyond that the shuffled order is reused from the
// start so that exactly `k` centres are still returned.
// Returns an empty vector when `str` is empty or `k` is not positive.
std::vector<std::vector<float>> selectPoint(const std::vector<std::vector<float>>& str, int k)
{
    std::vector<std::vector<float>> center;
    if (str.empty() || k <= 0)
        return center;

    std::vector<std::size_t> order(str.size());
    std::iota(order.begin(), order.end(), std::size_t{0});

    std::mt19937 rng(10);
    std::shuffle(order.begin(), order.end(), rng);

    const std::size_t count = static_cast<std::size_t>(k);
    center.reserve(count);
    for (std::size_t i = 0; i < count; ++i)
        center.push_back(str[order[i % order.size()]]);
    return center;
}
// Returns the Euclidean distance between two points.
//
// The squared differences are summed first and the square root is taken once
// at the end; taking the root of every squared component separately would give
// the sum of absolute differences instead.
// Only the components present in both vectors are compared, so vectors of
// unequal length never cause an out-of-range read. Two empty vectors are at
// distance 0.
float distance(const std::vector<float>& str1, const std::vector<float>& str2)
{
    const std::size_t dims = std::min(str1.size(), str2.size());
    float sumSq = 0.0f;
    for (std::size_t i = 0; i < dims; ++i)
    {
        const float diff = str1[i] - str2[i];
        sumSq += diff * diff;
    }
    return std::sqrt(sumSq);
}
//根据质心进行分类
vector> classifier(vector> datas, int k, vector> center)
{
vector> classData = datas;
vector> dist;
//根据初始中心点个数来生成存储距离的vector,有k个vector存在一个总的vector中
for(int l=0; l
{
vector dist1;
dist.push_back(dist1);
}
//先计算每个样本到所有点的距离存在dist中
for(int i=0; i
{
for(vector::size_type j=0; j
{
dist[i].push_back(distance(datas[j],center[i]));
}
}
//根据距离来区分所属类别
for(vector::size_type i=0; i
{
float *d=NULL;
float max=0;
int number;
for(int j=0; j
{
d[j]=dist[j][i];
}
max=d[0];
number=0;
for(int num=0; num
{
if(d[num]>max)
number=num;
}
classData[i].push_back(number);
}
return classData;
}
// Computes the centroid (per-feature mean) of every cluster.
//
// datas - labelled samples: each row holds the feature values followed by one
//         trailing element that is the row's cluster index.
// num   - number of clusters.
//
// Returns `num` centroids. The number of features is taken from the first row
// (its size minus the label). Rows that are empty, or whose label is not an
// integer in [0, num), are ignored. A cluster without any member keeps an
// all-zero centroid, because no mean can be computed for it.
// Returns an empty vector when `num` is not positive.
std::vector<std::vector<float>> newCenter(const std::vector<std::vector<float>>& datas, int num)
{
    if (num <= 0)
        return std::vector<std::vector<float>>();

    const std::size_t clusters = static_cast<std::size_t>(num);
    const std::size_t dims =
        (datas.empty() || datas[0].empty()) ? 0 : datas[0].size() - 1;

    std::vector<std::vector<float>> v(clusters, std::vector<float>(dims, 0.0f));
    std::vector<std::size_t> number(clusters, 0); // members per cluster

    // Accumulate the per-feature sums of every cluster.
    for (const std::vector<float>& row : datas)
    {
        if (row.empty())
            continue;
        const float label = row.back();
        if (!(label >= 0.0f && label < static_cast<float>(clusters)))
            continue; // also rejects NaN
        const std::size_t j = static_cast<std::size_t>(label);
        if (static_cast<float>(j) != label)
            continue; // fractional label matches no cluster

        ++number[j];
        const std::size_t width = std::min(dims, row.size() - 1);
        for (std::size_t s = 0; s < width; ++s)
            v[j][s] += row[s];
    }

    // Turn the sums into means; empty clusters are left untouched.
    for (std::size_t c = 0; c < clusters; ++c)
    {
        if (number[c] == 0)
            continue;
        for (std::size_t s = 0; s < dims; ++s)
            v[c][s] /= static_cast<float>(number[c]);
    }
    return v;
}
//kmeans
vector> kmeans(vector> datas, int k, vector> center)
{
vector> classData;
vector> centerOld;
float dist;
do
{
classData = classifier(datas,k,center);
centerOld=center;
center = newCenter(classData,k);
dist = distance(center,centerOld);
}while(dist>0.1);
return classData;
}
// Reads comma-separated samples from a text file, asks for the number of
// clusters, runs k-means and prints every sample followed by its cluster index.
//
// Expected line layout: numeric features followed by a textual class name, e.g.
//   5.1,3.5,1.4,0.2,Iris-setosa
//   4.9,3.0,1.4,0.2,Iris-setosa
// Only the numeric fields are stored; the last field of every line is dropped.
//
// Returns 0 on success and 1 when the file cannot be opened, contains no
// usable sample, or the entered cluster count is not a positive integer.
int main()
{
    const char* const path = "D:/研究生/iris.txt";
    std::ifstream infile;
    infile.open(path, std::ios_base::in);
    if (!infile.is_open())
    {
        std::cerr << "cannot open input file: " << path << std::endl;
        return 1;
    }

    std::string data;
    // NOTE(review): the first line is read and discarded, as before. If the
    // file has no header line this drops the first sample - confirm against
    // the actual data file.
    std::getline(infile, data);

    // Read the samples. Looping on the result of getline avoids processing a
    // phantom empty line after the last real one.
    std::vector<std::vector<float>> datas;
    while (std::getline(infile, data))
    {
        if (data.empty())
            continue;
        const std::vector<std::string> p = split(data, ",");
        std::vector<float> d;
        // `i + 1 < size` skips the last field without unsigned underflow.
        for (std::size_t i = 0; i + 1 < p.size(); i++)
        {
            d.push_back(stringToNum<float>(p[i]));
        }
        if (!d.empty())
            datas.push_back(d);
    }
    if (datas.empty())
    {
        std::cerr << "no samples found in input file: " << path << std::endl;
        return 1;
    }

    // K-means
    int k = 0;
    std::cout << "print the center number:";
    if (!(std::cin >> k) || k <= 0)
    {
        std::cerr << "the center number must be a positive integer" << std::endl;
        return 1;
    }

    const std::vector<std::vector<float>> center = selectPoint(datas, k);
    datas = kmeans(datas, k, center);

    for (std::size_t i = 0; i < datas.size(); i++)
    {
        for (std::size_t j = 0; j < datas[i].size(); j++)
        {
            std::cout << datas[i][j] << ",";
        }
        std::cout << std::endl;
    }
    return 0;
}