今天看别人的代码,写的是处理mysql的数据,想改成处理mongo数据怎么改啊
我稍微尝试了一下,其实就改了个数据库连接那块
import pandas as pd
from sklearn.cluster import KMeans
from pandas import DataFrame
from scipy.cluster.hierarchy import linkage,dendrogram
import matplotlib.pyplot as plt
import pymongo
if __name__ == '__main__':
conn = pymongo.MongoClient()
# cursor = conn.cursor()
# sql = "select * from DB"
# cursor.execute(sql)
results = conn.db.xinxi.find()
resultList = []
for result in results:
resultList.append(result)
# print(resultList)
df = DataFrame(resultList)
# print(df.head())
# 绘制散点图 评分,参与评论人数
plt.figure(figsize=(10, 8))
plt.scatter(df[8].astype(float), df[7].astype(float))
plt.show()
scoreDf = pd.DataFrame(df, columns=[7, 8])
scoreDf.head()
k = 6 # 聚类的类别
iteration = 500 # 聚类最大循环次数
model = KMeans(n_clusters=k,
n_jobs=1,
max_iter=iteration) # 分为k类,并发数1,数值大系统卡死
model.fit(scoreDf) # 开始聚类
# 详细输出原始数据及其类别
res = pd.concat([df,
pd.Series(model.labels_, index=df.index)],
axis=1) # 详细输出每个样本对应的类别
res.columns = list(df.columns) + [u'class'] # 重命名表头
# 根据聚类画出分类统计图
for col in res.columns:
if col in [u'class']:
fig = plt.figure()
res[col].hist(bins=20)
fig.show()
res.to_excel('knn_result.xls') # 保存结果
这个是运行成果,请问该怎么写呢,我刚学
D:\Anaconda\Anaconda\python.exe D:/PyCharm/DouBanScrapy-master/code/FisrtKNN.py
Traceback (most recent call last):
File "D:\Anaconda\Anaconda\lib\site-packages\pandas\core\indexes\base.py", line 3361, in get_loc
return self._engine.get_loc(casted_key)
File "pandas\_libs\index.pyx", line 76, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 8
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "D:\PyCharm\DouBanScrapy-master\code\FisrtKNN.py", line 30, in <module>
plt.scatter(df[8].astype(float), df[7].astype(float))
File "D:\Anaconda\Anaconda\lib\site-packages\pandas\core\frame.py", line 3458, in __getitem__
indexer = self.columns.get_loc(key)
File "D:\Anaconda\Anaconda\lib\site-packages\pandas\core\indexes\base.py", line 3363, in get_loc
raise KeyError(key) from err
KeyError: 8