# 特征缩放 目的:平衡各个属性之间的权重 方法:标准化、最小值-最大值归一化
# Age 和 Fare 的数值范围相差太大,需要缩放
from sklearn.preprocessing import StandardScaler
# Standardize the Age and Fare columns with StandardScaler so that the two
# features end up on a comparable scale.
ss_model = StandardScaler()
# By default fit_transform returns a bare array with no row labels. Rebuild the
# DataFrame with the source frame's index (rows stay in the same order) so that
# assigning these columns back into df1_after_impute aligns row-for-row instead
# of producing NaN on mismatched index labels.
df1_after_impute_ss = pd.DataFrame(
    ss_model.fit_transform(df1_after_impute[['Age', 'Fare']]),
    columns=['Age', 'Fare'],
    index=df1_after_impute.index,
)
df1_after_impute_ss  # notebook-style bare expression: displays the scaled frame
结果
Age Fare
0 -0.493883 -0.502445
1 0.775956 0.786845
2 -0.176423 -0.488854
3 0.537861 0.420730
4 0.537861 -0.486337
... ... ...
886 -0.097058 -0.386671
887 -0.731978 -0.044381
888 -0.335153 -0.176263
889 -0.176423 -0.044381
890 0.299766 -0.492378
891 rows × 2 columns
更改age和fare数据
# Write the scaled columns back by position rather than by index label.
# Assigning a Series aligns on the index; df1_after_impute_ss was built with a
# default 0..n-1 index, so any row of df1_after_impute whose label has no match
# there would silently become NaN. The scaled rows are in the same order as the
# source rows, so a positional (array) assignment is the correct mapping.
df1_after_impute['Age'] = df1_after_impute_ss['Age'].to_numpy()
df1_after_impute['Fare'] = df1_after_impute_ss['Fare'].to_numpy()
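执行
df1_after_impute.describe(include='all') 后,结果里居然出现了 NaN。
可能的原因:df1_after_impute_ss 使用的是默认的 0…n-1 索引,如果它与 df1_after_impute 的索引不一致,把 Series 直接赋值给列时会按索引标签对齐,对不上的行就会变成 NaN;另外,describe(include='all') 本身也会在不适用于该列类型的统计项(例如数值列的 unique/top/freq)上显示 NaN。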