机器学习问题:使用多项式回归(Polynomial Regression),在 Jupyter 中运行。思路是先从 Excel 读取数据,再用 LabelEncoder 对列名重新排序并编号,最后进行回归。
问题:无论怎样修改 degree 的数值,输出的图像都和 degree=2 时一样,但每次得到的 MSE 却不相同。
import numpy as np
import sklearn.linear_model as lm
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
# Polynomial regression of total population on calendar year.
#
# Years 1950-2013 are used for fitting, 2014-2022 are held out and compared
# against the values in TEST. Change DEGREE to try a different polynomial.
DEGREE = 2  # polynomial degree used by PolynomialFeatures

# Only the first row after the header is read.
df = pd.read_excel('data.xlsx', sheet_name='T5', header=9, nrows=1)
data = df.iloc[0]  # read 2022 to 1950 into data
Y = data.iloc[1:]
le = LabelEncoder()
# Encoded column labels; not used by the fit (X is rebound before plotting).
X = le.fit_transform(df.columns[1:])

# Training years and their values. The lookup assumes the value for a year
# sits at position 2023 - year in the row (2022 at position 1, 1950 at 73).
# `.iloc` makes that lookup explicitly positional; plain `data[...]` with
# integer keys depends on the index dtype and is deprecated as a positional
# fallback in recent pandas.
datasets_X = np.arange(1950, 2014, step=1)
dataset_length = len(datasets_X)
datasets_Y = data.iloc[2023 - datasets_X].to_numpy(dtype=float)
datasets_X = datasets_X.reshape([dataset_length, 1])

# Hold-out years.
test_X = np.arange(2014, 2023, step=1)
test_length = len(test_X)
test_X = test_X.reshape([test_length, 1])

# Standardize the years before expanding them into powers. Raw years are
# around 2000, so year**k grows extremely fast and the power columns become
# almost collinear; the least-squares solve is then ill-conditioned, which
# can make every degree produce nearly the same curve while the MSE still
# drifts. Centered, unit-variance inputs keep the powers well scaled.
year_scaler = preprocessing.StandardScaler()
poly_reg = PolynomialFeatures(degree=DEGREE)
X_poly = poly_reg.fit_transform(
    year_scaler.fit_transform(datasets_X.astype(float))
)
lin_reg = lm.LinearRegression()
lin_reg.fit(X_poly, datasets_Y)

# Reuse the fitted scaler and feature map (transform, not fit_transform) so
# the hold-out years go through exactly the same mapping as the training set.
data1 = poly_reg.transform(year_scaler.transform(test_X.astype(float)))
pred = lin_reg.predict(data1)
TEST = [5469724, 5535002, 5607283, 5612253, 5638676, 5703569, 5685807,
        5453566, 5637022]
mse = mean_squared_error(TEST, pred)

# Training points.
plt.scatter(datasets_X, datasets_Y, s=10)
my_x_ticks = np.arange(1950, 2023, 10)
plt.xticks(my_x_ticks)

# Fitted curve over the full 1950-2022 range.
X = np.arange(1950, 2023).reshape([-1, 1])
curve_features = poly_reg.transform(year_scaler.transform(X.astype(float)))
plt.plot(X, lin_reg.predict(curve_features), color='black')
plt.xlabel("Year")
plt.ylabel("Total Population(*$10^6)$")
plt.title("Multiple Regression")

# Hold-out points.
plt.scatter(test_X, TEST, s=10)
plt.show()
print(mse)