2022-03-13 17:18

# 学校要求手动用python实现lasso回归，我把随即出来的数据集换成学校给的数据之后那根红色的拟合线就出不来了，那根黄色的线性回归拟合现正常，想知道是哪里出现了问题

###### 我想要达到的结果

import numpy as np
from matplotlib import pyplot as plt
import sklearn.datasets
import pandas as pd

#生成100个一元回归数据集
#x, y = sklearn.datasets.make_regression(n_features=1, noise=5, random_state=2020)下面我换成了自己的数据
x = df['T2C_attarchment_F_sum'].values.reshape(-1,1)
y = df['T2C_attarchment_M_sum'].values
plt.scatter(x, y)
plt.show()

#加5个异常数据
a = np.linspace(1,2,5).reshape(-1,1)
b = np.array([350,380,410,430,480])

#生成新的数据集
x_1 = np.r_[x, a]
y_1 = np.r_[y, b]

plt.scatter(x_1, y_1)
plt.show()

class normal():
def init(self):
pass

``````def fit(self, x, y):
m = x.shape[0]
X = np.concatenate((np.ones((m, 1)), x), axis=1)
xMat = np.mat(X)
yMat = np.mat(y.reshape(-1, 1))

xTx = xMat.T * xMat
# xTx.I为xTx的逆矩阵
ws = xTx.I * xMat.T * yMat

# 返回参数
return ws
``````

import copy

def CoordinateDescent(x, y, epochs, learning_rate, Lambda):
m = x.shape[0]
X = np.concatenate((np.ones((m, 1)), x), axis=1)
xMat = np.mat(X)
yMat = np.mat(y.reshape(-1, 1))

``````w = np.ones(X.shape[1]).reshape(-1, 1)

for n in range(epochs):

out_w = copy.copy(w)
for i, item in enumerate(w):
# 在每一个W值上找到使损失函数收敛的点
for j in range(epochs):
h = xMat * w
gradient = xMat[:, i].T * (h - yMat) / m + Lambda * np.sign(w[i])
w[i] = w[i] - gradient * learning_rate
break
out_w = np.array(list(map(lambda x: abs(x) < 1e-3, out_w - w)))
if out_w.all():
break
return w
``````

w = CoordinateDescent(x_1, y_1,epochs=250

,learning_rate=0.001,Lambda=0)
print(w)

plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
clf1 =normal()
#拟合原始数据
w1 = clf1.fit(x,y)
print(w1)
#预测数据
y_pred = x * w1[1] + w1[0]

#计算新的拟合值
y_1_pred = x_1 * w[1] + w[0]

ax1= plt.subplot()
ax1.scatter(x_1,y_1,label='样本分布')
ax1.plot(x,y_pred,c='y',label='原始样本拟合')
ax1.plot(x_1,y_1_pred,c='r',label='新样本拟合')
ax1.legend(prop = {'size':15}) #此参数改变标签字号的大小
plt.show()