As the title says, I'm using two methods, CVX and gradient descent (GD), to solve a logistic regression problem. The problem is as follows:
On top of ordinary logistic regression, an L2 regularization term is added to constrain w and b.
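Concretely, the objective I intend both methods to minimize is the following (my transcription, assuming the standard squared-norm form of the L2 penalty, with the data term averaged over the N * mi points as in my GD code):

\min_{w,b}\; \frac{1}{N m_i}\sum_{i=1}^{N}\sum_{j=1}^{m_i} \log\bigl(1 + e^{-y_{ij}(w^\top x_{ij} + b)}\bigr) + \frac{C}{2}\bigl(\lVert w \rVert_2^2 + b^2\bigr)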

My code is as follows:
import numpy as np
import cvxpy as cp
N = 50
mi = 5
# N * mi data points in total; training is centralized (all points are pooled)
C = 1          # regularization strength
alpha = 0.001  # GD step size
eps = 1e-8     # GD stopping tolerance on the gradient
def logistic_loss(w, b, x, y):
    z = np.dot(x, w) + b
    return np.log(1 + np.exp(-y * z))
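# (Side note: np.exp(-y * z) can overflow for large margins; the numerically
# stable equivalent would be np.logaddexp(0, -y * z).)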
def gradient_logistic_loss(w, b, x, y):
    z = np.dot(x, w) + b
    common_term = -y / (1 + np.exp(y * z))
    grad_w = common_term * x
    grad_b = common_term
    return grad_w, grad_b
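# Derivation check: for f(z) = log(1 + exp(-y*z)) with z = w.x + b,
#   df/dz = -y * exp(-y*z) / (1 + exp(-y*z)) = -y / (1 + exp(y*z)),
# so grad_w = (df/dz) * x and grad_b = df/dz, matching common_term above.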
def optimal_solution_overall_GD(pc, labels):
    d = pc[0][0].shape[0]
    w_opt = np.zeros(d)
    b_opt = 0
    g_w = np.zeros(d)
    g_b = 0
    iteration = 0
    while True:
        sum_w = np.zeros(d)
        sum_b = 0
        iteration += 1
        for i in range(N):
            for j in range(mi):
                grad_w, grad_b = gradient_logistic_loss(w_opt, b_opt, pc[i][j], labels[i][j])
                sum_w += grad_w  # accumulate the gradients
                sum_b += grad_b
        # Average the data gradient over all N * mi points, then add the
        # regularizer gradient; C * w_opt is the gradient of (C/2) * ||w||_2^2
        g_w = sum_w / (N * mi) + C * w_opt
        g_b = sum_b / (N * mi) + C * b_opt
        w_opt -= alpha * g_w
        b_opt -= alpha * g_b
        if iteration % 2000 == 0:
            print("iteration_GD", iteration)
            print("g_w", g_w)
            print("g_b", g_b)
            print(w_opt)
            print(b_opt)
        # Stop once every component of the full gradient is below eps
        if np.all(np.abs(g_w) < eps) and np.all(np.abs(g_b) < eps):
            break
    return w_opt, b_opt
def logistic_loss_cvx(w, b, x, y):
    z = x @ w + b
    return cp.logistic(-y * z)
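# cp.logistic(u) is cvxpy's atom for log(1 + exp(u)), so cp.logistic(-y * z)
# matches the NumPy logistic_loss above.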
def optimal_solution_cvx(pc, labels, C):
    d = pc[0][0].shape[0]
    w = cp.Variable(d)
    b = cp.Variable()
    total_loss = 0
    for i in range(N):
        for j in range(mi):
            total_loss += logistic_loss_cvx(w, b, pc[i][j], labels[i][j])
    # Add the L2 regularization term
    total_loss += C / 2 * (cp.norm(w, 2) + cp.norm(b, 2))
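    # (Note: cp.norm(w, 2) is the unsquared norm ||w||_2 and cp.norm(b, 2) is
    # |b|; the squared forms would be cp.sum_squares(w) and cp.square(b).
    # total_loss here also sums the per-point losses rather than averaging.)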
    # Define the optimization problem
    problem = cp.Problem(cp.Minimize(total_loss))
    # Solve it
    problem.solve()
    return w.value, b.value
def loss_caculate(w, b, pc, labels):
    loss = 0
    for i in range(N):
        for j in range(mi):
            loss += np.log(1 + np.exp(-labels[i][j] * (np.dot(w, pc[i][j]) + b)))
    # (Note: this regularizer mixes the unsquared norm ||w||_2 with b**2,
    # and the data term is a sum, not an average)
    loss = loss + C / 2 * (np.linalg.norm(w, 2) + b**2)
    return loss
def main():
    # The dataset pc and the labels are both random
    pc = np.random.rand(N, mi, 2) * 100
    labels = np.random.choice([1, -1], size=(N, mi))
    w_GD, b_GD = optimal_solution_overall_GD(pc, labels)
    w_cvx, b_cvx = optimal_solution_cvx(pc, labels, C)
    loss1 = loss_caculate(w_GD, b_GD, pc, labels)
    loss2 = loss_caculate(w_cvx, b_cvx, pc, labels)
    print("w_GD", w_GD)
    print("w_cvx", w_cvx)
    print("b_GD", b_GD)
    print("b_cvx", b_cvx)
    print("loss1", loss1)
    print("loss2", loss2)

if __name__ == "__main__":
    main()
Part of the output is shown below (the data is random, so the results are not fixed):
w_GD [ 0.00223391 -0.00296834]
w_cvx [ 0.00309932 -0.00200936]
b_GD -0.005593343606514088
b_cvx -0.11333700307375219
loss1 172.80617003109705
loss2 172.7096589915864
As you can see from the values of w and b, the two methods produce different results.
I currently don't know which method has the problem.
The goal is to make the two results approximately agree.
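For reference, here is a small stationarity check I can run on both solutions (a sketch only; it assumes the averaged, squared-penalty objective written at the top, so it is one candidate ground truth rather than the definitive one). It evaluates that objective and its gradient at a given (w, b); whichever solution is actually optimal for it should have a near-zero gradient:

def check_stationarity(w, b, pc, labels):
    # Flatten the data to (N * mi, d) and evaluate the averaged objective
    X = pc.reshape(-1, 2)
    y = labels.reshape(-1)
    z = X @ w + b
    obj = np.mean(np.logaddexp(0, -y * z)) + C / 2 * (np.dot(w, w) + b**2)
    s = -y / (1 + np.exp(y * z))   # per-point dloss/dz
    g_w = X.T @ s / len(y) + C * w
    g_b = np.mean(s) + C * b
    return obj, np.linalg.norm(g_w), abs(g_b)

Running check_stationarity on both (w_GD, b_GD) and (w_cvx, b_cvx) would at least show which of the two satisfies the first-order condition of this particular objective, instead of only comparing loss values.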