def affine_backward(dout, cache):
    """Backward pass for an affine (fully connected) layer.

    Inputs:
    - dout: Upstream gradient, of shape (N, M)
    - cache: Tuple (x, w, b) saved by the forward pass:
      - x: Input data, of shape (N, d_1, ..., d_k)
      - w: Weights, of shape (D, M) where D = d_1 * ... * d_k
      - b: Biases, of shape (M,)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, b = cache                       # unpack values cached by the forward pass
    dx = np.dot(dout, w.T)                # (N, D)
    dx = np.reshape(dx, x.shape)          # restore original input shape (N, d_1, ..., d_k)
    x_row = x.reshape(x.shape[0], -1)     # flatten input to (N, D)
    dw = np.dot(x_row.T, dout)            # (D, M)
    # Fix: no keepdims, so db has shape (M,) matching b — the original (1, M)
    # shape breaks parameter updates that expect gradient.shape == param.shape.
    db = np.sum(dout, axis=0)             # (M,)
    return dx, dw, db
# Q: Why does the backward pass compute dx — shouldn't it only differentiate
#    with respect to w and b?
# A: dw and db ARE computed above (they drive this layer's parameter update),
#    but dx is equally necessary: by the chain rule, dx is the gradient passed
#    to the *previous* layer so backpropagation can continue through the network.