def affine_backward(dout, cache):
    """Backward pass for an affine (fully-connected) layer: out = x.w + b.

    Inputs:
    - dout: upstream gradient of the loss w.r.t. the layer output, shape (N, M)
    - cache: tuple (x, w, b) saved by the forward pass, where
      x has shape (N, d_1, ..., d_k), w has shape (D, M), b has shape (M,),
      and D = d_1 * ... * d_k

    Returns a tuple of:
    - dx: gradient w.r.t. x, shape (N, d_1, ..., d_k)
    - dw: gradient w.r.t. w, shape (D, M)
    - db: gradient w.r.t. b, shape (M,)
    """
    x, w, b = cache
    # Chain rule: out = x_row.w + b, so dL/dx_row = dout.w^T  ->  (N, D)
    dx = np.dot(dout, w.T)
    # The forward pass flattened x to (N, D); undo that so dx matches x exactly.
    dx = np.reshape(dx, x.shape)                # (N, d_1, ..., d_k)
    # Flatten x the same way the forward pass did.
    x_row = x.reshape(x.shape[0], -1)           # (N, D)
    # dL/dw = x_row^T.dout  ->  (D, M)
    dw = np.dot(x_row.T, dout)
    # b is added to every row, so its gradient sums dout over the batch axis.
    # No keepdims: db must have shape (M,) to match b, otherwise an update
    # like `b -= lr * db` would broadcast b to shape (1, M).
    db = np.sum(dout, axis=0)                   # (M,)
    return dx, dw, db
上面是仿射层(全连接层)的反向传播函数;它通常与 ReLU 激活函数搭配使用(ReLU 的反向传播是另一个独立的函数)。
之前不理解的两行现在可以这样解释:第一行 dx = np.dot(dout, w.T) 来自链式法则——前向传播计算 out = x·w + b,所以 ∂L/∂x = ∂L/∂out · wᵀ,即 dout(N,M) 乘以 w.T(M,D) 得到 (N,D) 的梯度;第二行的 reshape 是因为前向传播时把 x 从原始形状 (N, d1, ..., d_k) 展平成了 (N, D),反向求出的 dx 形状是 (N, D),必须再变回 x 的原始形状才能一一对应。x.shape 就是输入 x 的原始形状元组 (N, d1, ..., d_k),例如输入图像时可能是 (N, 3, 32, 32)。