以下是我的代码。想请教一下:为什么运行时会出现 `mat1 and mat2 shapes cannot be multiplied (256x256 and 10x256)` 这个错误?应该如何修改?
import torch
import numpy as np
import torchvision
import torchvision.transforms as transforms
import sys
sys.path.append("C:/Users/zyx20/Desktop/深度学习编程/pythonProject")
import d2lzh_pytorch as d2l
batch_size = 256

# 0 means no extra worker processes are used to load data (chosen on Windows);
# every other platform uses 4 workers.
num_workers = 0 if sys.platform.startswith('win') else 4

# Fashion-MNIST train/test splits, downloaded on first use and converted to
# tensors by ToTensor().
mnist_train = torchvision.datasets.FashionMNIST(
    root='C:/Users/zyx20/Desktop/深度学习编程/MNIST/raw',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='C:/Users/zyx20/Desktop/深度学习编程/MNIST/raw',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Only the training batches are shuffled; the test order stays fixed.
train_iter = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_iter = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)
# Define the model parameters
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# np.random.normal returns float64; force float32 so the weights match the
# float32 biases created by torch.zeros (matmul requires a single dtype).
W1 = torch.tensor(
    np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float
)
b1 = torch.zeros(num_hiddens, dtype=torch.float)

# The output layer maps hidden -> outputs, so W2 must be
# (num_hiddens, num_outputs). With the axes swapped, H @ W2 fails with
# "mat1 and mat2 shapes cannot be multiplied (256x256 and 10x256)".
W2 = torch.tensor(
    np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float
)
b2 = torch.zeros(num_outputs, dtype=torch.float)

params = [W1, b1, W2, b2]
for param in params:
    # Track gradients so backward() populates .grad on every parameter.
    param.requires_grad_(requires_grad=True)
# Define the activation function
def relu(X):
    """Return the element-wise maximum of ``X`` and zero (ReLU).

    Args:
        X: Input tensor.

    Returns:
        A tensor shaped like ``X`` with every negative entry replaced by 0.
    """
    zero = torch.tensor(0.0)
    return torch.max(input=X, other=zero)
# Define the model
def net(X):
    """Run the two-layer perceptron on a batch and return the class scores.

    Args:
        X: Input batch; it is flattened to ``(-1, num_inputs)`` before the
            first layer.

    Returns:
        The output of the second affine layer, ``relu(X @ W1 + b1) @ W2 + b2``.
        No softmax is applied here.
    """
    X = X.view((-1, num_inputs))
    H = relu(torch.matmul(X, W1) + b1)
    return torch.matmul(H, W2) + b2
# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Training hyperparameters
# NOTE(review): lr looks very large; presumably it compensates for d2l.sgd
# scaling the step by batch_size -- confirm against d2lzh_pytorch.
num_epochs = 5
lr = 100.0
def _evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        data_iter: Iterable of ``(X, y)`` batches; ``y`` holds class indices.
        net: Callable mapping a batch ``X`` to per-class scores.

    Returns:
        Accuracy as a float, or ``0.0`` when ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = is_module and net.training
    if is_module:
        # Put layers such as dropout into inference mode while scoring.
        net.eval()
    correct, total = 0.0, 0
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for X, y in data_iter:
                correct += (net(X).argmax(dim=1) == y).sum().item()
                total += y.shape[0]
    finally:
        if was_training:
            net.train()
    return correct / total if total else 0.0


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss / accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used for the test accuracy.
        loss: Callable ``loss(y_hat, y)`` returning a tensor.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``d2l.sgd`` when no optimizer is given.
        params: Parameters updated by ``d2l.sgd``; used only when
            ``optimizer`` is None.
        lr: Learning rate forwarded to ``d2l.sgd``; used only when
            ``optimizer`` is None.
        optimizer: Optional optimizer; when given, its ``zero_grad()`` and
            ``step()`` replace the manual update.

    Returns:
        None. One summary line per epoch is printed. The reported loss is the
        sum of the per-batch loss values divided by the number of samples.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            batch_loss = loss(y_hat, y).sum()

            # Zero the gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            batch_loss.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            train_l_sum += batch_loss.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        # The original called an undefined ``evaluate_accuracy`` (NameError);
        # use the helper defined above instead.
        test_acc = _evaluate_accuracy(test_iter, net)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# Start training; no optimizer is passed, so train_ch3 uses params and lr.
train_ch3(
    net,
    train_iter,
    test_iter,
    loss,
    num_epochs,
    batch_size,
    params=params,
    lr=lr,
)