以下是我的代码,想问一下运行后为何会出现TypeError: zeros(): argument 'size' must be tuple of ints, but found element of type Linear at pos 2的错误呢?该如何修改呢?
import time
import math
import zipfile
import numpy as np
import torch
from torch import nn,optim
import torch.nn.functional as F
import sys
sys.path.append("C:/Users/zyx20/Desktop/深度学习编程/pythonProject")
import d2lzh_pytorch as d2l
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with zipfile.ZipFile('C:/Users/zyx20/Desktop/深度学习编程/data20201205-master/Data20201205/jaychou_lyrics.txt.zip') as zin:
with zin.open('jaychou_lyrics.txt') as f:
corpus_chars=f.read().decode('utf-8')
corpus_chars=corpus_chars.replace('\n','').replace('\r','')
idx_to_char=list(set(corpus_chars))
char_to_idx=dict([(char,i) for i,char in enumerate(idx_to_char)])
vocab_size=len(char_to_idx)
corpus_indices=[char_to_idx[char] for char in corpus_chars]
num_hiddens=256
rnn_layer=nn.RNN(input_size=vocab_size,hidden_size=num_hiddens)
class RNNModel(nn.Module):
def __init__(self,rnn_layer,vocab_size):
super(RNNModel,self).__init__()
self.rnn=rnn_layer
self.hidden_size=rnn_layer.hidden_size*(2 if rnn_layer.bidirectional else 1)
self.vocab_size=nn.Linear(self.hidden_size,vocab_size)
self.state=None
def forward(self,inputs,state):
x=d2l.to_onehot(inputs,self.vocab_size)
y,self.state=self.rnn(torch.stack(x),state)
output=self.dense(y.view(-1,y.shape[-1]))
return output,self.state
#定义预测函数
def predict_rnn_pytorch(prefix,num_chars,model,vocab_size,device,idx_to_char,char_to_idx):
state=None
output=[char_to_idx[prefix[0]]]
for t in range(num_chars+len(prefix)-1):
x=torch.tensor([output[-1]],device=device).view(1,1)
if state is not None:
if isinstance(state,tuple):
state=(state[0].to(device),state[1].to(device))
else:
state=state.to(device)
(y,state)=model(x,state)
if t<len(prefix)-1:
output.append(char_to_idx[prefix[t+1]])
else:
output.append(int(y.argmax(dim=1).item()))
return ''.join([idx_to_char[i] for i in output])
model=RNNModel(rnn_layer,vocab_size).to(device)
#定义模型训练函数
def train_and_predict_rnn(model,num_hiddens,vocab_size,device,corpus_indices,
idx_to_char,char_to_idx,num_epochs,num_steps,lr,clipping_theta,
batch_size,pred_period,pred_len,prefixes):
optimizer=torch.optim.Adam(model.parameters(),lr=lr)
model.to(device)
state=None
loss=nn.CrossEntropyLoss()
for epoch in range(num_epochs):
l_sum,n,start=0.0,0,time.time()
data_iter=d2l.data_iter_consecutive(corpus_indices,batch_size,num_steps,device)
for x,y in data_iter:
if state is not None:
if isinstance(state,tuple):
state=(state[0].to(device),state[1].to(device))
else:
state=state.detach()
(output,state)=model(x,state)
y=torch.transpose(y,0.1).contiguous.view(-1)
l=loss(output,y.long())
optimizer.zero_grad()
l.backward()
d2l.grad_clipping(model.parameters(), clipping_theta, device)
optimizer.step()
l_sum += l.item() * y.shape[0]
n += y.shape[0]
if (epoch+1)%pred_period==0:
print('epoch %d,perplexity %f,time %.2f sec'%(epoch+1,math.exp(l_sum/n),time.time()-start))
for prefix in prefixes:
print(' -',predict_rnn_pytorch(prefix,pred_len,model,vocab_size,device,idx_to_char,char_to_idx))
num_epochs,num_steps,batch_size,lr,clipping_theta=250,35,32,1e-3,1e-2
pred_period,pred_len,prefixes=50,50,['分开','不分开']
train_and_predict_rnn(model,num_hiddens,vocab_size,device,corpus_indices,
idx_to_char,char_to_idx,num_epochs,num_steps,lr,clipping_theta,
batch_size,pred_period,pred_len,prefixes)