How do I use the https://github.com/EleutherAI/lm-evaluation-harness framework to evaluate a model I implemented myself? For example, suppose I have implemented an LSTM network, with the code below. How can I call this framework to evaluate that model? I haven't been able to figure out which datasets it ships with or which API entry points I'm supposed to call (my best guess at a wrapper is sketched after the code).
import os

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader


class SimpleLstmNetWork(nn.Module):
    def __init__(self, taskName, isBatchFirst, trainDataDim, labelDataDim, hiddenDim, layerNum, batchSize):
        # Initialize the parent class
        super(SimpleLstmNetWork, self).__init__()
        self.taskName = taskName
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # LSTM hyperparameters
        self.trainDataDim = trainDataDim
        self.hiddenDim = hiddenDim
        self.layerNum = layerNum
        self.batchSize = batchSize
        # LSTM layer
        self.trainModule = nn.LSTM(trainDataDim, hiddenDim, layerNum, batch_first=isBatchFirst).to(self.device)
        # Output (fully connected) layer
        self.fullConnectLayer = nn.Linear(hiddenDim, labelDataDim).to(self.device)

    def forward(self, inputData):
        # LSTM forward pass
        out, _ = self.trainModule(inputData)
        # Project the last time step's hidden state to the label dimension
        out = self.fullConnectLayer(out[:, -1, :])
        return out
    def SetCriterion(self, func):
        self.criterion = func

    def SetOptimizer(self, func):
        self.optimizer = func

    def SetLstmTrainData(self, inputData, labelData):
        # Wrap the tensors in a PyTorch Dataset and move them to the target device
        data = TensorDataset(inputData.to(self.device), labelData.to(self.device))
        # DataLoader handles batching automatically
        self.dataloader = DataLoader(data, batch_size=self.batchSize, shuffle=False)
    def TrainLstmModule(self, epochNum, statPeriod):
        # Training loop
        for epoch in range(epochNum):
            for trainData, labelData in self.dataloader:
                self.optimizer.zero_grad()
                output = self.forward(trainData)
                loss = self.criterion(output, labelData)
                # torch.autograd.set_detect_anomaly(True)  # uncomment to debug autograd issues
                loss.backward()
                self.optimizer.step()
            # Report progress every statPeriod epochs
            if (epoch + 1) % statPeriod == 0:
                print(f"taskName = {self.taskName}, Epoch[{epoch + 1}/{epochNum}], loss:{loss.item()}")
    def GetLstmModuleTrainRst(self, verifyData):
        # Model prediction without gradient tracking
        with torch.no_grad():
            verifyData = verifyData.to(self.device)
            output = self.forward(verifyData)
        return output.cpu()
def HandleSimpleLstmNetWorkProcess(taskName, isBatchFirst, trainData, labelData, trainDataDim, labelDataDim, hiddenDim, layerNum, batchSize, epochNum, learnRate, weightDecay, statPeriod, modulePath):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleLstmNetWork(taskName, isBatchFirst, trainDataDim, labelDataDim, hiddenDim, layerNum, batchSize).to(device)
    if os.path.exists(modulePath):
        # Load an existing checkpoint; map_location keeps this working on CPU-only machines
        checkpoint = torch.load(modulePath, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        return model
    else:
        print(f"HandleSimpleLstmNetWorkProcess: path does not exist, path = {modulePath}, training a new model")
        model.SetCriterion(nn.MSELoss())
        # model.SetCriterion(nn.CrossEntropyLoss())
        model.SetOptimizer(torch.optim.Adam(model.parameters(), lr=learnRate, weight_decay=weightDecay))
        model.to(device)
        model.SetLstmTrainData(trainData, labelData)
        model.TrainLstmModule(epochNum, statPeriod)
        torch.save({'model_state_dict': model.state_dict()}, modulePath)
        return model
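
From what I can tell, the harness evaluates language models through a text interface (log-likelihoods over token sequences and text generation), so an LSTM like the one above would have to be exposed behind that interface rather than evaluated directly on its regression outputs. Below is only a rough, unverified sketch of what such a wrapper might look like against what I believe is the v0.4.x API (lm_eval.api.model.LM, register_model, lm_eval.simple_evaluate); the _score / _generate helpers and the tokenizer argument are placeholders I made up, not part of the library.

import lm_eval
from lm_eval.api.model import LM
from lm_eval.api.registry import register_model


@register_model("simple_lstm")
class SimpleLstmEvalWrapper(LM):
    """Adapter exposing a trained SimpleLstmNetWork through the harness's LM interface."""

    def __init__(self, lstm_model, tokenizer):
        super().__init__()
        self.model = lstm_model      # trained SimpleLstmNetWork instance
        self.tokenizer = tokenizer   # placeholder: something mapping text <-> token ids

    def loglikelihood(self, requests):
        # Each request carries (context, continuation); the harness expects a
        # list of (log p(continuation | context), is_greedy) tuples in return.
        results = []
        for request in requests:
            context, continuation = request.args
            results.append(self._score(context, continuation))  # placeholder scoring
        return results

    def loglikelihood_rolling(self, requests):
        # Log-likelihood of the whole text, used by perplexity-style tasks.
        return [self._score("", request.args[0])[0] for request in requests]

    def generate_until(self, requests):
        # Free-form generation for generation-style tasks; args are (context, gen_kwargs).
        return [self._generate(*request.args) for request in requests]

    def _score(self, context, continuation):
        # Placeholder: would need the LSTM to define token-level probabilities.
        raise NotImplementedError

    def _generate(self, context, gen_kwargs):
        # Placeholder: would need a decoding loop over the LSTM.
        raise NotImplementedError


# Rough usage (untested):
# lm = SimpleLstmEvalWrapper(trained_lstm, tokenizer)
# results = lm_eval.simple_evaluate(model=lm, tasks=["lambada_openai"])
# print(results["results"])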