tianjiaozilong 2021-03-15 15:12 · acceptance rate: 100%
358 views
Accepted

Could the experts here please help me out? This is the code I am using for my graduation thesis, and I would like to know whether it is actually correct: is the bug in my code, or in my logic?

First of all, here is my yunfuzai_main.py:

from dqn_agent import Agent
from model import QNetwork
import matplotlib.pyplot as plt
import numpy as np
import torch
import xlrd
from openpyxl import load_workbook
import gc


STATE_SIZE = 10
EPISODE_COUNT = 1000

# def dqn(n_episodes=EPISODE_COUNT,eps_start=2.0,eps_end=0.03,eps_decay=0.990):
#     scores = []
#     for i_episode in range(1, n_episodes + 1):
#         print("Episode" + str(i_episode))
#         state = getState(stockData, 0, STATE_SIZE + 1)
#         total_profit = 0
#         agent.inventory = []
#         eps = eps_start
#
#         for t in range(l):
#             action = agent.act(state, eps)
#             next_state = getState(stockData, t + 1, STATE_SIZE + 1)
#             reward = 0
#
#             if action == 1:  # buy
#                 agent.inventory.append(stockData[t])
#                 # print("buy" + str(stockData[t]))
#             elif action == 2 and len(agent.inventory) > 0:  # sell
#                 bought_price = agent.inventory.pop(0)
#                 total_profit += stockData[t] - bought_price
#                 # reward = max(stockData[t] - bought_price, 0)
#                 reward = stockData[t] - bought_price
#                 # print("Sell: " + str(stockData[t]) + " | Profit: " + str(stockData[t] - bought_price))
#             done = 1 if t == l - 1 else 0
#             agent.step(state, action, reward, next_state, done)
#             eps = max(eps_end, eps * eps_decay)
#             state = next_state
#
#             # if done:
#             #     print("------------------------------")
#             #     print("total_profit = " + str(total_profit))
#             #     print("------------------------------")
#         scores.append(total_profit)
#     return scores

def dqn1(n_episodes1=EPISODE_COUNT, eps_start1=1.0, eps_end1=0.01, eps_decay1=0.9995):
    scores1 = []
    for i_episode in range(1,n_episodes1+1):
        print("Episode" + str(i_episode))

        state = getState(stockData, 0, STATE_SIZE + 1)
        agent.inventory = []
        eps = eps_start1
        #global loss
        print(1)
        for t in range(l):
            print(2)
            action = agent.act(state,eps)
            print(1)
            next_state = getState(stockData, t + 1, STATE_SIZE + 1)
            reward = 0
            done = 1 if t == l - 1 else 0
            global loss
            loss = agent.step(state, action, reward, next_state, done)

            print("loss", + str(loss))
            scores1.append(loss)
            gc.collect()
        gc.collect()

    return scores1

            # if action == 1:  # overload
            #     agent.inventory.append(stockData[t])
            # elif action == 2 and len(agent.inventory) > 0:


def getState(data, t, n):
    d = t - n + 1
    # block = data[d:t + 1] if d>= 0 else -d * [data[0]]+ data[0:t+1]
    block = data[d:t + 1]
    #res = [0 for x in range(0, n)]
    #res = []
    buffer = []
    for i in range(len(block) - 1):
        print("res=",buffer[i])
        buffer.append(block[i + 1]-block[i])
        #print("res=",res[i])
        return np.array([buffer])
    #return np.array([res])

if __name__== '__main__':
    print(1)
    #stockData = []
    #stockData = []
    stockData = [None]*801
    datas1 =xlrd.open_workbook(r'C:\Users\86138\Desktop\zi_ding_yi.xlsx',{'constant_memory':True})

    #datas1 = xlrd.open_workbook(r'C:\Users\86138\Desktop\zi_ding_yi.xlsx')
    print(2)
    table = datas1.sheets()[0]
    row_num = 0
    print(3)
    # for item in table[1:]:
    #      stockData.append(float(table.col_values(6)))
    col = table.col_values(5)
    while row_num <= 800 :
        # stockData[row_num] = table.col_values(5,0,row_num)
        #stockData[row_num] = table.cell_value(row_num,5)\
        stockData.append(table.cell_value(row_num,5))
        row_num += 1
     # stockData{row_num} = table.col_values(5,0,row_num)
     # row_num =+1

    agent = Agent(state_size=STATE_SIZE, action_size=3)
    print(4)
    l = len(stockData) - 1

    # scores = dqn()
    scores1 =dqn1()

The error it reports is:

C:\Users\86138\anaconda3\python.exe C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py
1
2
3
4
Episode1
1
2
Traceback (most recent call last):
  File "C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py", line 123, in <module>
    scores1 =dqn1()
  File "C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py", line 61, in dqn1
    action = agent.act(state,eps)
  File "C:\Users\86138\Desktop\stockPrediction-master\dqn_agent.py", line 136, in act
    state = torch.tensor(state).float.unsqueeze(0).to(device)
RuntimeError: Could not infer dtype of NoneType

And here is my dqn_agent.py:

import numpy as np
import random
from collections import namedtuple, deque

from model import QNetwork

import torch
import torch.nn.functional as F
import torch.optim as optim

# Hyperparameters
BUFFER_SIZE = int(1e5)
# replay buffer size
BATCH_SIZE = 64
# minibatch size
GAMMA = 0.99
# discount factor
TAU = 1e-3
# soft-update rate for the target network parameters
LR = 5e-4
# learning rate
UPDATE_EVERY = 4
# how often (in steps) the network is updated
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# use CUDA if it is available on this machine, otherwise the CPU

class ReplayBuffer:
    def __init__(self, action_size, buffer_size, batch_size):  # initialize the replay memory
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        # deque(maxlen=N) creates a fixed-size queue: once it is full, the oldest element is
        # removed automatically whenever a new one is appended
        self.batch_size = batch_size
        # the batch size is the number of input samples used to compute the cost once
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        # The experience pool records every transition of each episode/step. During training,
        # a random batch of transitions is drawn from it to optimize the Q-network, while
        # overly old records are discarded as new ones arrive. The Experience namedtuple holds
        # (s, a, r, s') from the algorithm plus the 'done' flag that marks the end of an episode.
    def add(self, state, action, reward, next_state, done):  # add one experience to the memory
        e = self.experience(state, action, reward, next_state, done)
        self.memory.append(e)

    def sample(self):  # randomly sample a minibatch
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None])).float().to(device)

        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        return len(self.memory)


class Agent:

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # Q-Network
        # During learning we use two separate Q-networks (qnetwork_local and qnetwork_target)
        # to compute the predicted values (weights θ) and the target values (weights θ').
        # After a number of steps the target network, which is kept frozen in between, is
        # updated by copying over the weights of the local Q-network; freezing it for a while
        # before updating it stabilizes training.
        self.qnetwork_local = QNetwork(state_size, action_size).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size).to(device)  # target network, the policy the agent learns
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        # As in supervised learning, we measure the gap between predictions and targets (the loss),
        # and the optimizer adjusts the network parameters so that the loss tends toward a minimum.


        # Replay buffer: new interaction data is added and old data is discarded,
        # and each training step samples a random batch from it to train the system.
        self.memory = ReplayBuffer(action_size, buffer_size=BUFFER_SIZE, batch_size=BATCH_SIZE)
        # initialize the step counter
        self.t_step = 0
        # initialize the inventory (holdings)
        self.inventory = []
    # Experience replay: while collecting data, every experience tuple (s, a, r, s')
    # is stored in a replay memory. When the network is trained, a small random batch is
    # drawn from this memory rather than the most recent consecutive transitions.

    def step(self, state, action, reward, next_state, done):
        # store the experience in the replay memory at every step
        self.memory.add(state, action, reward, next_state, done)

        # learn once every UPDATE_EVERY steps
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:
            if len(self.memory) > BATCH_SIZE:
                experience = self.memory.sample()
                self.learn(experience, GAMMA)

    def learn(self, experience, gamma):
        # perform one update iteration
        states, actions, rewards, next_states, dones = experience

        # Compute and minimize the loss.
        # Get the max predicted Q-values (for the next states) from the target model.
        Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        # Compute the Q targets for the current states.
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        Q_expected = self.qnetwork_local(states).gather(1, actions.long())  # for each row, pick the Q-value of the action taken
        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # update target network
        self.soft_update(self.qnetwork_local, self.qnetwork_target, tau=TAU)
        return loss


    def soft_update(self, local_model, target_model, tau):
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)
    # soft update: move the target network weights toward the local network weights (uses .data and .data.copy_())
    def act(self, state, eps=0.):
        # Returns the action for the given state as per the current policy.
        # Params
        #   state (array_like): current state
        #   eps (float): epsilon, for epsilon-greedy action selection

        # torch.unsqueeze() expands the tensor with an extra (batch) dimension
        #state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        state = torch.tensor(state).float.unsqueeze(0).to(device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))
        # eps: epsilon-greedy selection of the action for the given state under the current policy

I really have no idea anymore, so I want to ask what is going on. What I actually want is to print out the loss, but that does not seem to work. Please take a look for me; since this code is for my graduation thesis, I just wanted to ask. Thank you.


4 answers

  • shifenglv 2021-03-15 18:03

    Most likely no data was actually read in. In the getState function the for loop never runs, so state is None. It may also be that len(block) happens to equal 1 while you loop over len(block) - 1, so the loop body never executes. I don't know whether you intended the for loop to enumerate block; if so, it should be for i in range(len(block)).

    This answer was selected as the best answer by the asker.
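Following that diagnosis, below is a minimal sketch of a getState that cannot fall through and return None. It assumes the intent is to return the n-1 successive differences of the window of n data points ending at index t; the padding branch that is commented out in the posted code is restored, and the return is moved outside the loop:

import numpy as np

def getState(data, t, n):
    # return the n-1 successive differences of the n points ending at index t
    d = t - n + 1
    # pad with the first value when there is not yet enough history; with this branch
    # commented out (as posted), the slice data[d:t + 1] can come back empty
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1]
    buffer = []
    for i in range(len(block) - 1):
        buffer.append(block[i + 1] - block[i])
    # return only after the loop has finished, so the function never returns None
    return np.array([buffer])

Two further observations on the posted code, offered tentatively: in __main__, stockData starts as [None]*801 and the Excel values are appended after those entries, so the first 801 elements remain None and the differences above will fail until the list holds only numbers. And since the stated goal is printing the loss, note that Agent.step as posted has no return statement, so loss = agent.step(...) will always be None even once the state issue is fixed; one option would be to have step return the value of self.learn(...) on the steps where it actually learns.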
