tianjiaozilong 2021-03-15 15:12 采纳率: 100%

# 请各位大神帮帮忙,因为这是毕业论文所用的代码,我想问问到底是否是对的,看看是我代码错了,还是逻辑错了

from dqn_agent import Agent
from model import QNetwork
import matplotlib.pyplot as plt
import  numpy as np
import torch
import xlrd
import gc

STATE_SIZE = 10
EPISODE_COUNT = 1000

# def dqn(n_episodes=EPISODE_COUNT,eps_start=2.0,eps_end=0.03,eps_decay=0.990):
#     scores = []
#     for i_episode in range(1, n_episodes + 1):
#         print("Episode" + str(i_episode))
#         state = getState(stockData, 0, STATE_SIZE + 1)
#         total_profit = 0
#         agent.inventory = []
#         eps = eps_start
#
#         for t in range(l):
#             action = agent.act(state, eps)
#             next_state = getState(stockData, t + 1, STATE_SIZE + 1)
#             reward = 0
#
#             if action == 1:  # 买入
#                 agent.inventory.append(stockData[t])
#             elif action == 2 and len(agent.inventory) > 0:  # 卖出
#                 bought_price = agent.inventory.pop(0)
#                 total_profit += stockData[t] - bought_price
#                 # reward = max(stockData[t] - bought_price, 0)
#                 reward = stockData[t] - bought_price
#                 # print("Sell: " + str(stockData[t]) + " | Profit: " + str(stockData[t] - bought_price))
#             done = 1 if t == l - 1 else 0
#             agent.step(state, action, reward, next_state, done)
#             eps = max(eps_end, eps * eps_decay)
#             state = next_state
#
#             # if done:
#             #     print("------------------------------")
#             #     print("total_profit = " + str(total_profit))
#             #     print("------------------------------")
#         scores.append(total_profit)
#     return scores

def dqn1(n_episodes1=EPISODE_COUNT, eps_start1=1.0, eps_end1=0.01, eps_decay1=0.9995):
scores1 = []
for i_episode in range(1,n_episodes1+1):
print("Episode" + str(i_episode))

state = getState(stockData, 0, STATE_SIZE + 1)
agent.inventory = []
eps = eps_start1
#global loss
print(1)
for t in range(l):
print(2)
action = agent.act(state,eps)
print(1)
next_state = getState(stockData, t + 1, STATE_SIZE + 1)
reward = 0
done = 1 if t == l - 1 else 0
global loss
loss = agent.step(state, action, reward, next_state, done)

print("loss", + str(loss))
scores1.append(loss)
gc.collect()
gc.collect()

return scores1

# if action == 1:# 过载
#     agent.inventory.append(stockData[t])
# elif action == 2 and len(agent.inventory) > 0:

def getState(data, t, n):
d = t - n + 1
# block = data[d:t + 1] if d>= 0 else -d * [data[0]]+ data[0:t+1]
block = data[d:t + 1]
#res = [0 for x in range(0, n)]
#res = []
buffer = []
for i in range(len(block) - 1):
print("res=",buffer[i])
buffer.append(block[i + 1]-block[i])
#print("res=",res[i])
return np.array([buffer])
#return np.array([res])

if __name__== '__main__':
print(1)
#stockData = []
#stockData = []
stockData = [None]*801
datas1 =xlrd.open_workbook(r'C:\Users\86138\Desktop\zi_ding_yi.xlsx',{'constant_memory':True})

#datas1 = xlrd.open_workbook(r'C:\Users\86138\Desktop\zi_ding_yi.xlsx')
print(2)
table = datas1.sheets()[0]
row_num = 0
print(3)
# for item in table[1:]:
#      stockData.append(float(table.col_values(6)))
col = table.col_values(5)
while row_num <= 800 :
# stockData[row_num] = table.col_values(5,0,row_num)
#stockData[row_num] = table.cell_value(row_num,5)\
stockData.append(table.cell_value(row_num,5))
row_num += 1
# stockData{row_num} = table.col_values(5,0,row_num)
# row_num =+1

agent = Agent(state_size=STATE_SIZE, action_size=3)
print(4)
l = len(stockData) - 1

# scores = dqn()
scores1 =dqn1()

C:\Users\86138\anaconda3\python.exe C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py
1
2
3
4
Episode1
1
2
Traceback (most recent call last):
File "C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py", line 123, in <module>
scores1 =dqn1()
File "C:/Users/86138/Desktop/stockPrediction-master/yunfuzai_main.py", line 61, in dqn1
action = agent.act(state,eps)
File "C:\Users\86138\Desktop\stockPrediction-master\dqn_agent.py", line 136, in act
state = torch.tensor(state).float.unsqueeze(0).to(device)
RuntimeError: Could not infer dtype of NoneType

import numpy as np
import random
from collections import namedtuple, deque

from model import QNetwork

import torch
import torch.nn.functional as F
import torch.optim as optim

#初始化超参数
BUFFER_SIZE = int(1e5)
#缓冲去大小，重播缓冲区大小
BATCH_SIZE = 64
#批处理大小，最小批量大小，minbatch size
GAMMA = 0.99
##折扣率
TAU = 1e-3
#用于目标参数的软更新
LR = 5e-4
#学习率
UPDATE_EVERY = 4
#更新网络的快慢
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#首先看下你的设备有无cuda可用：

class ReplayBuffer:
def __init__(self, action_size, buffer_size, batch_size):# 初始化记忆库
self.action_size = action_size
self.memory = deque(maxlen=buffer_size)
#使用 deque(maxlen=N) 构造函数会创建一个固定大小的队列。当新的元素加入并且这个队列已满的时候，最老的元素会自动被移除掉
self.batch_size = batch_size
#batch字面上是批量的意思，在深度学习中指的是计算一次cost需要的输入数据个数。
self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
#经验池的作用就是把每次进行的游戏回合transition（episode，step）记录下来存储起来。
# 在训练的时候则是在经验池中随机取一组transition batch对Q网络进行优化。同时，也需要及时丢掉过老的记录，及时更新。
#首先，定义了一个名为Experience的namedtuple。包括内容除了上面算法中提到的（s,a,r,s'）.还有结束的标识’done’。
def add(self, state, action, reward, next_state, done):# 向记忆库中加入一个记忆
e = self.experience(state, action, reward, next_state, done)
self.memory.append(e)

def sample(self):# 随机取出一个minibatch
experiences = random.sample(self.memory, k=self.batch_size)
states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(device)
rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None])).float().to(device)

return (states, actions, rewards, next_states, dones)

def __len__(self):
return len(self.memory)

class Agent:

def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size

# Q-Network
#在学习过程中，我们使用两个不相关的Q网络（Q_network_local和Q_network_target）来计算预测值（权重θ）和目标值（权重θ’）。
# 经过若干步骤后，目标网络会被冻结，然后拷贝实际的Q网络的权重到目标网络权重。
# 冻结目标Q网络一段时间再用实际Q网络的权重更新其权重，可以稳定训练过程
self.qnetwork_local = QNetwork(state_size, action_size).to(device)
self.qnetwork_target = QNetwork(state_size, action_size).to(device)#目标策略，智能体要学习的策略
#在线性回归或者监督学习中，我们会计算预测值与真实值之间的差距，也就是loss
#在计算得出loss之后，通常会使用Optimizer对所构造的数学模型/网络模型进行参数优化，
#通常情况下，优化的最终目的是使得loss趋向于最小。

# Replay Buffer，所以我们设置一个replay_buffer，获得新的交互数据，抛弃旧的数据，
# 并且每次从这个replay_buffer中随机取一个batch，来训练我们的系统
self.memory = ReplayBuffer(action_size, buffer_size=BUFFER_SIZE,batch_size=BATCH_SIZE)
# 初始化迭代步数
self.t_step = 0
# 初始化持仓
self.inventory = []
# Experience Replay就是这样的一种技术，在游戏数据的采集过程中，所有的经验数据<script type="math/tex" id="MathJax-Element-85">< s, a, r, s'
# ></script>都被存储到一个回放池(replay memory)中。当训练网络时，从回放池中随机地取一小撮数据，
# 而不是最新连续的几条转换数据，进行训练。

def step(self, state, action, reward, next_state, done):
# 每一步需要先存储记忆库

# 每隔若干步学习一次
self.t_step = (self.t_step + 1) % UPDATE_EVERY
if self.t_step == 0:
if len(self.memory) > BATCH_SIZE:
experience = self.memory.sample()
self.learn(experience, GAMMA)

def learn(self, experience, gamma):
# 更新迭代
states, actions, rewards, next_states, dones = experience

# target network:compute and minimize the loss.计算并最小化损失
# Get max predicted Q values(for next states) from target model:从目标模型得到最大的预测Q值(下一个状态)
Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
# compute Q target for current states:计算当前状态的Q目标。
Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

Q_expected = self.qnetwork_local(states).gather(1, actions.long())# 固定行号，确认列
# Compute loss
loss = F.mse_loss(Q_expected, Q_targets)
# Minimize the loss
loss.backward()
self.optimizer.step()
# update target network
self.soft_update(self.qnetwork_local, self.qnetwork_target, tau=TAU)
return loss

def soft_update(self, local_model, target_model, tau):
for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)
#实现Q'到Q的逼近 use .data and .data.copy#
def act(self, state, eps = 0.):
#Returns actions for given state as per current policy.
#Params
# state (array_like): current state
# eps (float): epsilon, for epsilon-greedy action selection
#参数个数
#状态(array_like):当前状态
#eps (float):用于epsilon-贪婪动作选择

# 返回动作值orch.unsqueeze()这个函数主要是对数据维度进行扩充
#state = torch.from_numpy(state).float().unsqueeze(0).to(device)
state = torch.tensor(state).float.unsqueeze(0).to(device)
self.qnetwork_local.eval()
action_values = self.qnetwork_local(state)
self.qnetwork_local.train()
# # Epsilon-greedy action selection
if random.random() > eps:
return np.argmax(action_values.cpu().data.numpy())
else:
return random.choice(np.arange(self.action_size))
#eps:根据当前策略返回给定状态的操作参数个数

• 写回答

#### 4条回答默认 最新

• shifenglv 2021-03-15 18:03
关注

应该是没有读取进来数据。在getstate函数里，for循环没有运行，所以state是None。也可能len(block)刚好等于1，而你又用len(block)-1,所以循环没有进行。我不知道你是不是想用for去枚举block，如果是，应该是for i in range(len(block))

本回答被题主选为最佳回答 , 对您是否有帮助呢?
评论

#### 悬赏问题

• ¥20 求后天龙八部经典脚本辅助
• ¥15 tensorflow运行时显示TypeError
• ¥30 layui怎么实现子窗口修改完成后，用原来查询条件刷新父页面
• ¥15 perl解释器装上之后如何使用
• ¥20 ctf find_fake_fast错位偏移
• ¥15 pycharm无法启动打开
• ¥15 freertos中断使用队列传输后不能退出
• ¥15 Could not find artifact io.renren:renren-dynamic-datasource:jar:5.0.0
• ¥15 8乘8点阵显示I Love you，要求看下文
• ¥15 uniapp地图组件show-location的方向箭偏差