import ray
from ray import tune
from ray.rllib.agents.dqn import DQNTrainer
from ray.tune.schedulers import ASHAScheduler
import gym
# 初始化Ray
ray.init(num_cpus=8)
def train_dqn(config):
print("on train")
trainer = DQNTrainer(config=config, env="CartPole-v1")
result = trainer.train()
return result
# 设置训练配置
config = {
"log_level": "DEBUG",
"num_workers": 4,
"framework": "torch",
"lr": 2e-3,
"replay_buffer_config": {
"capacity": 10000,
"learning_starts": 500
},
"train_batch_size": 64,
"target_network_update_freq": 10,
"gamma": 0.99,
}
exp_config = {
"run_or_experiment": train_dqn,
"checkpoint_freq": 5,
"checkpoint_at_end": True,
"local_dir": "C:/Users/jinsong.shao/Desktop/model/checkpoints",
"stop": {"training_iteration": 1000},
"config": config,
"scheduler":ASHAScheduler(metric="episode_reward_mean", mode="max"),
"resources_per_trial":{"cpu": 4, "gpu": 0},
}
# 运行Tune实验进行训练
tune.run(**exp_config)
我是ray框架的初学者,尝试构建一个简单的训练,代码如上,遇到的问题是并没有如我期望的5次迭代保存一个checkpoint文件以及在达到最大迭代次数后也没有停止训练
ps.请不要使用AI作答,我已经和gpt纠结了很久都没有答案,期待各位的解惑,十分感谢!