niceAlise 2021-10-18 18:05

Using a model-based DDPG reinforcement learning algorithm to tune three proportional gains: the agent's action output is set to a 3×1 matrix, but the simulation result only shows 1×1, so the three proportional gains cannot be controlled directly

Question: I am using model-based reinforcement learning to tune the three proportional gains of a control system. The agent uses the DDPG algorithm, and its action output is configured as a 3×1 matrix, but the simulation result only shows a 1×1 signal, so the three proportional gains cannot be controlled directly. The code and the Simulink model are shown below. How can this be solved?

[Simulink model screenshots]

# Code

%% SET UP ENV: custom environment model
%create the action info
numAct = 3;
actionInfo = rlNumericSpec([numAct 1],...
    'LowerLimit',[-Inf -Inf -Inf]',...
    'UpperLimit',[Inf Inf Inf]');
actionInfo.Name = 'action';
%Create the observation info
numObs = 7;
observationInfo = rlNumericSpec([numObs 1],...
    'LowerLimit',-Inf,...
    'UpperLimit',Inf);
observationInfo.Name = 'observation';

%Environment
mdl = 'hybridpowersystemRL1';
open_system(mdl);
env = rlSimulinkEnv(mdl,[mdl '/RL Agent'],observationInfo,actionInfo);
% This binds the RL Agent block inside the Simulink model; the next step is to configure the agent parameters.
numObs = observationInfo.Dimension(1);
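% Optional sanity check: query the specs back from the environment to confirm
% that the RL Agent block is expecting a 3x1 action (getActionInfo is part of
% Reinforcement Learning Toolbox).
envActInfo = getActionInfo(env);
disp(envActInfo.Dimension)   % prints "3  1" when the action spec is 3x1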
%% Set the simulation time Tf and the agent sample time Ts
Ts = 0.02;
Tf = 25;
% Fix the random generator seed for reproducibility
rng(0)
%% Initialize the agent: CREATE DDPG NETWORKS
% Define a critic network with two inputs (observation and action) and one output (the value).

statePath = [
    imageInputLayer([numObs 1 1],'Normalization','none','Name',...
    'observation')
    fullyConnectedLayer(128,'Name','CriticStateFC1')
    reluLayer('Name','CriticRelu1')
    fullyConnectedLayer(200,'Name','CriticStateFC2')];
actionPath = [
    imageInputLayer([numAct 1 1],'Normalization','none','Name','action')
    fullyConnectedLayer(200,'Name','CriticActionFC1','BiasLearnRateFactor',0)];
commonPath = [
    additionLayer(2,'Name','add')
    reluLayer('Name','CriticCommonRelu')
    fullyConnectedLayer(1,'Name','CriticOutput')];

% Connect the layer graph
criticNetwork = layerGraph(statePath);
criticNetwork = addLayers(criticNetwork,actionPath);
criticNetwork = addLayers(criticNetwork,commonPath);
criticNetwork = connectLayers(criticNetwork,'CriticStateFC2','add/in1');
criticNetwork = connectLayers(criticNetwork,'CriticActionFC1','add/in2');
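% The addition layer merges the state path and the action path, so the two
% layers wired into 'add' (CriticStateFC2 and CriticActionFC1) must have the
% same output size -- here both are 200 units.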

% Inspect the network structure
figure
plot(criticNetwork)
criticOptions = rlRepresentationOptions('LearnRate',1e-03,'GradientThreshold',1);
critic = rlRepresentation(criticNetwork,observationInfo,actionInfo,...
    'Observation',{'observation'},'Action',{'action'},criticOptions);
% if useGPU
%    criticOptions.UseDevice = 'gpu'; 
% end
                      
%% ACTOR network
% Define an actor network with one input (the observation) and one output (the action), which decides what action to take.

actorNetwork = [
    imageInputLayer([numObs 1 1],'Normalization','none','Name',...
    'observation')
    fullyConnectedLayer(128,'Name','ActorFC1')
    reluLayer('Name','ActorRelu1')
    fullyConnectedLayer(200,'Name','ActorFC2')
    reluLayer('Name','ActorRelu2')
    fullyConnectedLayer(1,'Name','ActorFC3')   % a single output unit
    tanhLayer('Name','ActorTanh1')
    scalingLayer('Name','ActorScaling','Scale',max(actionInfo.UpperLimit))];
actorOptions = rlRepresentationOptions('LearnRate',5e-04,'GradientThreshold',1);
actor = rlRepresentation(actorNetwork,observationInfo,actionInfo,...
    'Observation',{'observation'},'Action',{'ActorScaling'},actorOptions);
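% For comparison, a minimal sketch of an actor head that emits a 3x1 action:
% the final fully connected layer needs numAct outputs (ActorFC3 above has only
% one), and the scaling layer needs a finite scale, so a purely illustrative
% bound actionLimit is assumed here instead of the Inf limits in actionInfo.
actionLimit = 10;                                    % hypothetical finite bound
actorNetwork3 = [
    imageInputLayer([numObs 1 1],'Normalization','none','Name','observation')
    fullyConnectedLayer(128,'Name','ActorFC1')
    reluLayer('Name','ActorRelu1')
    fullyConnectedLayer(200,'Name','ActorFC2')
    reluLayer('Name','ActorRelu2')
    fullyConnectedLayer(numAct,'Name','ActorFC3')    % numAct (= 3) outputs
    tanhLayer('Name','ActorTanh1')
    scalingLayer('Name','ActorScaling','Scale',actionLimit)];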

%% Set the training parameters
%% DDPG Agent Options
agentOptions = rlDDPGAgentOptions;
agentOptions.SampleTime = Ts;        % sample time
agentOptions.DiscountFactor = 0.99;  % discount factor
agentOptions.MiniBatchSize = 128;
agentOptions.ExperienceBufferLength = 5e5;
agentOptions.TargetSmoothFactor = 1e-3;
agentOptions.NoiseOptions.MeanAttractionConstant = 5; % randomized exploration
agentOptions.NoiseOptions.Variance = 0.5;
agentOptions.NoiseOptions.VarianceDecayRate = 1e-5;
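% The Ornstein-Uhlenbeck noise options also accept one entry per action channel,
% e.g. (a sketch, equivalent to the scalar setting above):
% agentOptions.NoiseOptions.Variance = [0.5; 0.5; 0.5];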
%% Training Options

maxepisodes = 2000;
maxsteps = ceil(Tf/Ts);
trainingOptions = rlTrainingOptions(...
    'MaxEpisodes',maxepisodes,...
    'MaxStepsPerEpisode',maxsteps,...
    'ScoreAveragingWindowLength',5,...
    'Verbose',false,...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',400,...
    'SaveAgentCriteria','EpisodeReward',...
    'SaveAgentValue',400);

% trainingOptions = rlTrainingOptions;
% trainingOptions.MaxEpisodes =1000;
% trainingOptions.MaxStepsPerEpisode = Tf/Ts;
% trainingOptions.ScoreAveragingWindowLength = 1000;
% trainingOptions.StopTrainingCriteria = 'AverageReward';
% trainingOptions.StopTrainingValue = 110;
% trainingOptions.SaveAgentCriteria = 'EpisodeReward';
% trainingOptions.SaveAgentValue = 150;
% trainingOptions.Plots = 'training-progress';
% trainingOptions.Verbose = true;
% if useParallel
%     trainingOptions.Parallelization = 'async'; % asynchronous
%     trainingOptions.ParallelizationOptions.StepsUntilDataIsSent = 32; % send data every 32 time steps
% end
%% Parallel training settings
trainingOptions.UseParallel = true;
trainingOptions.ParallelizationOptions.Mode = "async";
trainingOptions.ParallelizationOptions.DataToSendFromWorkers = "Experiences";
trainingOptions.ParallelizationOptions.StepsUntilDataIsSent = -1;
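% StepsUntilDataIsSent = -1 makes each worker send its collected experiences to
% the host only at the end of an episode.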
%% Train the agent
agent = rlDDPGAgent(actor,critic,agentOptions);
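% Before launching a long training run, the size of the agent's action can be
% checked directly; getAction is part of Reinforcement Learning Toolbox, though
% whether it returns a numeric array or a cell array depends on the release:
% sampleAction = getAction(agent,{rand(numObs,1)});
% a 1x1 result here (instead of 3x1) points back at the actor output layer.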
trainingStats = train(agent,env,trainingOptions);
%% SAVE AGENT
reset(agent); % Clears the experience buffer
curDir = pwd;
saveDir = 'savedAgents';
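if ~exist(saveDir,'dir'), mkdir(saveDir), end   % create the folder if it is missing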
cd(saveDir)
save(['trainedAgent' datestr(now,'mm_DD_YYYY_HHMM')],'agent');
save(['trainingResults' datestr(now,'mm_DD_YYYY_HHMM')],'trainingStats');
cd(curDir)
%% Show the results: validate the trained agent by simulating the combined environment/agent model
simOptions = rlSimulationOptions('MaxSteps',500); % create a default option set
experience = sim(env,agent,simOptions);
totalReward = sum(experience.Reward);
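% The logged action from the simulation can also be inspected to confirm its
% size; the exact field layout of the experience output varies by release, so
% this is only a sketch:
% size(experience.Action.action.Data)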
% bdclose(mdl) 
