Dynamic programming added to MDP schemes in DEM toolbox/DEM/spm_MDP_DP.m
SVN r6451
Friston committed May 26, 2015
1 parent 3a0d58f commit cd8603d
Showing 3 changed files with 318 additions and 137 deletions.
14 changes: 7 additions & 7 deletions spm_MDP_da.m
@@ -8,19 +8,19 @@ function spm_MDP_da(MDP)
% Copyright (C) 2015 Wellcome Trust Centre for Neuroimaging

% Karl Friston
-% $Id: spm_MDP_da.m 6343 2015-02-18 16:46:00Z spm $
+% $Id: spm_MDP_da.m 6451 2015-05-26 09:26:03Z karl $

% deconvolve to simulate dopamine responses
%--------------------------------------------------------------------------
-gamma = MDP.d;
-da = pinv( tril(toeplitz(exp(-((1:length(gamma)) - 1)'/8))) )*gamma;
-da = da(4:end);
+wn = MDP.d;
+da = pinv( tril(toeplitz(exp(-((1:length(wn)) - 1)'/8))) )*wn;
+da = da(4:end);

% peristimulus time histogram
%--------------------------------------------------------------------------
-pst = (1:length(da))*256;
-r = 256;
-psth = r*da + randn(size(da)).*sqrt(r*da);
+pst = (1:length(da))*256;
+r = 256;
+psth = r*da + randn(size(da)).*sqrt(r*da);
bar(pst,psth,1)
title('Simulated dopamine responses','FontSize',16)
xlabel('Peristimulus time (ms)','FontSize',12)
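The hunk above renames the deconvolved trace from gamma to wn without changing the logic: the precision updates in MDP.d are deconvolved with an exponential kernel and rendered as a noisy peristimulus time histogram. Below is a minimal standalone sketch of that computation, assuming a hypothetical increasing trace wn in place of MDP.d (no SPM functions are needed for this fragment):

% hypothetical precision trace standing in for MDP.d
wn = 1 + (1:32)'/16;

% exponential kernel (time constant 8 bins) as a lower triangular
% Toeplitz convolution matrix; its pseudoinverse deconvolves wn
K = tril(toeplitz(exp(-((1:length(wn)) - 1)'/8)));
da = pinv(K)*wn;
da = da(4:end);  % discard the initial transient

% Poisson-like counts: mean r*da, variance r*da (Gaussian approximation)
r = 256;
psth = r*da + randn(size(da)).*sqrt(r*da);
bar((1:length(da))*256,psth,1)  % peristimulus time in ms

Because the kernel here is geometric, the deconvolution reduces to da(t) = wn(t) - exp(-1/8)*wn(t - 1), so tonic increases in precision show up as phasic, dopamine-like responses.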
176 changes: 176 additions & 0 deletions toolbox/DEM/spm_MDP_DP.m
@@ -0,0 +1,176 @@
function [B0,BV] = spm_MDP_DP(MDP,OPTION)
% dynamic programming using active inference
% FORMAT [B0,BV] = spm_MDP_DP(MDP,OPTION)
%
% MDP.A(O,N) - Likelihood of O outcomes given N hidden states
% MDP.B{M}(N,N) - transition probabilities among hidden states (priors)
% MDP.C(N,1) - prior preferences (prior over future states)
%
% MDP.V(T - 1,P) - P allowable policies (control sequences)
%
% OPTION - {'Free Energy' | 'KL Control' | 'Expected Utility'};
%
% B0 - optimal state action policy or transition matrix
% BV - corresponding policy using value iteration
%__________________________________________________________________________
% Copyright (C) 2015 Wellcome Trust Centre for Neuroimaging

% Karl Friston
% $Id: spm_MDP_DP.m 6451 2015-05-26 09:26:03Z karl $

% set up and preliminaries
%==========================================================================

% options
%--------------------------------------------------------------------------
if nargin < 2, OPTION = 'Free Energy'; end

% generative model and initial states
%--------------------------------------------------------------------------
T = size(MDP.V,1) + 1; % number of time steps (outcomes)
Ns = size(MDP.B{1},1); % number of hidden states
Nu = size(MDP.B,2); % number of hidden controls
p0 = exp(-16); % smallest probability

% likelihood model (for a partially observed MDP implicit in G)
%--------------------------------------------------------------------------
try
A = MDP.A + p0;
catch
A = speye(Ns,Ns) + p0;
end
A = A*diag(1./sum(A)); % normalise
lnA = log(A); % log probabilities
H = sum(A.*lnA)'; % negentropy of observations

% transition probabilities (priors)
%--------------------------------------------------------------------------
for j = 1:Nu
B{j} = MDP.B{1,j} + p0;
B{j} = B{j}*diag(1./sum(B{j}));
lnB{j} = log(B{j});
end

% terminal probabilities (priors)
%--------------------------------------------------------------------------
try
C = MDP.C + p0;
if size(C,2) ~= T
C = C(:,end)*ones(1,T);
end
catch
C = ones(Ns,T);
end
C = C*diag(1./sum(C));
lnC = log(C);

% policies, states and their expectations
%--------------------------------------------------------------------------
V = MDP.V;
Np = size(V,2); % number of allowable policies



% policy iteration
%==========================================================================
for s = 1:Ns


% Variational iterations (hidden states)
%======================================================================
x = zeros(Ns,T,Np);
for k = 1:Np

for i = 1:2  % two fixed-point sweeps over time

% hidden states (x)
%--------------------------------------------------------------
x(s,1,k) = 1;

% future states
%--------------------------------------------------------------
for j = 2:(T - 1)
v = lnB{V(j - 1,k)} *x(:,j - 1,k) + ...
lnB{V(j, k)}'*x(:,j + 1,k);
x(:,j,k) = spm_softmax(v);
end

% last state
%--------------------------------------------------------------
v = lnB{V(T - 1,k)} *x(:,T - 1,k);
x(:,T,k) = spm_softmax(v);

end

end

% value of policies (Q)
%======================================================================
Q = zeros(Np,1);
for k = 1:Np

% path integral of expected free energy
%------------------------------------------------------------------
for j = 2:T

switch OPTION
case{'Free Energy','FE'}
v = lnC(:,j) - log(x(:,j,k)) + H;  % utility, epistemic value and outcome negentropy

case{'KL Control','KL'}
v = lnC(:,j) - log(x(:,j,k));      % negative KL divergence from preferences

case{'Expected Utility','EU','RL'}
v = lnC(:,j);                      % expected utility only

otherwise
error(['unknown option: ' OPTION])
end
Q(k) = Q(k) + v'*x(:,j,k);

end
end

% optimal transition from this state
%======================================================================
[u,k] = max(Q);
B0(:,s) = B{V(1,k)}(:,s);  % first transition of the most valuable policy

end

if nargout < 2, return, end

% value iteration
%==========================================================================
V = zeros(Ns,1);  % initial value of each state
lnC = lnC(:,end); % terminal (log) preferences play the role of reward
g = 1 - 1/T;      % discount factor
for i = 1:32
for s = 1:Ns

% value of actions (Q)
%------------------------------------------------------------------
Q = zeros(Nu,1);
for k = 1:Nu
Q(k) = B{1,k}(:,s)'*(lnC + g*V);
end

% optimal transition from this state
%------------------------------------------------------------------
[u,k] = max(Q);
BV(:,s) = B{1,k}(:,s);

end

% update the value function under the greedy policy
%----------------------------------------------------------------------
dV = BV'*(lnC + g*V) - V;
V = V + dV;

% convergence
%----------------------------------------------------------------------
if norm(dV) < 1e-2, break, end

end
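A minimal usage sketch for the new routine (illustrative, not part of the commit): a toy problem with two hidden states, two actions and three time steps, with a preference for ending in the second state. The numbers are made up, and spm_softmax (from SPM) must be on the MATLAB path:

% toy POMDP: two hidden states, two actions, T = 3 time steps
MDP.A = eye(2);         % outcomes identify hidden states
MDP.B{1} = [1 1; 0 0];  % action 1: move to (or stay in) state 1
MDP.B{2} = [0 0; 1 1];  % action 2: move to (or stay in) state 2
MDP.C = [0; 1];         % prior preference for state 2
MDP.V = [1 1 2 2;
         1 2 1 2];      % four policies: each column is T - 1 actions

[B0,BV] = spm_MDP_DP(MDP,'Free Energy');

B0(:,s) returns the first transition of the best policy from state s, and BV the greedy transitions from value iteration; here both should send the agent to the preferred state 2 from either starting state.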

