% GMRK_methods.m
% Author: Michael Schober (mschober@tue.mpg.de)
% Date: 2014-10-23
% Version: 1.0
% Purpose: Prove the connection between integrated Wiener GP models and
%          Runge-Kutta (RK) methods

%% Setup
% Print the banner, wipe the workspace and the symbolic engine state, and
% prepare the completion sound.

banner = { ...
  ['This code proves the connection between integrated Wiener ', ...
   'Gaussian process models and Runge-Kutta methods. '], ...
  ['The code requires Matlab''s symbolic math toolbox and might ', ...
   'take some time run. A sound will play upon completion.'], ...
  '=================================================='};
for msg = banner
  disp(msg{1});
end

% Start from a clean slate (this also removes the banner variables above).
clear;
reset(symengine);

% Keep only a handle that plays the first 2e4 samples of the 'handel' demo
% recording; the loaded data itself is removed from the workspace again.
chorus = load('handel');
beep = @() sound(chorus.y(1:2e4), chorus.Fs);
clear chorus;

%% Configuration

% Symbolic variables, all assumed positive:
%   h, u, v : variables of the Butcher tableau
%   t, tp   : arguments used in the helper and kernel definitions
syms h u v t tp positive;
% Start of the integration domain
t0 = sym('t_0', 'positive');

% Expected values for results.
% Each row vector is a list of weights to be read as a product with the
% stacked observations, e.g. [1, h] * [x0; Y1] = x0 + h*Y1.
w_x0 = 1;         % read: 1*x0
b_Euler = [1, h];

w_Y1 = [1, h*u];
b_2ndOrder = [1, h*(1-1/(2*u)), h/(2*u)];

w_Y2 = [1, ...
        h*(v - v*(v-u)/(u*(2-3*u))), ...
        h*v*(v-u)/(u*(2-3*u))];
b_3rdOrder = [1, ...
              h*(1 - (2-3*v)/(6*u*(u-v)) - (2-3*u)/(6*v*(v-u))), ...
              h*(2-3*v)/(6*u*(u-v)), ...
              h*(2-3*u)/(6*v*(v-u))];

% Helper functions
% Closed-form min/max expressed through the absolute value.
minimum(t, tp) = (t + tp - abs(t - tp)) / 2;
maximum(t, tp) = (t + tp + abs(t - tp)) / 2;

% Indicator functions built from sign(). Because sign(0) == 0, ind_gt is 0
% on the diagonal t == tp, so it is a strict 'greater than' indicator and
% its complement ind_le is the 'less than or equal' indicator.
ind_gt(t, tp) = maximum(sign(t - tp), 0);     % 1 if t >  tp, else 0
ind_le(t, tp) = 1 - maximum(sign(t - tp), 0); % 1 if t <= tp, else 0

% Kernel functions
% ----------------
% k1 is the once integrated Wiener process kernel, k1d its derivative with
% respect to the second argument and dk1d its derivative with respect to
% both arguments. The k2*/k3* families use the same naming pattern.

% Once integrated Wiener process kernel
k1(t, tp) = minimum(t, tp)^3 / 3 + abs(t - tp) * minimum(t, tp)^2 / 2;
% Derivative with respect to the second argument (piecewise in t vs. tp)
k1d(t, tp) = ind_gt(t, tp) * (t*tp - tp^2/2) ...
           + ind_le(t, tp) * (t^2/2);
% Derivative with respect to both arguments = Wiener process kernel
dk1d(t, tp) = minimum(t, tp);

% Twice integrated Wiener process kernel
k2(t, tp) = minimum(t, tp)^5 / 20 ...
  + abs(t - tp) / 12 ...
    * ((t + tp) * minimum(t, tp)^3 - minimum(t, tp)^4 / 2);
k2d(t, tp) = ind_le(t, tp) * (tp*t^3/6 - t^4/24) ...
           + ind_gt(t, tp) * (tp^2/24 * (tp^2 - 4*t*tp + 6*t^2));
% Same expression as the once integrated kernel k1
dk2d(t, tp) = k1(t, tp);

% Thrice integrated Wiener process kernel
k3(t, tp) = minimum(t, tp)^7 / 252 ...
  + abs(t - tp) * minimum(t, tp)^4 / 720 ...
    * (3*minimum(t, tp)^2 + 2*t*tp + 5*maximum(t, tp)^2);
k3d(t, tp) = ind_le(t, tp) * (t^4/720 * (15*tp^2 - 6*t*tp + t^2)) ...
           + ind_gt(t, tp) * (tp^3/720 * ...
                              (20*t^3 - 15*t^2*tp + 6*t*tp^2 - tp^3));
% Same expression as the twice integrated kernel k2
dk3d(t, tp) = k2(t, tp);

%% Building posterior predictive mean and covariance for 1-WP

% Keep the evaluation nodes t0+h*u and t0+h*v inside [t0, t0+h]
assumeAlso(u <= 1);
assumeAlso(v <= 1);
hu = h*u; hv = h*v; % shorthands

% Offsets from t0 at which the posterior predictive is evaluated
syms s sp positive;

% Gram matrix of the once integrated Wiener process kernel for the value
% and the derivative observation, both located at t0
K_1WP = [k1(t0, t0),  k1d(t0, t0);
         k1d(t0, t0), dk1d(t0, t0)];

% Cross-covariances between the prediction points t0+s (row vector) and
% t0+sp (column vector) and the observations
k_1WP  = [k1(t0+s, t0), k1d(t0+s, t0)];
kp_1WP = [k1(t0+sp, t0); k1d(t0+sp, t0)];

% Posterior mean weights and posterior covariance when conditioning on the
% first n observations
mu_1  = @(n) k_1WP(1:n) / K_1WP(1:n, 1:n);
cov_1 = @(n) k1(t0+s, t0+sp) - mu_1(n) * kp_1WP(1:n);

disp('Proof: Once integrated Wiener process gives Euler''s method');
disp('-----------------------------------------------------------');

% After adding initial value
mu_1WP_x0(s) = mu_1(1);
disp('mu^1|x0(0) ==');
w_x0_1WP = mu_1WP_x0(0); % evaluated at s = 0; original note: c1 is always == 0
w_x0_1WP
disp('Expected was: ');
w_x0

% After gradient observation
mu_1WP_Y1(s) = mu_1(2);
disp('mu^1|x0,Y1(t0+h) ==');
w_Y1_1WP = mu_1WP_Y1(h);
w_Y1_1WP
disp('Expected was: ');
b_Euler

%% Building posterior predictive for twice integrated Wiener process

% Blocks of the Gram matrix: c2 is the kernel value at t0, v2 holds the
% k2d cross terms for the points t0 and t0+h*u, and U2 holds the dk2d
% terms between those two points.
c2 = k2(t0, t0);
v2 = [k2d(t0, t0);
      k2d(t0, t0+hu)];
U2 = [dk2d(t0, t0),   dk2d(t0, t0+hu);
      dk2d(t0, t0+hu), dk2d(t0+hu, t0+hu)];
 
K_2WP = [c2, v2.';
         v2, U2];

% Cross-covariances between the prediction points t0+s (row vector) and
% t0+sp (column vector) and the observations
k_2WP = [k2(t0+s, t0), k2d(t0+s, t0), k2d(t0+s, t0+hu)];
kp_2WP = [k2(t0+sp, t0), k2d(t0+sp, t0), k2d(t0+sp, t0+hu)].';

% Posterior mean weights and posterior covariance when conditioning on the
% first idx observations
mu_2 = @(idx) k_2WP(1:idx) / K_2WP(1:idx,1:idx);
cov_2 = @(idx) ...
  k2(t0+s,t0+sp) - (k_2WP(1:idx) / K_2WP(1:idx, 1:idx)) * kp_2WP(1:idx);

disp('Proof: Twice integrated Wiener process gives second-order methods');
disp('-----------------------------------------------------------------');

% All weights below are taken in the limit t0 -> inf.

% After adding initial value
mu_2WP_x0(s) = mu_2(1);
disp('mu^2|x0(0) ==');
w_x0_2WP = limit(mu_2WP_x0(0),t0,inf);
w_x0_2WP
disp('Expected was: ');
w_x0

% After gradient observation
mu_2WP_Y1(s) = mu_2(2);
disp('mu^2|x0,Y1(t0+h*u) ==');
% Stored under the 2WP name: the original used w_Y1_3WP here, which printed
% the wrong label and was overwritten by the thrice integrated section.
w_Y1_2WP = collect(limit(mu_2WP_Y1(hu),t0,inf),h);
w_Y1_2WP
disp('Expected was: ');
w_Y1

% At t0 + h
mu_2WP_Y2(s) = mu_2(3);
disp('mu^2|x0,Y1,Y2(t0+h) ==');
w_Y2_2WP = simplify(limit(mu_2WP_Y2(h),t0,inf));
w_Y2_2WP
disp('Expected was: ');
b_2ndOrder

%% Building posterior predictive for thrice integrated Wiener process

% Blocks of the Gram matrix: c3 is the kernel value at t0, r3 holds the
% k3d cross terms for the points t0, t0+h*u and t0+h*v, and U3 holds the
% dk3d terms between those three points.
c3 = k3(t0, t0);
r3 = [k3d(t0, t0);
      k3d(t0, t0 + hu);
      k3d(t0, t0 + hv)];
U3 = [dk3d(t0, t0),      dk3d(t0, t0 + hu),      dk3d(t0, t0 + hv);
      dk3d(t0, t0 + hu), dk3d(t0 + hu, t0 + hu), dk3d(t0 + hu, t0 + hv);
      dk3d(t0, t0 + hv), dk3d(t0 + hu, t0 + hv), dk3d(t0 + hv, t0 + hv)];
 
K_3WP = [c3, r3.';
         r3, U3];

% Cross-covariances between the prediction points t0+s (row vector) and
% t0+sp (column vector) and the observations
k_3WP = [k3(t0+s, t0), k3d(t0+s, t0), k3d(t0+s, t0+hu), k3d(t0+s, t0+hv)];
kp_3WP = [k3(t0+sp, t0), k3d(t0+sp, t0), ...
          k3d(t0+sp, t0+hu), k3d(t0+sp, t0+hv)].';
  
% Posterior mean weights and posterior covariance when conditioning on the
% first idx observations
mu_3 = @(idx) k_3WP(1:idx) / K_3WP(1:idx,1:idx);
cov_3 = @(idx) ...
  k3(t0+s,t0+sp) - (k_3WP(1:idx) / K_3WP(1:idx, 1:idx)) * kp_3WP(1:idx);

% Heading fixed: the original repeated the "Twice ... second-order" line
% of the previous section here.
disp('Proof: Thrice integrated Wiener process gives third-order methods');
disp('-----------------------------------------------------------------');

% All weights below are taken in the limit t0 -> inf.

% After adding initial value
mu_3WP_x0(s) = mu_3(1);
disp('mu^3|x0(0) ==');
w_x0_3WP = limit(mu_3WP_x0(0),t0,inf);
w_x0_3WP
disp('Expected was: ');
w_x0

% After gradient observation
mu_3WP_Y1(s) = mu_3(2);
disp('mu^3|x0,Y1(t0+h*u) ==');
w_Y1_3WP = collect(limit(mu_3WP_Y1(hu),t0,inf),h);
w_Y1_3WP
disp('Expected was: ');
w_Y1

% At second gradient observation
mu_3WP_Y2(s) = mu_3(3);
disp('mu^3|x0,Y1,Y2(t0+hv) ==');
w_Y2_3WP = simplify(limit(mu_3WP_Y2(hv),t0,inf));
w_Y2_3WP
disp('Expected was: ');
w_Y2

% Difference between the expected weights and the computed ones, and that
% same difference evaluated at v = 2/3
disp('*** These two values are not the same! There is a residual term: ');
residual = simplify(w_Y2 - w_Y2_3WP);
residual
disp('*** However, the residual vanishes for v=2/3: ');
residual_v_23 = subs(residual,v,2/3)

% At t0+h
% The final weights are computed twice, once per ordering of the nodes u
% and v. Each case adds its ordering as an assumption before simplifying.
disp('The SMT has problems with the relative positions of u vs. v.');
disp('Using case analysis to simplify computations.');

% --- Case 1: u < v -------------------------------------------------------
disp('1st case: u < v');
assumeAlso(u < v);
mu_3WP_Y3(s) = mu_3(4);
disp('mu^3|x0,Y1,Y2,Y3(t0+h) ==');
w_Y3_3WP = simplify(limit(mu_3WP_Y3(h), t0, inf))
disp('Expected was: ');
b_3rdOrder
disp('b_3rdOrder - w_Y3_3WP');
result = simplify(b_3rdOrder - w_Y3_3WP)

% --- Case 2: u > v -------------------------------------------------------
disp('2nd case: u > v');
% Drop the assumptions on u (including u < v from case 1), then restore
% the upper bound and add the opposite ordering.
sym('u', 'clear');
assumeAlso(u <= 1);
assumeAlso(u > v);
mu_3WP_Y3(s) = mu_3(4);
disp('mu^3|x0,Y1,Y2,Y3(t0+h) ==');
w_Y3_3WP = simplify(limit(mu_3WP_Y3(h), t0, inf))
disp('Expected was: ');
b_3rdOrder
disp('b_3rdOrder - w_Y3_3WP');
result = simplify(b_3rdOrder - w_Y3_3WP)

%% Wrapping up
% Signal completion and print the closing notes and the citation.

% Playing the sound is best-effort: sound() can raise an error (e.g. when
% no audio output device is available), and that must not suppress the
% closing messages after a long computation.
try
  beep();
catch soundErr
  warning('GMRK:noSound', ...
          'Completion sound could not be played: %s', soundErr.message);
end
disp('==================================================');
disp(['This concludes the proofs. For further details use the ', ...
      'defined variables to search for desired results.']);
disp('Beware: computations take very long, always use case analysis');
disp('This code is part of the publication:');

disp(' ');
disp('Schober, Michael, Duvenaud, David and Hennig, Philipp: ');
disp('Probabilistic ODE Solvers with Runge-Kutta Means.');
% Proceedings title corrected from "Advances of ..." to "Advances in ..."
disp('In: Advances in Neural Information Processing Systems (NIPS) 2014');

disp(' ');
disp('For more information visit the website:');
disp(' ');
disp('http://www.probabilistic-numerics.org/ODEs.html');

