function [Th,P1,P2,P3]=SGDUpdate(ThI,UpP1,UpP2,Der,P1I,P2I,P3I)
%SGDUpdate Stochastic gradient descent parameter update (Adadelta rule).
%
%   [Th,P1,P2,P3]=SGDUpdate(ThI,UpP1,UpP2,Der,P1I,P2I,P3I)
%
% Inputs
%   ThI : vector of params (current values)
%   UpP1: Update Parameter 1. NOT used by the active Adadelta rule; it is the
%         step size Nab (0.7*1e-2) in the disabled variants kept below, or
%         ro (0.95) if the commented-out "ro=Nab" wiring is restored.
%   UpP2: Update Parameter 2. NOT used by the active Adadelta rule; it is the
%         momentum Mom (0.5) in the disabled momentum variant, or eps (1e-6)
%         if the commented-out "eps=Mom" wiring is restored.
%   Der : derivative of the objective with respect to the params
%   P1I : accumulated squared gradient from step t-1 (optional; [] or
%         omitted starts from zeros)
%   P2I : accumulated squared update from step t-1 (optional; [] or
%         omitted starts from zeros)
%   P3I : not used, returned unchanged (optional; defaults to [])
%
% Outputs
%   Th  : updated params
%   P1  : accumulated squared gradient at step t
%   P2  : accumulated squared update at step t
%   P3  : copy of P3I

% Disabled variant: Standard (No Momentum)
%P1: not used
%P2: not used
% Th=ThI-Nab*Der;

% Disabled variant: With Momentum
% P1=Mom*P1I+Nab*Der; %P1: velocity
% P2=P2I; %not used
% P3=P3I; %not used
% Th=ThI-P1;
% % mean(abs(Th-ThI))=0.0070 for mean and std

%Adadelta
%ro=Nab; %decay rate 0.99 (if many oscillations), 0.90 (if no oscillation)
%eps=Mom; %constant 1e-6
ro=0.95; %decay rate
epsilon=1e-6; %constant (named epsilon so the built-in eps is not shadowed)

% First call: no accumulated state yet, so start both running averages at
% zero with the same shape as the derivative instead of failing on a size
% mismatch between [] and Der.
if nargin<5 || isempty(P1I)
    P1I=zeros(size(Der));
end
if nargin<6 || isempty(P2I)
    P2I=zeros(size(Der));
end
if nargin<7
    P3I=[];
end

Eg2_1=P1I; %accumulated gradient t-1
Edx2_1=P2I; %accumulated update t-1
g=Der; %derivative

Eg2=ro*Eg2_1+(1-ro)*g.^2; %running average of squared gradient
% The step uses the previous update average (t-1) and the current gradient
% average (t).
dx=-(ARms(Edx2_1,epsilon)./ARms(Eg2,epsilon)).*g;
Edx2=ro*Edx2_1+(1-ro)*dx.^2; %running average of squared update
Th=ThI+dx;

P1=Eg2; %accumulated gradient t
P2=Edx2; %accumulated update t
P3=P3I; %Not used








%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Rms=ARms(Eg2,eps)
%ARms Adadelta RMS term: element-wise square root of (Eg2 + eps).
%   Eg2: accumulated (running-average) squared quantity
%   eps: constant added before taking the square root; inside this function
%        the name refers to the argument, not the built-in eps
shifted=Eg2+eps;
Rms=sqrt(shifted);



