//       

//---------------------------------------------------------
// stats::reg_lazy_abs =  utility for stats::reg.
//
// stats::reg uses a *symbolic* expression
// chis := sum( abs(f(x[i], p[j]) - y[i])^2, i)
// with float values x[i], y[i] and *symbolic*
// values p[j]. These symbols make abs(expression in p[j])
// **extremely** expensive due to the investigation of
// properties of the symbols p[j]. Since we know a priori
// that the symbols p[j] (generated by genident()) do not
// have properties, we can avoid the expensive abs and
// use a much faster lazy_abs. Because of procname, we
// do not install this as a subprocedure of stats::reg,
// but as a global procedure using stats:: as a namespace:
//---------------------------------------------------------

stats::reg_lazy_abs:= proc(x)
local fx;
begin
  fx:= float(x);
  case domtype(fx) 
  of DOM_FLOAT do
  of DOM_COMPLEX do
     return(specfunc::abs(fx))
  otherwise
     return(procname(args()));
  end_case;
end_proc:

//------------------------------------------------------------
//------------------------------------------------------------
/*-- main routine stats::reg: nonlinear regression ------------

Calls:
    reg(data, model, v, p, <Symbolic>, <CovarianceMatrix>)
    reg(data, model, v, p, StartingValues = p_init, <Symbolic>, <CovarianceMatrix>)

    // The following syntax is obsolete, but still accepted,
    // because TCI uses it:
    reg("advanced", data, model, v, p, p_init, <Symbolic>)

nonlinear least-squares-fit of an arbitrary function
(nonlinear concerning independent variables and nonlinear concerning 
 the fit-parameters)

Parameters:
data  data - this may be a sample with specified columns 
                      or a sequence of columns
                      or a list of rows.
             the number of columns depends on the number of 
             independent variables.
             the order of columns is:
             independent variables (x_i), dependent variable (y),
                      errors/standard deviations of the dependent 
                      variable (sigma_y)
             the sigma_y are optional
model        the function to fit: an expression
v            list of independent variables
p            list of fit-parameters
p_init       list of starting values for fit-parameters
Symbolic     this option prevents conversion of input data
             to floats. Only has an effect if the regression
             is linear, i.e., if the model depends linearly
             on the fit-parameters
CovarianceMatrix
             with this option, the covariance matrix
             C[i,j] = cov(p[i],p[j]) is returned as well: the
             return value is
               [[p1, p2, ..], chisquare, C].
             C is defined by
               inverse(C)[i,j] = 1/2* diff(chisquare, p[i], p[j])
             with chisquare = sum(w[i]*(y[i] - f(x[i], p)^2).
---------------------------------------------------------------*/

stats::reg := proc()
  local llll, x, y, fitf, fitderiv, xvar_, xvar, param, 
        linearRegression, nargs, withCovariance,
        N, n, i, j, k, m, p, a, deltaa, al, alpha, alphal,
        lambda, bet, betav, chis, chisqu, chisqu_old, w,
        counter_out, counter_in, max_out, max_in,
        macheps, conv, sigmay, nvars,
        dummy, dummy2, tmp, kern, S0, S1, S2; 
save DIGITS;

  /*
  x[i]          x-data
  y[i]          y-data
  fitf          fitting function
  fitderiv[i]   fitting function's derivatives
  xvar_         user's fitting variable
  xvar          symbolic fitting variable in fitting function
  param[i]      user's fitting parameters
  N             number of (x,y)-pairs
  n             number of fitting parameters
  i, j, k, m    indices
  p[i]          symbolic fitting parameters in fitting function
  a[i]          current values of the fitting parameters
  aout[i]       list of converged parameters
  errout[i]     list of errors of those parameters (sigma_aout)
  deltaa[i]     vector in parameter space (step to next parameter)
  al[i,j]       symbolic matrix alpha
  alpha[i,j]    matrix alpha
  alphal[i,j]   matrix alpha(lambda)
  lambda        marquard-factor
  bet[i]        symbolic gradient vector
  betav[i]      gradient vector
  chis          symbolic chisquare
  chisqu        chisquare
  w             weighting factor
  minv()        matrix inversion of a symmetric square matrix
  det           determinant of alphal
  counter_out   counter for outer loop
  counter_in    counter for inner loop
  macheps       10^(-DIGITS) = relative machine precision
  conv          converged? TRUE/FALSE
  max_out       maximum number of outer loops
  max_in        maximum number of inner loops
  sigmay        TRUE if x-y-data includes sigma_y
  C             inverse of the covariance matrix of the fit parameters
  */

begin
  nargs:= args(0):

  if nargs < 4 then
     //need:  data, model, variables, parameters
     error("expecting at least four arguments");
  end_if:

  if args(nargs) = CovarianceMatrix then
     withCovariance:= TRUE;
     nargs:= nargs - 1;
  else
     withCovariance:= FALSE;
  end_if;

//DIGITS:= DIGITS + 2;

  // -------------------------------------------------------------
  // Starting values provided by the user ?
  // If not, set default StartingValues = [1.0, 1.0, ..]
  // Note that StartingValues = [0.0, 0.0, ..] would be
  // a rather unfortunate choice, because for model functions
  // with polynomial dependence on the fit parameters the
  // curvature matrix will be singular if all parameters 
  // vanish!

  //-----------------------------------
  // obsolete input syntax, used by TCI.
  if args(1) = "advanced" then
    return(stats::reg(args(2..nargs-1), StartingValues = args(nargs)));
  end_if;
  //-----------------------------------

  if not has([args(nargs)], StartingValues) then
    if withCovariance then
      return(stats::reg(args(1..nargs), 
             StartingValues = [float(1) $ nops(args(nargs))],
             CovarianceMatrix))
    else
      return(stats::reg(args(), StartingValues = [float(1) $ nops(args(nargs))]))
    end_if;
  end_if;

  // -------------------------------------------------------------
  // now we can assume that starting values
  // for the fit parameters are provided.

  //----------------------------------------------------------------------
  // check the arguments

  if testargs() then
    if domtype(args(nargs-3)) <> DOM_EXPR then 
      error("expecting a model function of type DOM_EXPR")
    elif domtype(args(nargs-2)) <> DOM_LIST then
      error("expecting independent variables provided as a list")
    elif domtype(args(nargs-1)) <> DOM_LIST then
      error("expecting fit-parameters provided as a list")
    elif type(args(nargs)) <> "_equal" or
         op(args(nargs), 1)<> StartingValues or
         domtype(op(args(nargs), 2))<> DOM_LIST then
      error("expecting starting values for the fit-parameters provided as a list")
    end_if
  end_if;

  //----------------------------------------------------------------------
  // get the arguments

  fitf := args(nargs-3);// the model function
  xvar_:= args(nargs-2);// the independent variables
  param:= args(nargs-1);// the fit parameters
  a:= op(args(nargs),2);// starting values for the fit parameters 
  a:= map(a, float);
  nvars:= nops(xvar_);  // number of variables
  n:= nops(param);      // number of fit parameters

  //----------------------------------------------------------------------
  // further check of the arguments

  if testargs() then
     map(xvar_, proc(x)
            name stats::reg::checkVariables;
            begin 
              if domtype(x) <> DOM_IDENT and type(x) <> "_index" then
                 error("Expecting symbolic variables"):
              end_if;
            end_proc);
     map(param, proc(p)
            name stats::reg::checkParameters;
            begin 
              if domtype(p) <> DOM_IDENT and type(p) <> "_index" then
                 error("Expecting symbolic parameters"):
              end_if;
            end_proc);
     map(a, proc(a)
            name stats::reg::checkStartingValues;
            begin 
              if domtype(a) <> DOM_FLOAT and domtype(a) <> DOM_COMPLEX then
                 error("Starting values are not numerical"):
              end_if;
            end_proc);
   end_if;

  //----------------------------------------------------------------------
  // First try to get the data: use FALSE as first argument: Note that
  // an intentional error occurs if weights are provided.
  // This intentional error should not lead to abortion.

  llll:= stats::getdata(FALSE, "numeric_only", nvars+1, args(1..nargs-4));
  if domtype(llll) = DOM_STRING then  // an error occurred, because weights are provided
       llll:= stats::getdata(FALSE, "numeric_only", nvars+2, args(1..nargs-4));
       sigmay:= TRUE;
  else sigmay:= FALSE;
  end_if;
  if domtype(llll) = DOM_STRING then error(llll) end_if;
  
  N:= nops(llll[1]);      // the number of sample points

  //----------------------------------------------------------------------
  // First check, whether the equations for the parameters are degenerate.
  // This is definitely the case if N < n or if the model function does
  // not contain one of the parameters:

  if N < n then 
     error("need at least as many sample points as parameters") 
  end_if;
  for i from 1 to n do
    if iszero((diff(fitf, op(param, i))))
      then error("the model function does not depend on parameter ".expr2text(op(param,i)))
    end_if
  end_for;

  //----------------------------------------------------------------------
  // get the data and convert to floats

  for i from 1 to nvars do
    x[i]:= map(llll[i], float); // x[i] = list of sample points for the i-th variable
  end_for;
  y:= map(llll[nvars+1],float); // y = list of sample points for the dependent variable
  if sigmay then 
       w:= map(llll[nvars+2], float); // w = list of weights
  else w:= [float(1) $ N];
  end_if;

  //----------------------------------------------------------------------
  // Substitute the symbols xvar_ and param provided by the user
  // by our own names xvar and p, respectively:

  xvar:= genident("X");
  p:= genident("p");
  fitf:= subs(float(fitf), [xvar_[i]=xvar[i] $i=1..nvars, op(param, i)=p[i] $i=1..n]);

  //---------------------------------------------------------
  // calculate all partial derivatives fitderiv[i] of function to fit.

  fitderiv:= [diff(fitf, p[i]) $ i=1..n];

  //---------------------------------------------------------
  // Check, whether the regression is linear, 
  // i.e., whether fitf is linear in the parameters,
  // i.e., whether fitderiv does not contain p

  if has(fitderiv, p) then
       linearRegression:= FALSE;
  else linearRegression:= TRUE;
  end_if;

  //---------------------------------------------------------
  // special implementation for linear regression. Need to
  // solve a linear system of equations only *once*. Avoid the
  // iterative process for non-linear systems below:

  if linearRegression then
     userinfo(1, "using special routine for linear regression");
     bet:= [0 $ n];
     for i from 1 to N do
         dummy:=  subs(fitderiv, [xvar[m]=x[m][i] $m=1..nvars],EvalChanges);
         dummy2:= subs(fitf,     [xvar[m]=x[m][i] $m=1..nvars],EvalChanges);
         dummy2:= w[i]*(y[i] - dummy2);
         for k from 1 to n do
           bet[k]:= bet[k] + dummy2*dummy[k];
         end_for;
     end_for;
     a:=  numeric::linsolve(bet, [p[i] $ i=1..n]);
     if a = FAIL or  // matrix not invertible
        nops(a) <> n // rank of matrix was not maximal
        then
        return(FAIL)
     end_if;

     // compute quadratic error
     // Substitute optimized parameters into model function
     fitf:= subs(fitf, a, EvalChanges);
     chis:= float(0);
     for i from 1 to N do
       // substitute sample data into model function
       dummy2:= subs(fitf, [xvar[m]=x[m][i] $m=1..nvars],EvalChanges);
       chis:= chis + float(w[i]*stats::reg_lazy_abs(y[i] - dummy2)^2);
     end_for;

     // convert [p[1]= value1, p[2]= value2, ..] to [value1, value2, ..]:
     a:= map(a, op, 2);

     if has(chis, hold(stats::reg_lazy_abs)) then
        chis:= subs(chis, hold(stats::reg_lazy_abs) = abs);
     end_if;
     //----------------------------
     // return
     //----------------------------
     if withCovariance then
        fitderiv:= subs(fitderiv, [p[i] = a[i] $ i=1..n], EvalChanges);
        S2:= array(1..n, 1..n):
        for i from 1 to n do
          for j from i to n do 
            S2[i,j]:= _plus(w[k]*subs(fitderiv[i]*fitderiv[j], [xvar[m]=x[m][k] $ m=1..nvars]) $ k=1..N);
            S2[j,i]:= S2[i,j];
          end_for:
        end_for:
        S2:= numeric::inverse(S2, NoWarning, ReturnType = Dom::/*Dense*/Matrix()):
        return([a, chis, S2]);
     else
        return([a, chis]);
     end_if;
          //-------------------------
  end_if: // end of linear regression
          //-------------------------
  //---------------------------------------------------------
  // Now do the non-linear case:
  //---------------------------------------------------------
  // calculate symbolic matrix al[i,j] 
  // and symbolic gradient bet[i,j] 
  userinfo(1, "entering the Levenberg-Marquadt algorithm"):

  bet:= array(1..n, [0 $ n]):
  al:= array(1..n, 1..n, [[0 $ n] $ n]):
  for i from 1 to N do
    dummy:=  subs(fitderiv, [xvar[m]=x[m][i] $m=1..nvars], EvalChanges);
    dummy2:= subs(fitf, [xvar[m]=x[m][i] $m=1..nvars], EvalChanges);
    dummy2:= w[i]*(y[i] - dummy2);
    for k from 1 to n do
      bet[k]:= bet[k] + dummy2*dummy[k];
      tmp:= w[i]*dummy[k];
      for j from 1 to n do
        al[j, k]:= al[j, k] + tmp*dummy[j];
      end_for
    end_for
  end_for;

  //---------------------------------------------------------
  // calculate symbolic chisquare chis 
  //---------------------------------------------------------
  chis:= 0;
  for i from 1 to N do
    // Substitute sample data into model function.
    // (the parameters remain symbolically)
    dummy2:= subs(fitf, [xvar[m]=x[m][i] $m=1..nvars],EvalChanges);
    chis:= chis + w[i]*stats::reg_lazy_abs(y[i] - dummy2)^2;
  end_for;

  //---------------------------------------------------------
  // calculate initial chisquare(a) 
  //---------------------------------------------------------
  chisqu_old:= float(subs( chis, [p[i]=a[i] $i=1..n],EvalChanges));
  if domtype(chisqu_old) <> DOM_FLOAT then
     error("cannot evaluate the data numerically. ".
           "Are there symbolic data?");
  end_if;
  if chisqu_old < 0 then 
     // return(FAIL) // illegal starting parameters 
     error("illegal starting values for the fit parameters");
  end_if;

  //---------------------------------------------------------
  // Do the numerical search:
  //---------------------------------------------------------
  macheps:= 10.0^(2-DIGITS):
  max_out:= 1000;
  max_in:=    30;
  lambda:= 0.001;

  //---------------------------------------------------------------
  // begin outer loop (searching the fit parameter space)
  //---------------------------------------------------------------
  conv:= FALSE;   // not yet converged
  counter_out:= 0;
  while ((not conv) and (counter_out < max_out)) do
    counter_out:= counter_out+1;
    // calculate alpha and betav 
    alpha:= subs(al, [p[i]=a[i] $i=1..n], EvalChanges);
    alpha:= map(alpha, float);
    betav:= subs(bet, [p[i]=a[i] $i=1..n], EvalChanges);
    betav:= map(betav, float);

    //---------------------------------------------------------------
    // begin inner loop (computing step in parameter space (deltaa);
    // search for suitable lambda such that chisqu decreases)
    //---------------------------------------------------------------
    counter_in:= 0;
    while TRUE do
      counter_in:= counter_in+1;
      if counter_in > max_in then 
        userinfo(1, "no convergence in inner loop!");
        return(FAIL)
      end_if;

      // alphal(lambda) 
      alphal:= alpha;
      tmp:= 1 + lambda;
      for i from 1 to n do
        alphal[i,i]:= tmp*alphal[i,i]
      end_for;

      // solve alphal*da = betav for da
      [deltaa, kern]:= numeric::matlinsolve(alphal, betav);
      if deltaa = FAIL or kern <> 0 then
        userinfo(1, "the curvature matrix is singular");
        // increase the relaxation parameter
        lambda:= 10 * lambda;
        // try again with larger lambda
        next;
      end_if;

      // calculate chisquare(a+deltaa) 
      chisqu:= subs(chis, [p[i]=a[i] + deltaa[i, 1] $i=1..n], EvalChanges);

      if chisqu > chisqu_old then 
         // try again with increased relaxation parameter
         lambda:= 10 * lambda;
      else break;
      end_if;
    end_while;
    //---------------------------------------------------------------
    // end inner loop
    //---------------------------------------------------------------

    // check convergence 
    conv:= TRUE;
    for i from 1 to n do
      if specfunc::abs(deltaa[i, 1]) > macheps*(1+specfunc::abs(a[i])) then 
        conv:= FALSE;
        break;
      end_if
    end_for;

    lambda:= lambda/100;
    chisqu_old:= chisqu;
    for i from 1 to n do
      a[i]:= a[i] + deltaa[i, 1]
    end_for;
  end_while;
  //---------------------------------------------------------------
  // end outer loop
  //---------------------------------------------------------------

  if counter_out = max_out then 
    userinfo(1, "poor convergence!");
    return(FAIL);
  end_if;
  if chisqu_old < 0 then return(FAIL) end_if;
  if withCovariance then
     // compute the matrix
     // S2[i, j]:= diff(S, p[i], p[j])/2,
     // where S = chisquare = sum(w[k]*(f(x[k], p) - y[k])^2, $k=1..N):
     //   S2[i,j]:= sum(w[k]*(diff(f(x[k], p), p[i]) * diff(f(x[k], p), p[j])
     //                       + f(x[k],p) - y[k])* diff(f(x[k], p), p[i], p[j])
     //                 $ k=1..N)
     // The inverse of S2 is the desired covariance matrix
     // of the parameters: inverse(S2)[i,j] = cov(p[i], p[j]):

     // replace fit parameters p[i] in fitf by their optimized values a[i]
     S0 := subs(fitf, [p[i] = a[i] $ i=1..n],EvalChanges); 

     // replace fit parameters p[i] in fitderiv by their optimized values a[i]
     S1 := subs(fitderiv, [p[i] = a[i] $ i=1..n],EvalChanges); 

     // Compute S2 = diff(f,p[i], p[j] and replace fit parameters p[i] 
     // by their optimized values a[i]
     // S2 = diff(f(x, p), p[i], p[j])/2);
     S2:= array(1..n, 1..n):
     for i from 1 to n do
      for j from i to n do
        // compute second derivative of the mode function
        S2[i, j]:= diff(fitderiv[i], p[j]):
        // replace fit parameters p[i] by their optimized values a[i]
        S2[i, j]:= subs(S2[i,j], [p[k]=a[k] $ k=1..n], EvalChanges);
      end_for;
     end_for;

     // insert the measurements and add up. Overwrite S2:
     for i from 1 to n do
      for j from i to n do 
        S2[i, j]:= _plus(w[k]*(eval(
                        subs( (S0 - y[k])*S2[i,j] + S1[i]*S1[j],
                              [xvar[m]=x[m][k] $m=1..nvars])
                         )) $ k=1..N);
         S2[j, i]:= S2[i, j]:
      end_for:
     end_for:
     S2:= numeric::inverse(S2, NoWarning, ReturnType = Dom::/*Dense*/Matrix()):
     return([a, chisqu_old, S2]);
  else
     return([a, chisqu_old]);
  end_if;
end_proc:

// end of file 
