//       
// W. Oevel, 5.9.02: *) syntax extended (now, weights are accepted).
//                   *) new return value [[a, b], chisquare] instead of [a, b]
//                   *) new option 'CovarianceMatrix'

/*--

Linear regression. Returns least square estimators a and b for
the model function y = f(x) = a+b*x for a data set (x[i], y[i])
together with the chi-square deviation sum(w[i]*(y[i] - a - b*x[i])^2)
and (optionally) the covariance matrix of the estimators a, b.

Calling Syntax:

   stats::linReg([[x1, y1 <,w1>], [x2, y2 <,w2>], ...] <, CovarianceMatrix>)
   stats::linReg([x1, x2, ..], [y1, y2, ..] <, [w1, w2, ..]> <, CovarianceMatrix>)
   stats::linReg(s, cx, cy <, cw> <, CovarianceMatrix>)
   stats::linReg(s, [cx, cy <, cw>] <, CovarianceMatrix>)

Parameters:
   x[i], y[i] : data to fit: arithmetical expressions
   w[i]       : weights: arithmetical expressions
   s          : stats::sample
   cx, cy, cw : column indices providing the data x[i], y[i]
                and weights w[i], respectively
               (optional if sample contains only 2 data-columns)

ReturnValue
   stats::linReg(data, weights) returns 
                   [[a, b], chisquare]
   stats::linReg(data, weights, CovarianceMatrix) returns 
                   [[a, b], chisquare, covmatrix]
   where covmatrix is a Dom::/*Dense*/Matrix().

Details:
   Ref: Ph. R. Bevington + D.K. Robinson, 
        Data Reduction and Error Analysis for The Physical Sciences
   (or see: nedwww.ipac.caltech.edu/level5/Leo/Stats7_2.html)
   Let S = chisquare = sum( w[i]*(y[i] - (a + b*x[i]))^2, i=1..n ).
   Minimizing S one finds the least squares estimates
      a = (D*C - E*A)/(D*W - A^2)
      b = (E*W - C*A)/(D*W - A^2)
   for the parameters a and b, where
     A = sum(w[i]*x[i], i=1..n),
     W = sum(w[i], i=1..n)
     C = sum(w[i]*y[i], i=1..n),
     D = sum(w[i]*x[i]^2,    i=1..n),
     E = sum(w[i]*x[i]*y[i], i=1..n),
     F = sum(w[i]*y[i]^2,    i=1..n),
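     Z = W*D - A^2,
     xx = sum(w[i]*x[i], i=1..n)/sum(w[i], i=1..n) = A/W  (weighted mean of the x[i]),
     yy = sum(w[i]*y[i], i=1..n)/sum(w[i], i=1..n) = C/W  (weighted mean of the y[i]),
     z = Z/W = sum(w[i]*(x[i] - xx)^2, i=1..n),
   i.e.,
      a = (D*C - E*A)/ Z
      b = (E*W - C*A)/ Z
   i.e.,
      a = (D*yy*W - E*xx*W)/ Z
      b = (E*W - xx*yy*W^2)/ Z
   (For trivial weights w[i] = 1 one has W = n.)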

   The covariance matrix of the estimators a,b is
        ( sigma(a)^2    cov(a, b)  ) = ( D/Z  ,-A/Z)
        ( cov(a, b)     sigma(b)^2 )   (-A/Z    W/Z)
--*/

// stats::linReg -- weighted/unweighted linear regression y = a + b*x.
//
// Arguments: the data specification accepted by stats::getdata (lists of
//            data or a sample with column indices), optionally including
//            weights, optionally followed by the option CovarianceMatrix.
//
// Returns:   [[a, b], chisquare]             without CovarianceMatrix,
//            [[a, b], chisquare, covmatrix]  with CovarianceMatrix,
//            FAIL if the data set is empty, if the sum of the weights
//            vanishes, or if z = sum(w[i]*(x[i] - xx)^2) vanishes
//            (each of these would lead to a division by zero).
//
// Errors:    "expecting data" when called without arguments; the message
//            returned by stats::getdata when the data cannot be extracted;
//            "data mismatch: ..." when the x, y and weight lists differ
//            in length.
stats::linReg := proc()
local withCovariance, withWeights,
      dataArgs, data, w, W, n, i,
      x, y, xx, yy, xxx, yyy, z, 
      a, b, chisquare;
begin
  if args(0) = 0 then 
     error("expecting data");
  end_if;

  // Split off a trailing option CovarianceMatrix; the remaining arguments
  // are collected in a list so that the data extraction below needs to be
  // written only once.
  if args(args(0)) = CovarianceMatrix then
    withCovariance:= TRUE;
    dataArgs:= [args(1..args(0) - 1)];
  else
    withCovariance:= FALSE;
    dataArgs:= [args()];
  end_if;

  // First try to read 2 data columns (x, y). stats::getdata reports
  // problems by returning a string (it is called with FALSE as first
  // argument -- presumably this suppresses its own error; verify against
  // stats::getdata). A string is taken as a hint that weights were
  // provided, so a second attempt with 3 columns (x, y, w) is made.
  data:= stats::getdata(FALSE, "all_data", 2, op(dataArgs));
  if domtype(data) = DOM_STRING then
     data:= stats::getdata(FALSE, "all_data", 3, op(dataArgs));
     withWeights:= TRUE;
  else
     withWeights:= FALSE;
  end_if;

  // If both attempts failed, data is an error message. Raise it
  // unconditionally (not only when testargs() is TRUE): continuing with
  // a string instead of data could only produce an obscure follow-up error.
  if domtype(data) = DOM_STRING then
     error(data);
  end_if;

  //------------------
  // start to compute:
  //------------------
  if op(data, 1) = [] then
    return(FAIL); // division by zero 
  end_if;

  if withWeights then
    [x, y, w]:= [data]; // x = [x1,x2,..], y = [y1,y2,..], w = [w1, w2, ..]
    n:= nops(x);
    if nops(y) <> n then
       error("data mismatch: expecting as many data for the dependent variable ".
             "as for the independent variable");
    end_if;
    if nops(w) <> n then
       error("data mismatch: expecting as many weights as data for the independent variable");
    end_if;
    // -----------------------------
    // The general case with weights.
    // -----------------------------
    W:= _plus(op(w)):  // W = w[1] + w[2] + ...
    if iszero(W) then return(FAIL) end_if;
    xx:= _plus(w[i]*x[i] $ i=1..n)/W; // weighted mean of x values
    yy:= _plus(w[i]*y[i] $ i=1..n)/W; // weighted mean of y values
    xxx:= map(x, _subtract, xx); // xxx = [x1-xx,x2-xx,..]
    yyy:= map(y, _subtract, yy); // yyy = [y1-yy,y2-yy,..]
    z:= _plus(w[i]*xxx[i]^2 $ i=1..n); // z = w1*(x1-xx)^2 + w2*(x2-xx)^2 + ...
    if iszero(z) then return(FAIL) end_if;
    // a = yy - b*xx, written as a single sum over centered data
    // (numerically more stable, see the remark below)
    a:= _plus(w[i]*xxx[i]*(yy*xxx[i]-xx*yyy[i]) $ i=1..n) / z ;
    b:= _plus(w[i]*xxx[i]*yyy[i] $ i=1..n) / z;
    chisquare:= _plus( w[i]*(y[i] - a - b*x[i])^2 $ i=1..n);
    if withCovariance then
       // The following could be tuned for speed because
       // _plus(w[i]*x[i]^2 $ i=1..n) and _plus(w[i]*x[i]*y[i] $ i=1..n)
       // are computed twice. However, the following representation
       // of the estimator for a is numerically more stable, so it
       // seems worthwhile to use
       //      _plus(w[i]*xxx[i]*(yy*xxx[i]-xx*yyy[i]))
       // instead of
       //      yy*_plus(w[i]*xxx[i]^2) - xx*_plus(w[i]*xxx[i]*yyy[i])
       //
       // Covariance matrix [[D/Z, -A/Z], [-A/Z, W/Z]] expressed via
       // z = Z/W and xx = A/W:
       return([[a, b], chisquare,
               /*dense*/matrix([[_plus(w[i]*x[i]^2 $ i=1..n)/W/z, -xx/z],
                                [-xx/z, 1/z]
                               ])
              ]);
    else
       return([[a, b], chisquare]):
    end_if;
  else
    // ---------------------------------------------------
    // The special case without weights.
    // Repeat the code above for trivial weights w[i] = 1;
    // avoid unnecessary multiplications with w[i]:
    // ---------------------------------------------------
    [x, y]:= [data]; // x = [x1,x2,..], y = [y1,y2,..]
    n:= nops(x);
    // same consistency check as in the weighted case
    if nops(y) <> n then
       error("data mismatch: expecting as many data for the dependent variable ".
             "as for the independent variable");
    end_if;
    xx:= _plus(op(x))/n; // mean of x values
    yy:= _plus(op(y))/n; // mean of y values
    xxx:= map(x, _subtract, xx); // xxx = [x1-xx,x2-xx,..]
    yyy:= map(y, _subtract, yy); // yyy = [y1-yy,y2-yy,..]
    z:= _plus(xxx[i]^2 $ i=1..n); // z = (x1-xx)^2 + (x2-xx)^2 + ...
    if iszero(z) then return(FAIL) end_if;
    // a = yy - b*xx, written as a single sum over centered data
    a:= _plus(xxx[i]*(yy*xxx[i]-xx*yyy[i]) $ i=1..n) / z ;
    b:= _plus(xxx[i]*yyy[i] $ i=1..n) / z;
    chisquare:= _plus( (y[i] - a - b*x[i])^2 $ i=1..n);
    if withCovariance then
       // As in the weighted case, _plus(x[i]^2 $ i=1..n) is recomputed
       // here; the numerically more stable representation of the
       // estimator a above is preferred to sharing the sums.
       //
       // Covariance matrix for w[i] = 1 (W = n):
       return([[a, b], chisquare,
               /*dense*/matrix([[_plus(x[i]^2 $ i=1..n)/n/z, -xx/z],
                                [-xx/z, 1/z]
                               ])
              ]);
    else
       return([[a, b], chisquare]):
    end_if;
  end_if;
end_proc:

// end of file 
