//this procedure returns the covariance
// stats::covariance(x1, .., x.n, y1, .., y.n)=
//1/(n - 1)*(sum((x_i-x)*(y_i-y), i=1..n)-x*y
// where x is the mean of the data x1, .., x.n and
//y is mean of the data y1, .., y.n

/*----------------------------------------------------------
Calls:  stats::covariance([x1, x2, ..], [y1, y2, ..], <mode>)
        stats::covariance([[x1, y1], [x2, y2], ..], <mode>)
        stats::covariance(s, c1, c2, <mode>)
        stats::covariance(s, [c1, c2], <mode>)
        stats::covariance(s1 <, c1>, s2 <, c2>, <mode>)

Parameters:
    x.i, y.i  --  data: arithmetical expression
    s, s1, s2 --  stats::sample objects
    c1, c2    --  data-column indices
                  (optional if sample contains only 2 data-columns)
    mode      --  either Sample or Population. The
                  default is Sample.

----------------------------------------------------------*/

stats::covariance:=proc()
local a, b, smple, twoSamples, i, data1, data2, n, usefloats, X1, X2;
begin
  if args(0) < 1 then
     error("expecting at least one argument");
  end_if;

  // check arguments for the option Sample/Population
  case args(args(0)) 
  of Sample do
       smple:= 1:
       b:= args(0) - 1; // args(a + 1) .. args(b) are related to the 2nd sample
       break;
  of Population do
       smple:= 0:
       b:= args(0) - 1;
       break;
  otherwise
       smple:= 1:  // default is Sample
       b:= args(0);
       break;
  end_case;

  // Are there 2 stats::sample objects?
  a:= 1:  // args(0) .. args(a) are related to the 1st sample
          // args(a + 1) .. args(b) are related to the 2nd sample
  twoSamples:= FALSE;
  if domtype(args(1)) = stats::sample then
     for i from 2 to b do
       if domtype(args(i)) = stats::sample then
          twoSamples:= TRUE;
          a:= i - 1;
          break;
      end_if;
     end_for:
  end_if;

  if twoSamples then
     data1:= stats::getdata(testargs(), "all_data", 1, args(1..a)):
       // a string returned by stats::getdata indicates an error:
       if domtype(data1) = DOM_STRING then error(data1) end_if;
     data2:= stats::getdata(testargs(), "all_data", 1, args(a+1..b)):
     if domtype(data2) = DOM_STRING then error(data2) end_if;
  else
     data1:= [stats::getdata(testargs(), "all_data", 2, args(1..b))];
     if domtype(data1[1]) = DOM_STRING then error(data1[1]) end_if;
     // data1 is a list of 2 lists [1st column, 2nd column]
     // Split into separate lists:
     [data1, data2] := data1:
  end_if;

  //-----------------------------------------
  // data1 = [x1, x2, ..] = 1st data column,
  // data2 = [y1, y2, ..] = 2nd data column.
  //-----------------------------------------

  n:= nops(data1); // = nops(data2):
  if iszero(n - smple) then
     error("not enough data in the samples");
  end_if;
  usefloats:= FALSE:
  X1:= _plus(op(data1))/n; // = mean of 1st column
  if domtype(X1) = DOM_FLOAT then
     usefloats:= TRUE
  end_if;
  X2:= _plus(op(data2))/n; // = mean of 2nd column
  if domtype(X2) = DOM_FLOAT then
     usefloats:= TRUE
  end_if;

  if usefloats then
     // The floating point part:
     // Do not use sum(x.i*y.i, i = 1..n)/n - X*Y, because this
     // might be numerically unstable. Instead, use
     // sum((x.i - X1)*(y.i - X2), i = 1..n)/n;
     data1:= map(data1, _subtract, X1): // = [x1 - X1, x2 - X1, ..]
     data2:= map(data2, _subtract, X2): // = [y1 - X2, y2 - X2, ..]
     return(_plus(op(zip(data1, data2, _mult)))/(n - smple));
  else
     // The symbolic part: use sum(x.i*y.i, i = 1..n)/n - X1*X2
     // instead of sum((x.i - X1)*(y.i - X2), i = 1..n), because the
     // result tends to be simpler
     return(_plus(op(zip(data1, data2, _mult)))/(n - smple) - n/(n - smple)*X1*X2);
  end_if;
end_proc:
