//       

/*----------------------------------------------------------
Bravais-Pearson coefficient of correlation

Calls:  BPCorr([x1, x2, ..], [y1, y2, ..]) 
        BPCorr([[x1, y1], [x2, y2], ..]) 
        BPCorr(s <, c1, c2>)
        BPCorr(s <, [c1, c2]>)
        BPCorr(s1 <, c1>, s2 <, c2>)

Parameters:
    x.i, y.i  -  data: arithmetical expressions
    s, s1, s2 -  stats::sample
    c1, c2    -  data-column indices: positive integers
                 (optional if sample contains only 2 data-columns)

Details:   correlation(data1, data2)
         =  covariance(data1, data2)/ stdev(data1) / stdev(data2)
----------------------------------------------------------*/

stats::BPCorr := proc()
local a, twoSamples, i, data1, data2,
      n, usefloats, X1, X2, v1, v2;
begin
  if args(0) < 1 then 
     error("expecting at least one argument");
  end_if;

  // are there 2 stats::sample objects?
  a:= 1:
  twoSamples:= FALSE;
  if domtype(args(1)) = stats::sample then
     for i from 2 to args(0) do
         if domtype(args(i)) = stats::sample then
            twoSamples:= TRUE;
            break;
         end_if;
         a:= i: // count the arguments refering
                // to the first stats::sample
    end_for:
  end_if;

  if twoSamples then
     data1:= stats::getdata(testargs(), "all_data", 1, args(1..a)):
     if testargs() then
       if domtype(data1) = DOM_STRING then error(data1) end_if;
     end_if;
     data2:= stats::getdata(testargs(), "all_data", 1, args(a+1..args(0))):
     if testargs() then
       if domtype(data2) = DOM_STRING then error(data2) end_if;
     end_if;
  else
     data1:= [stats::getdata(testargs(), "all_data", 2, args())];
     // a string returned by stats::getdata indicates an error:
     if testargs() then
       if domtype(data1[1]) = DOM_STRING then error(data1[1]) end_if
     end_if;
     // data1 is a list of 2 lists [1st column, 2nd column]
     // Split into separate lists:
     [data1, data2] := data1:
  end_if;

  //-----------------------------------------
  // data1 = [x1, x2, ..] = 1st data column,
  // data2 = [y1, y2, ..] = 2nd data column.
  //-----------------------------------------

  n:= nops(data1); // = nops(data2):
  if n = 0 then
     return(FAIL); // would produce division by zero 
  end_if;

  usefloats:= FALSE:
  X1:= _plus(op(data1))/n; // = mean of 1st column
  if domtype(X1) = DOM_FLOAT then
     usefloats:= TRUE
  end_if;
  X2:= _plus(op(data2))/n; // = mean of 2nd column
  if domtype(X2) = DOM_FLOAT then
     usefloats:= TRUE
  end_if;

  if usefloats then
     // The floating point part: 
     // Do not use sum(x.i^2, i = 1..n) - n*X^2, because this
     // might be numerically unstable. Instead, use
     // sum((x.i - X)^2, i = 1..n)
     data1:= map(data1, _subtract, X1): // = [x1 - X1, x2 - X1, ..] 
     data2:= map(data2, _subtract, X2): // = [y1 - X2, y2 - X2, ..]
     v1:= _plus(op(map(data1, _power, 2))): // v1/n = variance of [x1, x2, ..]
     v2:= _plus(op(map(data2, _power, 2))): // v2/n = variance of [y1, y2, ..]
     v1:= sqrt(v1 * v2);
     if iszero(v1) then
        return(FAIL); // division by zero 
     end_if;
     return(_plus(op(zip(data1, data2, _mult)))/v1);
  else 
     // The symbolic part: use sum(x.i^2, i = 1..n) - n*X^2
     // instead of sum((x.i - X)^2, i = 1..n), because the
     // result tends to be simpler
     v1:= _plus(op(map(data1, _power, 2))) - n*X1^2: // v1/n = variance
     v2:= _plus(op(map(data2, _power, 2))) - n*X2^2: // v2/n = variance
     v1:= sqrt(v1 * v2);
     if iszero(v1) then
        return(FAIL); // division by zero 
     end_if;
     return((_plus(op(zip(data1, data2, _mult))) - n*X1*X2)/v1);
  end_if;
end_proc:

// end of file 
