/* -----------------------------------------------------
Kolmogorov-Smirnov test: 
  stats::ksGOFT tests the null hypothesis 
  'the data is a sample of independent f-distributed deviates'

Call: stats::ksGOFT(data, CDF = f)
      stats::ksGOFT(s, c, CDF = f)

Parameters:
  data - is a sequence or list of n sample data
    f  - some cumulative distribution function,
         e.g., f = stats::normalCDF(0, 1)
    s  - a stats::sample
    c  - a column index (use the c-th column of s as data)
  CDF  - the protected name CDF

RETURN: the list [PValue1 = p1, StatValue1 = K1, PValue2 = p2, StatValue2 = K2]

Details:
  K1, K2: 
     the Kolmogorov - Smirnov statistics
         K1 = sqrt(n)*max(j/n-f(x.j) $ j = 1 .. n),
         K2 = sqrt(n)*max(f(x.j)-(j-1)/n $ j = 1 .. n)
     (the x.j here are the elements of sort(data)).
  p1, p2: the significance levels of K1 and K2

NB: At the end of this file, you find some code
    for checking the accuracy of the approximations
    used in this implementation of the Kolmogorov-Smirnov
    test.
------------------------------------------------ */

// Kolmogorov-Smirnov goodness-of-fit test.
// Call: stats::ksGOFT(data, CDF = f) or stats::ksGOFT(s, c, CDF = f).
// All arguments but the last are handed to stats::getdata; the last
// argument must be the equation CDF = f, with f a procedure or a
// function environment.
// Returns [PValue1 = p1, StatValue1 = k1, PValue2 = p2, StatValue2 = k2],
// where k1 = sqrt(n)*max(j/n - f(x.j)) and k2 = sqrt(n)*max(f(x.j) - (j-1)/n)
// are taken over the sorted sample x.1 <= ... <= x.n.
stats::ksGOFT:= proc()
local sample, cdfSpec, cdf, probs, size, rootN, i,
      statUpper, statLower, tailProb;
begin
  if args(0) < 2 then
     error("expecting at least two arguments")
  end_if:

  // Approximation of probability(K > t) for either statistic, for a
  // sample of 'count' elements with sqrtSize = sqrt(count):
  //   exp(-2*t^2)*(1 - 2/3*t/sqrt(n) - (4/9*t^4 - 2/3*t^2)/n)
  // The approximation may leave the interval [0, 1], so the value is
  // clipped to that interval before it is returned.
  tailProb:= proc(t, sqrtSize, count)
  local p;
  begin
    p:= exp(-2*t^2)*(1 - 2/3*t/sqrtSize - (4/9*t^4-2/3*t^2)/count):
    if p < 0 then
       float(0)
    elif p > 1 then
       float(1)
    else
       p
    end_if
  end_proc:

  // Extract the sample from the leading arguments. A string result
  // of stats::getdata is treated as an error message and raised.
  sample:= stats::getdata(testargs(), "numeric_only", 1,
                          args(1..args(0) - 1)):
  if domtype(sample) = DOM_STRING then
     error(sample)
  end_if:

  // From here on the sample is a list.
  if sample = [] then
     error("expecting a non-empty sample of data")
  end_if:

  // The last argument has to be an equation with left hand side CDF.
  cdfSpec:= args(args(0)):
  if type(cdfSpec) <> "_equal" or op(cdfSpec, 1) <> CDF then
     error("expecting an equation 'CDF = procedure' as last argument"):
  end_if:

  cdf:= op(cdfSpec, 2):
  if not contains({DOM_PROC, DOM_FUNC_ENV}, domtype(cdf)) then
     error("the cumulative distribution function must be specified ".
           "by a procedure");
  end_if:

  // Convert to floats and insist on real floats: anything that is
  // not a DOM_FLOAT after the conversion is rejected.
  sample:= map(sample, float):
  if {op(map(sample, testtype, DOM_FLOAT))} <> {TRUE} then
     error("some data could not be converted to real floats");
  end_if:

  size:= nops(sample):
  rootN:= float(size)^(1/2):

  // Probability values of the sorted sample. The extra float is
  // applied in case cdf does not return a float by itself.
  probs:= map(sort(sample), float@cdf):
  if {op(map(probs, testtype, DOM_FLOAT))} <> {TRUE} then
     error("for some data, the cumulative distribution function ".
           "did not produce a numerical value");
  end_if:

  // The two Kolmogorov-Smirnov statistics (upper and lower tail).
  statUpper:= rootN*max(i/size - probs[i] $ i = 1..size):
  statLower:= rootN*max(probs[i] - (i - 1)/size $ i = 1..size):

  // The result: significance levels together with the statistics.
  [PValue1 = tailProb(statUpper, rootN, size), StatValue1 = statUpper,
   PValue2 = tailProb(statLower, rootN, size), StatValue2 = statLower];
end_proc:

/* -----------------------------------------------------
Utility for checking the precision of the approximation
used in ksGOFT:

exact:= (n, t) -> (( 
    fn:= float(n);
     1 - t/fn^(fn-1/2)*
         _plus(binomial(fn, k)*(k-t*sqrt(fn))^k*(sqrt(fn)*t+fn-k)^(fn-k-1) 
           $ k = floor(sqrt(fn)*t) + 1 .. n)
)):

approx:= (n, t) -> ((
  fn:= float(n):
  1 - exp(-2*t^2)*(1 - 2/3*t/fn^(1/2) - (4/9*t^4-2/3*t^2)/fn):
)):


print(n = 3);
exact(3, 0.10) - approx(3, 0.10),
exact(3, 0.50) - approx(3, 0.50),
exact(3, 0.90) - approx(3, 0.90),
exact(3, 0.95) - approx(3, 0.95),
exact(3, 0.99) - approx(3, 0.99),
exact(3, 0.9999999) - approx(3, 0.9999999);

print(n = 10^1);
exact(10^1, 0.10) - approx(10^1, 0.10),
exact(10^1, 0.50) - approx(10^1, 0.50),
exact(10^1, 0.90) - approx(10^1, 0.90),
exact(10^1, 0.95) - approx(10^1, 0.95),
exact(10^1, 0.99) - approx(10^1, 0.99);

print(n = 10^2);
exact(10^2, 0.10) - approx(10^2, 0.10),
exact(10^2, 0.50) - approx(10^2, 0.50),
exact(10^2, 0.90) - approx(10^2, 0.90),
exact(10^2, 0.95) - approx(10^2, 0.95),
exact(10^2, 0.99) - approx(10^2, 0.99);

print(n = 10^3);
exact(10^3, 0.10) - approx(10^3, 0.10),
exact(10^3, 0.50) - approx(10^3, 0.50),
exact(10^3, 0.90) - approx(10^3, 0.90),
exact(10^3, 0.95) - approx(10^3, 0.95),
exact(10^3, 0.99) - approx(10^3, 0.99);

print(n = 10^4);
exact(10^4, 0.10) - approx(10^4, 0.10),
exact(10^4, 0.50) - approx(10^4, 0.50),
exact(10^4, 0.90) - approx(10^4, 0.90),
exact(10^4, 0.95) - approx(10^4, 0.95),
exact(10^4, 0.99) - approx(10^4, 0.99);
-----------------------------------------------*/

