/* -------------------------------------------------------------------------
Stephan Huckemann Nov.5.01
Quantile function for the

hypergeometric cumulative probability for at least x successes in a sample of size n where 
population size is N with X successes in entire population

let a = max(0, n+X-N) and b = min(n,X), then

P(t <= k) = Sum(i= a ... [k]) of { binomial(X, i)* binomial(N-X, n-i) / binomial(N, n)}
          = 1 - Sum(i = [k]+1 ... b) of { binomial(X, i)* binomial(N-X, n-i) / binomial(N, n)}

Q(x) = min{k integer >= 0 : P(k) >= x}

-------------------------------------------------------------------------*/

stats::hypergeometricQuantile:=proc(N, X, n)

local fN, fX, fn;

option escape;
begin

  if args(0)<>3 then
     error("expecting three arguments")
  end_if:

  // ------------- check N -------------
  fN := float(N);
  if domtype(N) = DOM_INT and N < 1 then
     error("the 'population size' must be symbolic or a positive integer"):
  end_if;
  if domtype(fN) = DOM_FLOAT and domtype(N) <> DOM_INT then
     error("the 'population size' must be symbolic or a positive integer"):
  end_if;
  if domtype(fN) = DOM_COMPLEX then
     error("the 'population size' must be real"):
  end_if;
  
 // ------------- check X -------------
  fX := float(X);
  if domtype(X) = DOM_INT then
     if X < 0 or (domtype(N)= DOM_INT and X > N) then
        error("the 'success population size' must be symbolic or an integer".
              " between 0 and 'population size'"):
     end_if;
  end_if;
  if domtype(fX) = DOM_FLOAT and domtype(X) <> DOM_INT then
     error("the 'success population size' must be symbolic or an integer".
           " between 0 and 'population size'"):  
  end_if;
  if domtype(fX) = DOM_COMPLEX then
     error("the 'success population size' must be real"):
  end_if;
 
 // ------------- check n -------------
  fn := float(n);
  if domtype(n) = DOM_INT then
     if n < 0 or (domtype(N) = DOM_INT and n > N) then
          error("the 'sample size' must be symbolic or an integer".
          " between 0 and 'population size'"):
     end_if;
  end_if;
  if domtype(fn) = DOM_FLOAT and domtype(n) <> DOM_INT then
     error("the 'sample size' must be symbolic or an integer".
     " between 0 and 'population size'"):
  end_if;
  if domtype(fn) = DOM_COMPLEX then
     error("the 'sample size' must be real"):
  end_if;
 

  //-------------------------------
  // return the following procedure
  //-------------------------------
  proc(x)
  local NN, XX, nn, fNN, fXX, fnn, fx, a, b, c, tmp, tmpsum, k;
  begin
    if args(0)<>1 then
       error("expecting one argument")
    end_if:

    // double check the parameters N, X and n. They
    // might have changed since this procedure was
    // created:

    // ------------- check N -------------
    NN  := context(N):
    fNN := float(NN):
    if domtype(NN) = DOM_INT and NN < 1 then
     error("the 'population size' must be symbolic or a positive integer"):
    end_if;
    if domtype(fNN) = DOM_FLOAT and domtype(NN) <> DOM_INT then
     error("the 'population size' must be symbolic or a positive integer"):
    end_if;
    if domtype(fNN) = DOM_COMPLEX then
     error("the 'population size' must be real"):
    end_if;

    // ------------- check X -------------
    XX := context(X):
    fXX := float(XX):
    if domtype(XX) = DOM_INT then
     if XX < 0 or (domtype(NN)= DOM_INT and XX > NN) then
          error("the 'success population size' must be symbolic or an integer".
          " between 0 and 'population size'"):
     end_if;
    end_if;
    if domtype(fXX) = DOM_FLOAT and domtype(XX) <> DOM_INT then
     error("the 'success population size' must be symbolic or an integer".
     " between 0 and 'population size'"):
    end_if;
    if domtype(fXX) = DOM_COMPLEX then
     error("the 'success population size' must be real"):
    end_if;

    // ------------- check n -------------
    nn:= context(n):
    fnn := float(nn):
    if domtype(nn) = DOM_INT then
     if nn < 0 or (domtype(NN) = DOM_INT and nn > NN) then
          error("the 'sample size' must be symbolic or an integer".
          " between 0 and 'population size'"):
     end_if
    end_if;
    if domtype(fnn) = DOM_FLOAT and domtype(nn) <> DOM_INT then
     error("the 'sample size' must be symbolic or an integer".
     " between 0 and 'population size'"):
    end_if;
    if domtype(fnn) = DOM_COMPLEX then
     error("the 'sample size' must be real"):
    end_if;

    fx := float(x);

    // ------------ checking x --------------------------------------------------
    // If x is neither symbolic nor integer nor float
    if domtype(fx) = DOM_COMPLEX     then 
     error("the argument must be symbolic real"); 
    end_if;

    // x as being  a probability should be between 0 and 1

    if domtype(fx) = DOM_FLOAT and (fx < 0 or fx > 1) then
     error("expecting a symbolic argument or a numeric argument between 0 and 1");
    end_if;

    // if x is symbolic with an assumption x < 0 or x > 1:
    if domtype(fx) <> DOM_FLOAT then
     if (is(x>1)) = TRUE then
          error("expecting an argument between 0 and 1,".
          " the assumption on the argument, however, is ".
          expr2text(x)." > 1");
     elif (is(x<0)) = TRUE then
          error("expecting an argument between 0 and 1,".
          " the assumption on the argument, however, is ".
          expr2text(x)." < 0");
     end_if;
    end_if;
    // now we can assume that N, X and n are of type integer or symbolic,
    // x is between 0 and 1 integer, float or symbolic

    //------------ the special cases ----------------- 
    //           if x = 0, then Q(x) = 0 since P(0) >= 0 always
    // now assume x symbolic or > 0
    //           if x = 1, then Q(x) = min(X, n) 
    //                         (if Q(x) = k < X and < n, 
    //                         then P(k) < 1, i.e. 
    //                                  k would be to small)
    // now assume x symbolic or 0 < x < 1
    //          if n = 0, then Q(x) = 0 since P(0) = 1 >= x
    //                          (for all x <= 1)
    //          if n = 1, then Q(x) = 0 for all x <= (N-X)/N since 
    //                         P(0) = (N-X)/N >= x
    //                         Q(x) = 1 for all x > (N-X)/N since
    //                                  P(1) = 1 >= x and P(0) < x
    // 
    // now assume x symbolic or 0 < x < 1 and h symbolic or n >= 2
    //           if X = 0, then Q(x) = 0 since P(0) = 1 >= x 
    //                         (for all x <= 1)
    //         if X = 1, then 
    //             for 0 <= x <= (N-n)/N:
    //             Q(x) = 0, as P(0) = (N-n)/N >= x,
    //                 for (N-n)/N < x <= 1:
    //                   Q(x) = 1, as P(1) = 1 >= x and P(0) < x 
    // now assume x symbolic or 0 < x < 1 and n symbolic or n >= 2 and
    // X symbolic or >= 2
    //         if n = N, then (for X >= 1):
    //                     P(k) = 0 for all k < X, P(X) = 1,
    //                     i.e. Q(x) = X for all x > 0
    //         if n = N - 1, then 
    //                 for x = X/N
    //                     Q(x) = X-1 as P(k) < X/N for k < X-1 and
    //                                   P(X-1) = X/N
    //               for X/N < x <= 1
    //               Q(x) = X as P(X-1) = X/N, P(X) = 1         
    //         if X = N, then (for n >= 1):
    //                     P(k) = 0 for all k < n, P(n) = 1,
    //                     i.e. Qx) = n for all x > 0
    //         if X = N - 1, then 
    //                 for x = n/N
    //                     Q(x) = n-1 as P(k) < n/N for k < n-1 and
    //                                   P(n-1) = n/N
    //               for n/N < x <= 1
    //               Q(x) = n as P(n-1) = n/N, P(n) = 1         
    // -------------------------- gosh, that's lot's of cases  ----------------


    // --------------------------- let's do'em step by step --------------------------------

    //         if x = 0, then Q(x) = 0 since P(0) >= 0 always
    if iszero(x) then return(x); end_if;        

    // now assume x symbolic or x > 0
    //         if x = 1, then Q(x) = min(X, n) 
    //          no matter whether X and n are numerical or symbolic
    if iszero(x-1) then
        if domtype(x) = DOM_FLOAT then return(float(min(XX,nn))) end_if;
        return(min(XX,nn)); 
    end_if;
        
    // now assume x symbolic or 0 < x < 1
    //          if n = 0, then Q(x) = 0 since P(0) = 1 >= x
    if iszero(nn) then
        if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
        return(0); 
    end_if;


    // now assume x symbolic or 0 < x < 1 and n symbolic or n >=2
    //         if X = 0, then Q(x) = 0 since P(0) = 1 >= x 
    //                    (for all x <= 1)
    if iszero(XX) then
        if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
        return(0); 
    end_if;

    //         if X = 1, then 
    //             for 0 <= x <= (N-n)/N: Q(x) = 0
    //                 for (N-n)/N < x <= 1 : Q(x) = 1
    if iszero(XX-1) then
        // symbolic case without assume:
        if iszero(x - (NN - nn)/NN) then
            if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
            return(0); 
        elif domtype(NN) = DOM_INT and domtype(nn) = DOM_INT and 
             domtype(fx) = DOM_FLOAT then
            if ((NN-nn)/NN < fx) then
                if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
                return(1); 
            else
                if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
                return(0); 
            end_if;
        else // at least one of N, n and x is symbolic, try to compare
            if is(((NN-nn)/NN - x) < 0 ) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
                return(1); 
            end_if;
            if is(((NN-nn)/NN - x) >= 0 ) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
                return(0); 
            end_if;
        end_if;
    end_if;

    // now assume x symbolic or 0 < x < 1 and n symbolic or n >= 2 and
    // X symbolic or >= 2

    //         if n = 1, then 
    //             for 0 <= x <= (N-X)/N: Q(x) = 0
    //                 for (N-X)/N < x <= 1 : Q(x) = 1
    if iszero(nn-1) then
        // symbolic case without assume:
        if iszero(x - (NN - XX)/NN) then
            if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
            return(0); 
        elif domtype(NN) = DOM_INT and domtype(XX) = DOM_INT and 
             domtype(fx) = DOM_FLOAT then
            if ((NN-XX)/NN < fx) then
                if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
                return(1); 
            else
                if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
                return(0); 
            end_if;
        else // at least one of N, X and x is symbolic, try to compare
            if is(((NN-XX)/NN - x) < 0 ) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(1)) end_if;
                return(1); 
            end_if;
            if is(((NN-XX)/NN - x) >= 0 ) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(0)) end_if;
                return(0); 
            end_if;
        end_if;
    end_if;

    //         if n = N, then (X is now symbolic or >= 1) P(x) = n for all x > 0
    //         if x is not symbolic it is already > 0)
     if iszero(NN-nn) then
        if domtype(fx) = DOM_FLOAT then
            if domtype(x) = DOM_FLOAT then return(float(XX)) end_if;
            return(XX);
        else // x is symbolic, maybe carrying an assumption
            if is(x>0) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(XX)) end_if;
                return(XX);
            end_if;
        end_if;
    end_if; 
     
    //         if n = N - 1, then for x = X/N     : Q(x) = X -1,
    //                          for X/N < x <= 1: Q(x) = X        
    if iszero(NN - 1 - nn) then
        if domtype(NN) = DOM_INT and domtype(XX) = DOM_INT and
           domtype(fx) = DOM_FLOAT then
            if iszero(x - XX/NN) then
                if domtype(x) = DOM_FLOAT then return(float(XX-1)) end_if;
                return(XX-1);
            elif (fx - XX/NN) > 0 then
                if domtype(x) = DOM_FLOAT then return(float(XX)) end_if;
                return(XX);
            end_if;

        else // at least on of N, X and x is symbolic, try to compare
            if (is((x - XX/NN) = 0) = TRUE) then
                if domtype(x) = DOM_FLOAT then return(float(XX-1)) end_if;
                return(XX-1);
 
            elif (is((x - XX/NN) > 0)) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(XX)) end_if;
                return(XX);
            end_if;
        end_if;
    end_if;



    //         if X = N, then (n is now symbolic or >= 1) P(x) = n for all x > 0
    //         if x is not symbolic it is already > 0)
    if iszero(NN-XX) then
        if domtype(fx) = DOM_FLOAT then
            if domtype(x) = DOM_FLOAT then return(float(nn)) end_if;
            return(nn);
        else // x is symbolic, maybe carrying an assumption
            if is(x>0) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(nn)) end_if;
                return(nn);
            end_if;
        end_if;
    end_if; 
     
    //         if X = N - 1, then for x = n/N     : Q(x) = n-1,
    //                          for n/N < x <= 1: Q(x) = n        
    if iszero(NN - 1 - XX) then
        if domtype(NN) = DOM_INT and domtype(nn) = DOM_INT and
           domtype(fx) = DOM_FLOAT then
            if iszero(x - nn/NN) then
                if domtype(x) = DOM_FLOAT then return(float(nn-1)) end_if;
                return(nn-1);
            elif (fx - nn/NN) > 0 then
                if domtype(x) = DOM_FLOAT then return(float(nn)) end_if;
                return(nn);
            end_if;

        else // at least on of N, n and x is symbolic, try to compare
            if (is((x - nn/NN) = 0) = TRUE) then
                if domtype(x) = DOM_FLOAT then return(float(nn-1)) end_if;
                return(nn-1);
 
            elif (is((x - nn/NN) > 0)) = TRUE then
                if domtype(x) = DOM_FLOAT then return(float(nn)) end_if;
                return(nn);
            end_if;
        end_if;
    end_if;
    // ------------- now we're thru with those special cases -------------------------------

    // if any is still symbolic we can give no further help
    if domtype(NN) <> DOM_INT or domtype(XX) <> DOM_INT or domtype(nn) <> DOM_INT or
       domtype(fx) <> DOM_FLOAT then
           return(hold(stats::hypergeometricQuantile)(NN, XX, nn)(x));
    end_if;

    // ----------------------------------------
    // now everything is completly numeric
    // N,X and n are positive integers, 0 < x < 1
    // In most applications     N is fairly big e.g. 300.000
    //                 X is still pretty big, like 10.000
    //                 n is comparatively small say 100
    // then the cumulative probability for at least n/2 success is almost 1
    // if X = N/2 and n is still comparatively small, then
    // then the cumulative probability for at least n/2 success around close to .5
    // if X > N/2 we replace X with N-X taking into account that
    // P(N,X,n)(k) = 1 - P(N,N-X,n)(n-k-1).
    // ----------------------------------------

    // is trunc(x) closer to max(0, n + X - N) or to min(n, X)? Depending on that
    // two different summations further down


    // if x is float convert everything to float for speeding up

    if domtype(x) = DOM_FLOAT then
    tmp := float(1);
    else
    tmp := 1;
    end_if;

    if XX <= NN/2 then // start adding from below

      a := max(0, nn + XX - NN);
      b := min(nn, XX); 
      if domtype(x) <> DOM_INT and domtype(x) <> DOM_RAT then 
        // it's an expression or float
        c := fx * binomial(NN,nn);
      else
        c := x * binomial(NN,nn);
      end_if;
      tmp := tmp * binomial(XX, a) * binomial(NN - XX, nn - a) ;   // = P(t <= a)
      tmpsum := tmp;    
      if tmpsum >= c then // first hit of the quantile
        if domtype(x) = DOM_FLOAT then return(float(a)); end_if;
        return(a);
      end_if;
      for k from a+1 to b do
        // binomial(X, k) = binomial(X, k - 1) * (X - k + 1) / k
        // binomial(N - X, n - k) = binomial(N - X, n - (k - 1)) * (n - k + 1) / (N - X - n  + k)
        tmp := tmp * (XX - k + 1) / k * (nn - k + 1) / (NN - XX - nn + k);
        tmpsum := tmpsum + tmp;
        if tmpsum >= c then // first hit of the quantile
      if domtype(x) = DOM_FLOAT then return(float(k)); end_if;
      return(k);
        end_if;
      end_for;
  
      // we should never reach this part of code ...
      if domtype(x) = DOM_FLOAT then return(float(b)); end_if;
      return(b);            
   else
      XX := NN - XX;
      x  := 1 - x;
      fx := float(x);

      a := max(0, nn + XX - NN);
      b := min(nn, XX); 
      if domtype(x) <> DOM_INT and domtype(x) <> DOM_RAT then 
     // it's an expression or float
         c := fx * binomial(NN,nn);
      else
         c := x * binomial(NN,nn);
      end_if;

      tmp := tmp * binomial(XX, a) * binomial(NN - XX, nn - a) ;   // = P(t <= a)
      tmpsum := tmp;    
      if tmpsum > c then // first hit of the quantile
         if domtype(x) = DOM_FLOAT then return(float(nn-a)); end_if;
         return(nn-a);
      end_if;
      for k from a+1 to b do
         // binomial(X, k) = binomial(X, k - 1) * (X - k + 1) / k
         // binomial(N - X, n - k) = binomial(N - X, n - (k - 1)) * (n - k + 1) / (N - X - n  + k)
         tmp := tmp * (XX - k + 1) / k * (nn - k + 1) / (NN - XX - nn + k);
         tmpsum := tmpsum + tmp;
         if tmpsum > c then// first hit of the quantile
           if domtype(x) = DOM_FLOAT then return(float(nn-k)); end_if;
           return(nn-k);
         end_if;
      end_for;
      // we should never reach this part of code ...  
      if domtype(x) = DOM_FLOAT then return(float(nn-b)); end_if;
         return(nn-b);
      end_if;
   end_proc:
end_proc:
